diff --git a/examples/dxPtexViewer/dxPtexViewer.cpp b/examples/dxPtexViewer/dxPtexViewer.cpp old mode 100755 new mode 100644 index 448d6e69..5dd60e61 --- a/examples/dxPtexViewer/dxPtexViewer.cpp +++ b/examples/dxPtexViewer/dxPtexViewer.cpp @@ -819,7 +819,7 @@ createOsdMesh(int level, int kernel) { //------------------------------------------------------------------------------ static void -bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) { +bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); @@ -984,7 +984,7 @@ drawModel() { UINT hOffsets = 0; g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets); - OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches = + OpenSubdiv::Osd::PatchArrayVector const & patches = g_mesh->GetPatchTable()->GetPatchArrays(); g_pd3dDeviceContext->IASetIndexBuffer( @@ -993,7 +993,7 @@ drawModel() { // patch drawing for (int i = 0; i < (int)patches.size(); ++i) { - OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); diff --git a/examples/dxViewer/dxviewer.cpp b/examples/dxViewer/dxviewer.cpp index d3189e5d..5e59b1eb 100644 --- a/examples/dxViewer/dxviewer.cpp +++ b/examples/dxViewer/dxviewer.cpp @@ -630,7 +630,7 @@ ShaderCache g_shaderCache; //------------------------------------------------------------------------------ static void -bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) { +bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); typedef OpenSubdiv::Far::PatchDescriptor Descriptor; @@ -822,7 +822,7 @@ display() { UINT hOffsets = 0; g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets); - OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches = + OpenSubdiv::Osd::PatchArrayVector const & patches = g_mesh->GetPatchTable()->GetPatchArrays(); g_pd3dDeviceContext->IASetIndexBuffer( @@ -834,7 +834,7 @@ display() { int numDrawCalls = 0; for (int i=0; i<(int)patches.size(); ++i) { - OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); diff --git a/examples/glEvalLimit/glEvalLimit.cpp b/examples/glEvalLimit/glEvalLimit.cpp old mode 100755 new mode 100644 index ad6df89d..2dc54d9f --- a/examples/glEvalLimit/glEvalLimit.cpp +++ b/examples/glEvalLimit/glEvalLimit.cpp @@ -44,15 +44,54 @@ GLFWmonitor* g_primary=0; #include #include +#include #include #include +#ifdef OPENSUBDIV_HAS_TBB + #include +#endif + +#ifdef OPENSUBDIV_HAS_OPENMP + #include +#endif + +#ifdef OPENSUBDIV_HAS_CUDA + #include + #include + #include + #include + #include "../common/cudaDeviceContext.h" + + CudaDeviceContext g_cudaDeviceContext; +#endif + +#ifdef OPENSUBDIV_HAS_OPENCL + #include + #include + #include + #include + #include "../common/clDeviceContext.h" + CLDeviceContext g_clDeviceContext; +#endif + +#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK + #include + #include + #include +#endif + +#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE + #include + #include + #include +#endif + #include #include #include #include #include -#include #include @@ -75,32 +114,44 @@ GLFWmonitor* g_primary=0; using namespace OpenSubdiv; //------------------------------------------------------------------------------ +enum KernelType { kCPU = 0, + kOPENMP = 1, + kTBB = 2, + kCUDA = 3, + kCL = 4, + kGLXFB = 5, + kGLCompute = 6 }; + +enum EndCap { kEndCapBSplineBasis, + kEndCapGregoryBasis }; + +enum DrawMode { kUV, + kVARYING, + kNORMAL, + kSHADE, + kFACEVARYING }; + std::vector g_orgPositions, g_positions, g_varyingColors; int g_currentShape = 0, g_level = 3, + g_kernel = kCPU, + g_endCap = kEndCapBSplineBasis, g_numElements = 3; std::vector g_coarseEdges; std::vector g_coarseEdgeSharpness; std::vector g_coarseVertexSharpness; -enum DrawMode { kRANDOM=0, - kUV, - kVARYING, - kNORMAL, - kSHADE, - kFACEVARYING }; - int g_running = 1, g_width = 1024, g_height = 1024, g_fullscreen = 0, g_drawCageEdges = 1, g_drawCageVertices = 1, - g_drawMode = kVARYING, + g_drawMode = kUV, g_prev_x = 0, g_prev_y = 0, g_mbutton[3] = {0, 0, 0}, @@ -131,11 +182,9 @@ float g_computeTime = 0; Stopwatch g_fpsTimer; //------------------------------------------------------------------------------ -int g_nparticles=0, - g_nsamples=101, - g_nsamplesFound=0; +int g_nParticles = 65536; -bool g_randomStart=false; +bool g_randomStart = true;//false; GLuint g_cageEdgeVAO = 0, g_cageEdgeVBO = 0, @@ -155,6 +204,7 @@ struct Program { GLuint attrColor; GLuint attrTangentU; GLuint attrTangentV; + GLuint attrPatchCoord; } g_defaultProgram; //------------------------------------------------------------------------------ @@ -204,37 +254,189 @@ createCoarseMesh(OpenSubdiv::Far::TopologyRefiner const & refiner) { } //------------------------------------------------------------------------------ -Far::TopologyRefiner * g_topologyRefiner = 0; - -Far::StencilTable const * g_vertexStencils = NULL; -Far::StencilTable const * g_varyingStencils = NULL; - Far::PatchTable const * g_patchTable = NULL; -Far::PatchMap const * g_patchMap = NULL; -std::vector g_patchCoords; -Osd::VertexBufferDescriptor g_idesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 3), - g_odesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6), - g_vdesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6), - g_duDesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6), - g_dvDesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6), - g_fvidesc(/*offset*/ 0, /*legnth*/ 2, /*stride*/ 2), - g_fvodesc(/*offset*/ 3, /*legnth*/ 2, /*stride*/ 6); +// input and output vertex data +class EvalOutputBase { +public: + virtual ~EvalOutputBase() {} + virtual GLuint BindVertexData() const = 0; + virtual GLuint BindDerivatives() const = 0; + virtual GLuint BindPatchCoords() const = 0; + virtual void UpdateData(const float *src, int startVertex, int numVertices) = 0; + virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) = 0; + virtual void Refine() = 0; + virtual void EvalPatches() = 0; + virtual void EvalPatchesWithDerivatives() = 0; + virtual void EvalPatchesVarying() = 0; + virtual void UpdatePatchCoords( + std::vector const &patchCoords) = 0; +}; -// input vertex data (coarse + refined) -Osd::CpuVertexBuffer * g_vertexData = 0; -Osd::CpuVertexBuffer * g_varyingData = 0; +// note: Since we don't have a class for device-patchcoord container in osd, +// we cheat to use vertexbuffer as a patch-coord (5int) container. +// +// Please don't follow the pattern in your actual application. +// +template +class EvalOutput : public EvalOutputBase { +public: + typedef OpenSubdiv::Osd::EvaluatorCacheT EvaluatorCache; -// output vertex data (limit locations) -Osd::CpuGLVertexBuffer * g_outVertexData = NULL; -Osd::CpuGLVertexBuffer * g_outDerivatives = NULL; + EvalOutput(Far::StencilTable const *vertexStencils, + Far::StencilTable const *varyingStencils, + int numCoarseVerts, int numTotalVerts, int numParticles, + Far::PatchTable const *patchTable, + EvaluatorCache *evaluatorCache = NULL, + DEVICE_CONTEXT *deviceContext = NULL) + : _srcDesc( /*offset*/ 0, /*length*/ 3, /*stride*/ 3), + _srcVaryingDesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 3), + _vertexDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6), + _varyingDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6), + _duDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6), + _dvDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6), + _deviceContext(deviceContext) { + _srcData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext); + _srcVaryingData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext); + _vertexData = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext); + _derivatives = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext); + _patchTable = PATCH_TABLE::Create(patchTable, _deviceContext); + _patchCoords = NULL; + _numCoarseVerts = numCoarseVerts; + _vertexStencils = + Osd::convertToCompatibleStencilTable(vertexStencils, _deviceContext); + _varyingStencils = + Osd::convertToCompatibleStencilTable(varyingStencils, _deviceContext); + _evaluatorCache = evaluatorCache; + } + ~EvalOutput() { + delete _srcData; + delete _srcVaryingData; + delete _vertexData; + delete _derivatives; + delete _patchTable; + delete _patchCoords; + } + virtual GLuint BindVertexData() const { + return _vertexData->BindVBO(); + } + virtual GLuint BindDerivatives() const { + return _derivatives->BindVBO(); + } + virtual GLuint BindPatchCoords() const { + return _patchCoords->BindVBO(); + } + virtual void UpdateData(const float *src, int startVertex, int numVertices) { + _srcData->UpdateData(src, startVertex, numVertices, _deviceContext); + } + virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) { + _srcVaryingData->UpdateData(src, startVertex, numVertices, _deviceContext); + } + virtual void Refine() { + Osd::VertexBufferDescriptor dstDesc = _srcDesc; + dstDesc.offset += _numCoarseVerts * _srcDesc.stride; + + EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator( + _evaluatorCache, _srcDesc, dstDesc, _deviceContext); + + EVALUATOR::EvalStencils(_srcData, _srcDesc, + _srcData, dstDesc, + _vertexStencils, + evalInstance, + _deviceContext); + + dstDesc = _srcVaryingDesc; + dstDesc.offset += _numCoarseVerts * _srcVaryingDesc.stride; + evalInstance = OpenSubdiv::Osd::GetEvaluator( + _evaluatorCache, _srcVaryingDesc, dstDesc, _deviceContext); + + EVALUATOR::EvalStencils(_srcVaryingData, _srcVaryingDesc, + _srcVaryingData, dstDesc, + _varyingStencils, + evalInstance, + _deviceContext); + } + virtual void EvalPatches() { + EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator( + _evaluatorCache, _srcDesc, _vertexDesc, _deviceContext); + + EVALUATOR::EvalPatches( + _srcData, _srcDesc, + _vertexData, _vertexDesc, + _patchCoords->GetNumVertices(), + _patchCoords, + _patchTable, evalInstance, _deviceContext); + } + virtual void EvalPatchesWithDerivatives() { + EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator( + _evaluatorCache, _srcDesc, _vertexDesc, _deviceContext); + EVALUATOR::EvalPatches( + _srcData, _srcDesc, + _vertexData, _vertexDesc, + _derivatives, _duDesc, + _derivatives, _dvDesc, + _patchCoords->GetNumVertices(), + _patchCoords, + _patchTable, evalInstance, _deviceContext); + } + virtual void EvalPatchesVarying() { + EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator( + _evaluatorCache, _srcVaryingDesc, _varyingDesc, _deviceContext); + + EVALUATOR::EvalPatches( + _srcVaryingData, _srcVaryingDesc, + // varyingdata is interleved in vertexData. + _vertexData, _varyingDesc, + _patchCoords->GetNumVertices(), + _patchCoords, + _patchTable, evalInstance, _deviceContext); + } + virtual void UpdatePatchCoords( + std::vector const &patchCoords) { + if (_patchCoords and + _patchCoords->GetNumVertices() != (int)patchCoords.size()) { + delete _patchCoords; + _patchCoords = NULL; + } + if (not _patchCoords) { + _patchCoords = EVAL_VERTEX_BUFFER::Create(5, + (int)patchCoords.size(), + _deviceContext); + } + _patchCoords->UpdateData((float*)&patchCoords[0], 0, (int)patchCoords.size(), _deviceContext); + } +private: + SRC_VERTEX_BUFFER *_srcData; + SRC_VERTEX_BUFFER *_srcVaryingData; + EVAL_VERTEX_BUFFER *_vertexData; + EVAL_VERTEX_BUFFER *_derivatives; + EVAL_VERTEX_BUFFER *_varyingData; + EVAL_VERTEX_BUFFER *_patchCoords; + PATCH_TABLE *_patchTable; + Osd::VertexBufferDescriptor _srcDesc; + Osd::VertexBufferDescriptor _srcVaryingDesc; + Osd::VertexBufferDescriptor _vertexDesc; + Osd::VertexBufferDescriptor _varyingDesc; + Osd::VertexBufferDescriptor _duDesc; + Osd::VertexBufferDescriptor _dvDesc; + int _numCoarseVerts; + + STENCIL_TABLE const *_vertexStencils; + STENCIL_TABLE const *_varyingStencils; + + EvaluatorCache *_evaluatorCache; + DEVICE_CONTEXT *_deviceContext; +}; + +EvalOutputBase *g_evalOutput = NULL; STParticles * g_particles=0; //------------------------------------------------------------------------------ static void updateGeom() { - int nverts = (int)g_orgPositions.size() / 3; const float *p = &g_orgPositions[0]; @@ -255,28 +457,18 @@ updateGeom() { Stopwatch s; s.Start(); - g_vertexData->UpdateData( &g_positions[0], 0, nverts); + // update coarse vertices + g_evalOutput->UpdateData(&g_positions[0], 0, nverts); - if (! g_topologyRefiner) return; + // update coarse varying + if (g_drawMode == kVARYING) { + g_evalOutput->UpdateVaryingData(&g_varyingColors[0], 0, nverts); - // note that for patch eval we need coarse+refined combined buffer. - int nCoarseVertices = g_topologyRefiner->GetLevel(0).GetNumVertices(); - Osd::CpuEvaluator::EvalStencils(g_vertexData, - Osd::VertexBufferDescriptor(0, 3, 3), - g_vertexData, - Osd::VertexBufferDescriptor( - nCoarseVertices*3, 3, 3), - g_vertexStencils); - - if (g_varyingData) { - Osd::CpuEvaluator::EvalStencils(g_varyingData, - Osd::VertexBufferDescriptor(0, 3, 3), - g_varyingData, - Osd::VertexBufferDescriptor( - nCoarseVertices*3, 3, 3), - g_varyingStencils); } + // Refine + g_evalOutput->Refine(); + s.Stop(); g_computeTime = float(s.GetElapsed() * 1000.0f); @@ -287,60 +479,28 @@ updateGeom() { // Apply 'dynamics' update assert(g_particles); + g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time... + std::vector const &patchCoords + = g_particles->GetPatchCoords(); - // resolve particle positions into patch handles - // XXX: this process should be handled by OsdKernel in parallel - g_patchCoords.clear(); - for (int i = 0; i < g_particles->GetNumParticles(); ++i) { - STParticles::Position const &position = g_particles->GetPositions()[i]; - Far::PatchTable::PatchHandle const *handle = - g_patchMap->FindPatch(position.ptexIndex, position.s, position.t); - if (handle) { - g_patchCoords.push_back(Osd::PatchCoord( - *handle, position.s, position.t)); - } - } + // update patchcoord to be evaluated + g_evalOutput->UpdatePatchCoords(patchCoords); // Evaluate the positions of the samples on the limit surface if (g_drawMode == kNORMAL || g_drawMode == kSHADE) { // evaluate positions and derivatives - g_nsamplesFound = Osd::CpuEvaluator::EvalPatches( - g_vertexData, g_idesc, - g_outVertexData, g_odesc, - g_outDerivatives, g_duDesc, - g_outDerivatives, g_dvDesc, - (int)g_patchCoords.size(), - &g_patchCoords[0], - g_patchTable, NULL); + g_evalOutput->EvalPatchesWithDerivatives(); } else { // evaluate positions - g_nsamplesFound = Osd::CpuEvaluator::EvalPatches( - g_vertexData, g_idesc, - g_outVertexData, g_odesc, - (int)g_patchCoords.size(), - &g_patchCoords[0], - g_patchTable, NULL); + g_evalOutput->EvalPatches(); } // color - if (g_drawMode == kUV) { - // store patchCoords as colors - float *p = g_outVertexData->BindCpuBuffer() + g_vdesc.offset; - for (int i = 0; i < (int)g_patchCoords.size(); ++i) { - p[0] = g_patchCoords[i].s; - p[1] = g_patchCoords[i].t; - p[2] = 0; - p += g_vdesc.stride; - } - } else if (g_drawMode == kVARYING) { + if (g_drawMode == kVARYING) { // XXX: is this really varying? - Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc, - g_outVertexData, g_vdesc, - (int)g_patchCoords.size(), - &g_patchCoords[0], - g_patchTable, NULL); + g_evalOutput->EvalPatchesVarying(); } s.Stop(); @@ -359,8 +519,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape); OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape); - delete g_topologyRefiner; - g_topologyRefiner = + Far::TopologyRefiner *topologyRefiner = OpenSubdiv::Far::TopologyRefinerFactory::Create(*shape, OpenSubdiv::Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); @@ -371,22 +530,16 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { float speed = g_particles ? g_particles->GetSpeed() : 0.2f; - // Create the 'uv particles' manager - this class manages the limit - // location samples (ptex face index, (s,t) and updates them between frames. - // Note: the number of limit locations can be entirely arbitrary - delete g_particles; - g_particles = new STParticles(*g_topologyRefiner, g_nsamples, !g_randomStart); - g_nparticles = g_particles->GetNumParticles(); - g_particles->SetSpeed(speed); - - createCoarseMesh(*g_topologyRefiner); + createCoarseMesh(*topologyRefiner); + Far::StencilTable const * vertexStencils = NULL; + Far::StencilTable const * varyingStencils = NULL; int nverts=0; { // Apply feature adaptive refinement to the mesh so that we can use the // limit evaluation API features. Far::TopologyRefiner::AdaptiveOptions options(level); - g_topologyRefiner->RefineAdaptive(options); + topologyRefiner->RefineAdaptive(options); // Generate stencil table to update the bi-cubic patches control // vertices after they have been re-posed (both for vertex & varying @@ -395,27 +548,33 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { soptions.generateOffsets=true; soptions.generateIntermediateLevels=true; - Far::StencilTable const * vertexStencils = - Far::StencilTableFactory::Create(*g_topologyRefiner, soptions); + vertexStencils = + Far::StencilTableFactory::Create(*topologyRefiner, soptions); soptions.interpolationMode = Far::StencilTableFactory::INTERPOLATE_VARYING; - Far::StencilTable const * varyingStencils = - Far::StencilTableFactory::Create(*g_topologyRefiner, soptions); + + varyingStencils = + Far::StencilTableFactory::Create(*topologyRefiner, soptions); // Generate bi-cubic patch table for the limit surface Far::PatchTableFactory::Options poptions; - poptions.SetEndCapType( - Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS); + if (g_endCap == kEndCapBSplineBasis) { + poptions.SetEndCapType( + Far::PatchTableFactory::Options::ENDCAP_BSPLINE_BASIS); + } else { + poptions.SetEndCapType( + Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS); + } Far::PatchTable const * patchTable = - Far::PatchTableFactory::Create(*g_topologyRefiner, poptions); + Far::PatchTableFactory::Create(*topologyRefiner, poptions); // append endcap stencils if (Far::StencilTable const *endCapVertexStencilTable = patchTable->GetEndCapVertexStencilTable()) { Far::StencilTable const *table = Far::StencilTableFactory::AppendEndCapStencilTable( - *g_topologyRefiner, + *topologyRefiner, vertexStencils, endCapVertexStencilTable); delete vertexStencils; vertexStencils = table; @@ -424,7 +583,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { patchTable->GetEndCapVaryingStencilTable()) { Far::StencilTable const *table = Far::StencilTableFactory::AppendEndCapStencilTable( - *g_topologyRefiner, + *topologyRefiner, varyingStencils, endCapVaryingStencilTable); delete varyingStencils; varyingStencils = table; @@ -434,45 +593,100 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { nverts = vertexStencils->GetNumControlVertices() + vertexStencils->GetNumStencils(); - if (g_vertexStencils) delete g_vertexStencils; - g_vertexStencils = vertexStencils; - if (g_varyingStencils) delete g_varyingStencils; - g_varyingStencils = varyingStencils; - if (g_patchTable) delete g_patchTable; g_patchTable = patchTable; - - // Create a far patch map - if (g_patchMap) delete g_patchMap; - g_patchMap = new Far::PatchMap(*g_patchTable); } - { // Create vertex primvar buffer for the CVs - delete g_vertexData; - g_vertexData = Osd::CpuVertexBuffer::Create(3, nverts); + // note that for patch eval we need coarse+refined combined buffer. + int nCoarseVertices = topologyRefiner->GetLevel(0).GetNumVertices(); - // Create varying primvar buffer for the CVs with random colors. - // These are immediately interpolated (once) and saved for display. - delete g_varyingData; g_varyingData = 0; - if (g_drawMode==kVARYING) { - g_varyingData = Osd::CpuVertexBuffer::Create(3, nverts); - g_varyingData->UpdateData( - &g_varyingColors[0], 0, (int)g_varyingColors.size()/3 ); - } - - // Create output buffers for the limit samples (position & tangents) - delete g_outVertexData; - g_outVertexData = Osd::CpuGLVertexBuffer::Create(6, g_nparticles); - memset(g_outVertexData->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float)); - if (g_drawMode==kRANDOM) { - createRandomColors(g_nparticles, 6, g_outVertexData->BindCpuBuffer()+3); - } - - delete g_outDerivatives; - g_outDerivatives = Osd::CpuGLVertexBuffer::Create(6, g_nparticles); - memset(g_outDerivatives->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float)); + delete g_evalOutput; + if (g_kernel == kCPU) { + g_evalOutput = new EvalOutput + (vertexStencils, varyingStencils, + nCoarseVertices, nverts, g_nParticles, g_patchTable); +#ifdef OPENSUBDIV_HAS_OPENMP + } else if (g_kernel == kOPENMP) { + g_evalOutput = new EvalOutput + (vertexStencils, varyingStencils, + nCoarseVertices, nverts, g_nParticles, g_patchTable); +#endif +#ifdef OPENSUBDIV_HAS_TBB + } else if (g_kernel == kTBB) { + g_evalOutput = new EvalOutput + (vertexStencils, varyingStencils, + nCoarseVertices, nverts, g_nParticles, g_patchTable); +#endif +#ifdef OPENSUBDIV_HAS_CUDA + } else if (g_kernel == kCUDA) { + g_evalOutput = new EvalOutput + (vertexStencils, varyingStencils, + nCoarseVertices, nverts, g_nParticles, g_patchTable); +#endif +#ifdef OPENSUBDIV_HAS_OPENCL + } else if (g_kernel == kCL) { + static Osd::EvaluatorCacheT clEvaluatorCache; + g_evalOutput = new EvalOutput + (vertexStencils, varyingStencils, + nCoarseVertices, nverts, g_nParticles, g_patchTable, + &clEvaluatorCache, &g_clDeviceContext); +#endif +#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK + } else if (g_kernel == kGLXFB) { + static Osd::EvaluatorCacheT glXFBEvaluatorCache; + g_evalOutput = new EvalOutput + (vertexStencils, varyingStencils, + nCoarseVertices, nverts, g_nParticles, g_patchTable, + &glXFBEvaluatorCache); +#endif +#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE + } else if (g_kernel == kGLCompute) { + static Osd::EvaluatorCacheT glComputeEvaluatorCache; + g_evalOutput = new EvalOutput + (vertexStencils, varyingStencils, + nCoarseVertices, nverts, g_nParticles, g_patchTable, + &glComputeEvaluatorCache); +#endif } + // Create the 'uv particles' manager - this class manages the limit + // location samples (ptex face index, (s,t) and updates them between frames. + // Note: the number of limit locations can be entirely arbitrary + delete g_particles; + g_particles = new STParticles(*topologyRefiner, g_patchTable, + g_nParticles, !g_randomStart); + g_nParticles = g_particles->GetNumParticles(); + g_particles->SetSpeed(speed); + updateGeom(); } @@ -505,32 +719,33 @@ linkDefaultProgram() { "in vec3 color;\n" "in vec3 tangentU;\n" "in vec3 tangentV;\n" + "in vec2 patchCoord;\n" "out vec4 fragColor;\n" - "out vec3 normal;\n" "uniform mat4 ModelViewMatrix;\n" "uniform mat4 ProjectionMatrix;\n" + "uniform int DrawMode;\n" "void main() {\n" - " fragColor = vec4(color, 1);\n" - // XXX: fix the normal transform - " normal = (ModelViewMatrix * vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n" + " vec3 normal = (ModelViewMatrix * " + " vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n" " gl_Position = ProjectionMatrix * ModelViewMatrix * " " vec4(position, 1);\n" + " if (DrawMode == 0) {\n" // UV + " fragColor = vec4(patchCoord.x, patchCoord.y, 0, 1);\n" + " } else if (DrawMode == 2) {\n" + " fragColor = vec4(normal*0.5+vec3(0.5), 1);\n" + " } else if (DrawMode == 3) {\n" + " fragColor = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n" + " } else {\n" // varying + " fragColor = vec4(color, 1);\n" + " }\n" "}\n"; static const char *fsSrc = GLSL_VERSION_DEFINE "in vec4 fragColor;\n" - "in vec3 normal;\n" - "uniform int DrawMode;\n" "out vec4 color;\n" "void main() {\n" - " if (DrawMode == 3) {\n" - " color = vec4(normal*0.5+vec3(0.5), 1);\n" - " } else if (DrawMode == 4) {\n" - " color = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n" - " } else {\n" - " color = fragColor;\n" - " }\n" + " color = fragColor;\n" "}\n"; GLuint program = glCreateProgram(); @@ -544,6 +759,7 @@ linkDefaultProgram() { glBindAttribLocation(program, 1, "color"); glBindAttribLocation(program, 2, "tangentU"); glBindAttribLocation(program, 3, "tangentV"); + glBindAttribLocation(program, 4, "patchCoord"); glBindFragDataLocation(program, 0, "color"); glLinkProgram(program); @@ -571,6 +787,7 @@ linkDefaultProgram() { g_defaultProgram.attrColor = glGetAttribLocation(program, "color"); g_defaultProgram.attrTangentU = glGetAttribLocation(program, "tangentU"); g_defaultProgram.attrTangentV = glGetAttribLocation(program, "tangentV"); + g_defaultProgram.attrPatchCoord = glGetAttribLocation(program, "patchCoord"); return true; } @@ -621,6 +838,7 @@ drawCageEdges() { glEnableVertexAttribArray(g_defaultProgram.attrColor); glDisableVertexAttribArray(g_defaultProgram.attrTangentU); glDisableVertexAttribArray(g_defaultProgram.attrTangentV); + glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord); glVertexAttribPointer(g_defaultProgram.attrPosition, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0); glVertexAttribPointer(g_defaultProgram.attrColor, @@ -680,6 +898,7 @@ drawCageVertices() { glEnableVertexAttribArray(g_defaultProgram.attrColor); glDisableVertexAttribArray(g_defaultProgram.attrTangentU); glDisableVertexAttribArray(g_defaultProgram.attrTangentV); + glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord); glVertexAttribPointer(g_defaultProgram.attrPosition, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0); glVertexAttribPointer(g_defaultProgram.attrColor, @@ -711,27 +930,33 @@ drawSamples() { glEnableVertexAttribArray(g_defaultProgram.attrTangentU); glEnableVertexAttribArray(g_defaultProgram.attrTangentV); - glBindBuffer(GL_ARRAY_BUFFER, g_outVertexData->BindVBO()); + glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindVertexData()); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0); glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12); - glBindBuffer(GL_ARRAY_BUFFER, g_outDerivatives->BindVBO()); + glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindDerivatives()); glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0); glVertexAttribPointer(3, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12); + glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindPatchCoords()); + glVertexAttribPointer(4, 2, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 5, (float*)12); + glEnableVertexAttribArray(g_defaultProgram.attrPosition); glEnableVertexAttribArray(g_defaultProgram.attrColor); glEnableVertexAttribArray(g_defaultProgram.attrTangentU); glEnableVertexAttribArray(g_defaultProgram.attrTangentV); + glEnableVertexAttribArray(g_defaultProgram.attrPatchCoord); glPointSize(2.0f); - glDrawArrays(GL_POINTS, 0, g_nparticles); + int nPatchCoords = (int)g_particles->GetPatchCoords().size(); + glDrawArrays(GL_POINTS, 0, nPatchCoords); glPointSize(1.0f); glDisableVertexAttribArray(g_defaultProgram.attrPosition); glDisableVertexAttribArray(g_defaultProgram.attrColor); glDisableVertexAttribArray(g_defaultProgram.attrTangentU); glDisableVertexAttribArray(g_defaultProgram.attrTangentV); + glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord); glBindVertexArray(0); @@ -789,9 +1014,10 @@ display() { double fps = 1.0/g_fpsTimer.GetElapsed(); g_fpsTimer.Start(); + int nPatchCoords = (int)g_particles->GetPatchCoords().size(); + g_hud.DrawString(10, -150, "Particle Speed ([) (]): %.1f", g_particles->GetSpeed()); - g_hud.DrawString(10, -120, "# Samples : (%d/%d)", - g_nsamplesFound, g_outVertexData->GetNumVertices()); + g_hud.DrawString(10, -120, "# Samples : (%d / %d)", nPatchCoords, g_nParticles); g_hud.DrawString(10, -100, "Compute : %.3f ms", g_computeTime); g_hud.DrawString(10, -80, "Eval : %.3f ms", g_evalTime * 1000.f); g_hud.DrawString(10, -60, "GPU Draw : %.3f ms", drawGpuTime); @@ -884,9 +1110,11 @@ void windowClose(GLFWwindow*) { //------------------------------------------------------------------------------ static void setSamples(bool add) { - g_nsamples += add ? 50 : -50; - - g_nsamples = std::max(0, g_nsamples); + if (add) { + g_nParticles = g_nParticles * 2; + } else { + g_nParticles = std::max(1, g_nParticles / 2); + } createOsdMesh(g_defaultShapes[g_currentShape], g_level); } @@ -936,6 +1164,40 @@ callbackModel(int m) { createOsdMesh(g_defaultShapes[g_currentShape], g_level); } +//------------------------------------------------------------------------------ +static void +callbackEndCap(int endCap) { + g_endCap = endCap; + createOsdMesh(g_defaultShapes[g_currentShape], g_level); +} + +//------------------------------------------------------------------------------ +static void +callbackKernel(int k) { + + g_kernel = k; + +#ifdef OPENSUBDIV_HAS_OPENCL + if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) { + if (g_clDeviceContext.Initialize() == false) { + printf("Error in initializing OpenCL\n"); + exit(1); + } + } +#endif +#ifdef OPENSUBDIV_HAS_CUDA + if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) { + if (g_cudaDeviceContext.Initialize() == false) { + printf("Error in initializing Cuda\n"); + exit(1); + } + } +#endif + + createOsdMesh(g_defaultShapes[g_currentShape], g_level); + +} + //------------------------------------------------------------------------------ static void callbackLevel(int l) { @@ -1001,10 +1263,40 @@ initHUD() { g_hud.AddCheckBox("Animate vertices (M)", g_moveScale != 0, 10, 50, callbackAnimate, 0, 'm'); g_hud.AddCheckBox("Freeze (spc)", false, 10, 70, callbackFreeze, 0, ' '); - g_hud.AddCheckBox("Random Start", false, 10, 120, callbackCentered, g_randomStart); + g_hud.AddCheckBox("Random Start", g_randomStart, 10, 120, callbackCentered, 0); + + int compute_pulldown = g_hud.AddPullDown("Compute (K)", 475, 10, 300, + callbackKernel, 'k'); + g_hud.AddPullDownButton(compute_pulldown, "CPU", kCPU); +#ifdef OPENSUBDIV_HAS_OPENMP + g_hud.AddPullDownButton(compute_pulldown, "OPENMP", kOPENMP); +#endif +#ifdef OPENSUBDIV_HAS_TBB + g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB); +#endif +#ifdef OPENSUBDIV_HAS_CUDA + g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA); +#endif +#ifdef OPENSUBDIV_HAS_OPENCL + g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL); +#endif +#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK + g_hud.AddPullDownButton(compute_pulldown, "GL XFB", kGLXFB); +#endif +#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE + g_hud.AddPullDownButton(compute_pulldown, "GL Compute", kGLCompute); +#endif + + int endcap_pulldown = g_hud.AddPullDown("End cap (E)", 10, 140, 200, + callbackEndCap, 'e'); + g_hud.AddPullDownButton(endcap_pulldown, "BSpline", + kEndCapBSplineBasis, + g_endCap == kEndCapBSplineBasis); + g_hud.AddPullDownButton(endcap_pulldown, "GregoryBasis", + kEndCapGregoryBasis, + g_endCap == kEndCapGregoryBasis); int shading_pulldown = g_hud.AddPullDown("Shading (W)", 250, 10, 250, callbackDisplayVaryingColors, 'w'); - g_hud.AddPullDownButton(shading_pulldown, "Random", kRANDOM, g_drawMode==kRANDOM); g_hud.AddPullDownButton(shading_pulldown, "(u,v)", kUV, g_drawMode==kUV); g_hud.AddPullDownButton(shading_pulldown, "Varying", kVARYING, g_drawMode==kVARYING); g_hud.AddPullDownButton(shading_pulldown, "Normal", kNORMAL, g_drawMode==kNORMAL); diff --git a/examples/glEvalLimit/particles.cpp b/examples/glEvalLimit/particles.cpp index 5dee362d..fccbae97 100644 --- a/examples/glEvalLimit/particles.cpp +++ b/examples/glEvalLimit/particles.cpp @@ -25,38 +25,104 @@ #include "particles.h" #include +#include + +#ifdef OPENSUBDIV_HAS_TBB +#include +#include +tbb::atomic g_tbbCounter; +class TbbUpdateKernel { +public: + TbbUpdateKernel(float speed, + STParticles::Position *positions, + float *velocities, + std::vector const &adjacency, + OpenSubdiv::Osd::PatchCoord *patchCoords, + OpenSubdiv::Far::PatchMap const *patchMap) : + _speed(speed), _positions(positions), _velocities(velocities), + _adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) { + } + + void operator () (tbb::blocked_range const &r) const { + for (int i = r.begin(); i < r.end(); ++i) { + STParticles::Position * p = _positions + i; + float *dp = _velocities + i*2; + + // apply velocity + p->s += dp[0] * _speed; + p->t += dp[1] * _speed; + + // make sure particles can't skip more than 1 face boundary at a time + assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f)); + + // check if the particle is jumping a boundary + // note: a particle can jump 2 edges at a time (a "diagonal" jump) + // this is not treated here. + int edge = -1; + if (p->s >= 1.0f) edge = 1; + if (p->s <= 0.0f) edge = 3; + if (p->t >= 1.0f) edge = 2; + if (p->t <= 0.0f) edge = 0; + + if (edge>=0) { + // warp the particle to the other side of the boundary + STParticles::WarpParticle(_adjacency, edge, p, dp); + } + assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); + + // resolve particle positions into patch handles + OpenSubdiv::Far::PatchTable::PatchHandle const *handle = + _patchMap->FindPatch(p->ptexIndex, p->s, p->t); + if (handle) { + int index = g_tbbCounter.fetch_and_add(1); + _patchCoords[index] = + OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t); + } + } + } +private: + float _speed; + STParticles::Position *_positions; + float *_velocities; + std::vector const &_adjacency; + OpenSubdiv::Osd::PatchCoord *_patchCoords; + OpenSubdiv::Far::PatchMap const *_patchMap; +}; +#endif #include -STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered) : +STParticles::STParticles(Refiner const & refiner, + PatchTable const *patchTable, + int nParticles, bool centered) : _speed(1.0f) { OpenSubdiv::Far::PtexIndices ptexIndices(refiner); - int nptexfaces = ptexIndices.GetNumFaces(), - nsamples = nptexfaces * nparticles; + // Create a far patch map + _patchMap = new OpenSubdiv::Far::PatchMap(*patchTable); + + int nPtexFaces = ptexIndices.GetNumFaces(); srand(static_cast(2147483647)); { // initialize positions - _positions.resize(nsamples); + _positions.resize(nParticles); Position * pos = &_positions[0]; - for (int i=0; iptexIndex = i; - pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX; - pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX; - ++pos; - } + for (int i = 0; i < nParticles; ++i) { + pos->ptexIndex = (int)(((float)rand()/(float)RAND_MAX) * nPtexFaces); + pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX; + pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX; + ++pos; } } { // initialize velocities - _velocities.resize(nsamples*2); + _velocities.resize(nParticles * 2); - for (int i=0; is += dp[0] * speed; - p->t += dp[1] * speed; - - // make sure particles can't skip more than 1 face boundary at a time - assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f)); - - // check if the particle is jumping a boundary - // note: a particle can jump 2 edges at a time (a "diagonal" jump) - // this is not treated here. - int edge = -1; - - if (p->s >= 1.0f) edge = 1; - if (p->s <= 0.0f) edge = 3; - if (p->t >= 1.0f) edge = 2; - if (p->t <= 0.0f) edge = 0; - - if (edge>=0) { - // warp the particle to the other side of the boundary - warpParticle(edge, p, dp); - } - assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); - } - -} - inline void FlipS(STParticles::Position * p, float * dp) { p->s = 1.0f-p->s; @@ -163,6 +193,15 @@ Rotate(int rot, STParticles::Position * p, float * dp) { assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); } +inline void +Trim(STParticles::Position * p) { + if (p->s <0.0f) p->s = 1.0f + p->s; + if (p->s>=1.0f) p->s = p->s - 1.0f; + if (p->t <0.0f) p->t = 1.0f + p->t; + if (p->t>=1.0f) p->t = p->t - 1.0f; + assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); +} + inline void Clamp(STParticles::Position * p) { if (p->s<0.0f) { @@ -192,21 +231,12 @@ Bounce(int edge, STParticles::Position * p, float * dp) { assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); } -inline void -Trim(STParticles::Position * p) { - if (p->s <0.0f) p->s = 1.0f + p->s; - if (p->s>=1.0f) p->s = p->s - 1.0f; - if (p->t <0.0f) p->t = 1.0f + p->t; - if (p->t>=1.0f) p->t = p->t - 1.0f; - assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); -} - void -STParticles::warpParticle(int edge, Position * p, float * dp) { - - assert(p->ptexIndex<(int)_adjacency.size() and (edge>=0 and edge<4)); +STParticles::WarpParticle(std::vector const &adjacency, + int edge, Position * p, float * dp) { + assert(p->ptexIndex<(int)adjacency.size() and (edge>=0 and edge<4)); - FaceInfo const & f = _adjacency[p->ptexIndex]; + FaceInfo const & f = adjacency[p->ptexIndex]; int afid = f.adjface(edge), aeid = f.adjedge(edge); @@ -215,7 +245,7 @@ STParticles::warpParticle(int edge, Position * p, float * dp) { // boundary detected: bounce the particle Bounce(edge, p, dp); } else { - FaceInfo const & af = _adjacency[afid]; + FaceInfo const & af = adjacency[afid]; int rot = edge - aeid + 2; bool fIsSubface = f.isSubface(), @@ -233,6 +263,66 @@ STParticles::warpParticle(int edge, Position * p, float * dp) { assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); } +STParticles::~STParticles() { + delete _patchMap; +} + +void +STParticles::Update(float deltaTime) { + + if (fabs(GetSpeed()) < 0.001f) return; + float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f)); + + _patchCoords.clear(); + + // XXX: this process should be parallelized. +#ifdef OPENSUBDIV_HAS_TBB + + _patchCoords.resize((int)GetNumParticles()); + TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0], + _adjacency, &_patchCoords[0], _patchMap);; + g_tbbCounter = 0; + tbb::blocked_range range(0, GetNumParticles(), 256); + tbb::parallel_for(range, kernel); + _patchCoords.resize(g_tbbCounter); +#else + Position * p = &_positions[0]; + float * dp = &_velocities[0]; + for (int i=0; is += dp[0] * speed; + p->t += dp[1] * speed; + + // make sure particles can't skip more than 1 face boundary at a time + assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f)); + + // check if the particle is jumping a boundary + // note: a particle can jump 2 edges at a time (a "diagonal" jump) + // this is not treated here. + int edge = -1; + + if (p->s >= 1.0f) edge = 1; + if (p->s <= 0.0f) edge = 3; + if (p->t >= 1.0f) edge = 2; + if (p->t <= 0.0f) edge = 0; + + if (edge>=0) { + // warp the particle to the other side of the boundary + WarpParticle(_adjacency, edge, p, dp); + } + assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f)); + + // resolve particle positions into patch handles + OpenSubdiv::Far::PatchTable::PatchHandle const *handle = + _patchMap->FindPatch(p->ptexIndex, p->s, p->t); + if (handle) { + _patchCoords.push_back( + OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t)); + } + } +#endif +} + // Dump adjacency info std::ostream & operator << (std::ostream & os, STParticles::FaceInfo const & f) { diff --git a/examples/glEvalLimit/particles.h b/examples/glEvalLimit/particles.h index ea1f798d..887add25 100644 --- a/examples/glEvalLimit/particles.h +++ b/examples/glEvalLimit/particles.h @@ -26,7 +26,8 @@ #define ST_PARTICLES_H #include - +#include +#include #include // @@ -72,47 +73,6 @@ public: float s, t; ///< parametric location on face }; - typedef OpenSubdiv::Far::TopologyRefiner Refiner; - - STParticles(Refiner const & refiner, int nparticles, bool centered=false); - - void Update(float deltaTime); - - int GetNumParticles() const { - return (int)_positions.size(); - } - - void SetSpeed(float speed) { - _speed = std::max(-1.0f, std::min(1.0f, speed)); - } - - float GetSpeed() const { - return _speed; - } - - std::vector & GetPositions() { - return _positions; - } - - std::vector & GetVelocities() { - return _velocities; - } - - friend std::ostream & operator << (std::ostream & os, STParticles const & f); - -private: - - // - // Particle "Dynamics" - // - std::vector _positions; - - std::vector _velocities; - - float _speed; // velocity multiplier - -private: - // // Topology adjacency (borrowed from Ptexture.h) // @@ -152,11 +112,63 @@ private: int adjfaces[4]; }; - void warpParticle(int edge, Position * p, float * dp); + typedef OpenSubdiv::Far::TopologyRefiner Refiner; + typedef OpenSubdiv::Far::PatchTable PatchTable; + + STParticles(Refiner const & refiner, PatchTable const *patchTable, + int nparticles, bool centered=false); + + ~STParticles(); + + void Update(float deltaTime); + + int GetNumParticles() const { + return (int)_positions.size(); + } + + void SetSpeed(float speed) { + _speed = std::max(-1.0f, std::min(1.0f, speed)); + } + + float GetSpeed() const { + return _speed; + } + + std::vector & GetPositions() { + return _positions; + } + + std::vector & GetVelocities() { + return _velocities; + } + + std::vector GetPatchCoords() const { + return _patchCoords; + } + + friend std::ostream & operator << (std::ostream & os, STParticles const & f); + + static void WarpParticle(std::vector const &adjacency, + int edge, Position * p, float * dp); + +private: + + // + // Particle "Dynamics" + // + std::vector _positions; + + std::vector _velocities; + + std::vector _patchCoords; + + float _speed; // velocity multiplier friend std::ostream & operator << (std::ostream & os, FaceInfo const & f); + std::vector _adjacency; + OpenSubdiv::Far::PatchMap const *_patchMap; }; #endif // ST_PARTICLES_H diff --git a/examples/glFVarViewer/glFVarViewer.cpp b/examples/glFVarViewer/glFVarViewer.cpp index faea611a..954d263e 100644 --- a/examples/glFVarViewer/glFVarViewer.cpp +++ b/examples/glFVarViewer/glFVarViewer.cpp @@ -786,7 +786,7 @@ bindTextures() { } static GLenum -bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) { +bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); @@ -868,7 +868,7 @@ display() { glBindVertexArray(g_vao); - OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches = + OpenSubdiv::Osd::PatchArrayVector const & patches = g_mesh->GetPatchTable()->GetPatchArrays(); if (g_displayStyle == kWire) @@ -879,7 +879,7 @@ display() { // patch drawing for (int i = 0; i < (int)patches.size(); ++i) { - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; GLenum primType = bindProgram(GetEffect(), patch); @@ -909,7 +909,7 @@ display() { glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); for (int i = 0; i < (int)patches.size(); ++i) { - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; GLenum primType = bindProgram(GetEffect(/*uvDraw=*/ true), patch); diff --git a/examples/glImaging/glImaging.cpp b/examples/glImaging/glImaging.cpp old mode 100755 new mode 100644 index 1e0c38fb..a58e2dab --- a/examples/glImaging/glImaging.cpp +++ b/examples/glImaging/glImaging.cpp @@ -422,11 +422,11 @@ void runTest(ShapeDesc const &shapeDesc, std::string const &kernel, mesh->GetPatchTable()->GetPatchParamTextureBuffer()); } - Osd::GLPatchTable::PatchArrayVector const & patches = + Osd::PatchArrayVector const & patches = mesh->GetPatchTable()->GetPatchArrays(); for (int i=0; i<(int)patches.size(); ++i) { - Osd::GLPatchTable::PatchArray const & patch = patches[i]; + Osd::PatchArray const & patch = patches[i]; Far::PatchDescriptor desc = patch.GetDescriptor(); Far::PatchDescriptor::Type patchType = desc.GetType(); diff --git a/examples/glPaintTest/glPaintTest.cpp b/examples/glPaintTest/glPaintTest.cpp index d75f1039..4e13a61e 100644 --- a/examples/glPaintTest/glPaintTest.cpp +++ b/examples/glPaintTest/glPaintTest.cpp @@ -597,7 +597,7 @@ static void bindTextures(Effect effect) { } static GLuint -bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) { +bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); @@ -670,12 +670,12 @@ display() { glBindVertexArray(g_vao); - OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches = + OpenSubdiv::Osd::PatchArrayVector const & patches = g_mesh->GetPatchTable()->GetPatchArrays(); // patch drawing for (int i=0; i<(int)patches.size(); ++i) { - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); GLenum primType = GL_PATCHES; @@ -807,13 +807,13 @@ drawStroke(int x, int y) { effect.paint = 1; bindTextures(effect); - OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches = + OpenSubdiv::Osd::PatchArrayVector const & patches = g_mesh->GetPatchTable()->GetPatchArrays(); // patch drawing for (int i=0; i<(int)patches.size(); ++i) { - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); GLenum primType = GL_PATCHES; diff --git a/examples/glPtexViewer/glPtexViewer.cpp b/examples/glPtexViewer/glPtexViewer.cpp index 1a27fd98..67e4bb31 100644 --- a/examples/glPtexViewer/glPtexViewer.cpp +++ b/examples/glPtexViewer/glPtexViewer.cpp @@ -1249,7 +1249,7 @@ bindTextures() { //------------------------------------------------------------------------------ static GLenum bindProgram(Effect effect, - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) { + OpenSubdiv::Osd::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); GLDrawConfig *config = g_shaderCache.GetDrawConfig(effectDesc); @@ -1296,10 +1296,10 @@ drawModel() { glBindVertexArray(g_vao); // patch drawing - OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches = + OpenSubdiv::Osd::PatchArrayVector const & patches = g_mesh->GetPatchTable()->GetPatchArrays(); for (int i = 0; i < (int)patches.size(); ++i) { - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; Effect effect; effect.value = 0; diff --git a/examples/glViewer/glViewer.cpp b/examples/glViewer/glViewer.cpp index cf2663c8..38f7af7a 100644 --- a/examples/glViewer/glViewer.cpp +++ b/examples/glViewer/glViewer.cpp @@ -1181,7 +1181,7 @@ bindTextures() { static GLenum bindProgram(Effect effect, - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) { + OpenSubdiv::Osd::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); // only legacy gregory needs maxValence and numElements @@ -1299,7 +1299,7 @@ display() { glBindVertexArray(g_vao); - OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches = + OpenSubdiv::Osd::PatchArrayVector const & patches = g_mesh->GetPatchTable()->GetPatchArrays(); // patch drawing @@ -1316,7 +1316,7 @@ display() { // core draw-calls for (int i=0; i<(int)patches.size(); ++i) { - OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i]; + OpenSubdiv::Osd::PatchArray const & patch = patches[i]; OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); diff --git a/opensubdiv/osd/CMakeLists.txt b/opensubdiv/osd/CMakeLists.txt index 4994ef64..134e6b30 100644 --- a/opensubdiv/osd/CMakeLists.txt +++ b/opensubdiv/osd/CMakeLists.txt @@ -29,6 +29,7 @@ set(CPU_SOURCE_FILES cpuEvaluator.cpp cpuKernel.cpp + cpuPatchTable.cpp cpuVertexBuffer.cpp ) @@ -43,10 +44,12 @@ set(PRIVATE_HEADER_FILES set(PUBLIC_HEADER_FILES cpuEvaluator.h + cpuPatchTable.h cpuVertexBuffer.h mesh.h nonCopyable.h opengl.h + types.h vertexDescriptor.h ) @@ -216,6 +219,7 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS}) # OpenCL code & dependencies set(OPENCL_PUBLIC_HEADERS clEvaluator.h + clPatchTable.h clVertexBuffer.h opencl.h ) @@ -223,6 +227,7 @@ set(OPENCL_PUBLIC_HEADERS if ( OPENCL_FOUND ) list(APPEND GPU_SOURCE_FILES clEvaluator.cpp + clPatchTable.cpp clVertexBuffer.cpp ) list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS}) @@ -254,12 +259,14 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS}) # CUDA code & dependencies set(CUDA_PUBLIC_HEADERS cudaEvaluator.h + cudaPatchTable.h cudaVertexBuffer.h ) if( CUDA_FOUND ) list(APPEND GPU_SOURCE_FILES cudaEvaluator.cpp + cudaPatchTable.cpp cudaVertexBuffer.cpp ) list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS}) diff --git a/opensubdiv/osd/clEvaluator.cpp b/opensubdiv/osd/clEvaluator.cpp index 4c2efcf8..8708fd20 100644 --- a/opensubdiv/osd/clEvaluator.cpp +++ b/opensubdiv/osd/clEvaluator.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include "../osd/opencl.h" #include "../far/error.h" @@ -87,11 +88,12 @@ CLStencilTable::~CLStencilTable() { CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue) : _clContext(context), _clCommandQueue(queue), - _program(NULL), _stencilsKernel(NULL) { + _program(NULL), _stencilKernel(NULL), _patchKernel(NULL) { } CLEvaluator::~CLEvaluator() { - if (_stencilsKernel) clReleaseKernel(_stencilsKernel); + if (_stencilKernel) clReleaseKernel(_stencilKernel); + if (_patchKernel) clReleaseKernel(_patchKernel); if (_program) clReleaseProgram(_program); } @@ -145,7 +147,13 @@ CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc, return false; } - _stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum); + _stencilKernel = clCreateKernel(_program, "computeStencils", &errNum); + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum); + return false; + } + + _patchKernel = clCreateKernel(_program, "computePatches", &errNum); if (errNum != CL_SUCCESS) { Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum); @@ -169,24 +177,24 @@ CLEvaluator::EvalStencils(cl_mem src, size_t globalWorkSize = (size_t)(end - start); - clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src); - clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset); - clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst); - clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset); - clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes); - clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets); - clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices); - clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights); - clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start); - clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end); + clSetKernelArg(_stencilKernel, 0, sizeof(cl_mem), &src); + clSetKernelArg(_stencilKernel, 1, sizeof(int), &srcDesc.offset); + clSetKernelArg(_stencilKernel, 2, sizeof(cl_mem), &dst); + clSetKernelArg(_stencilKernel, 3, sizeof(int), &dstDesc.offset); + clSetKernelArg(_stencilKernel, 4, sizeof(cl_mem), &sizes); + clSetKernelArg(_stencilKernel, 5, sizeof(cl_mem), &offsets); + clSetKernelArg(_stencilKernel, 6, sizeof(cl_mem), &indices); + clSetKernelArg(_stencilKernel, 7, sizeof(cl_mem), &weights); + clSetKernelArg(_stencilKernel, 8, sizeof(int), &start); + clSetKernelArg(_stencilKernel, 9, sizeof(int), &end); cl_int errNum = clEnqueueNDRangeKernel( - _clCommandQueue, _stencilsKernel, 1, NULL, + _clCommandQueue, _stencilKernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); if (errNum != CL_SUCCESS) { Far::Error(Far::FAR_RUNTIME_ERROR, - "ApplyStencilTableKernel (%d) ", errNum); + "ApplyStencilKernel (%d) ", errNum); return false; } @@ -194,6 +202,51 @@ CLEvaluator::EvalStencils(cl_mem src, return true; } +bool +CLEvaluator::EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc, + cl_mem dst, VertexBufferDescriptor const &dstDesc, + cl_mem du, VertexBufferDescriptor const &duDesc, + cl_mem dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + cl_mem patchCoordsBuffer, + cl_mem patchArrayBuffer, + cl_mem patchIndexBuffer, + cl_mem patchParamBuffer) const { + + size_t globalWorkSize = (size_t)(numPatchCoords); + + clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src); + clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset); + clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst); + clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset); + clSetKernelArg(_patchKernel, 4, sizeof(int), &numPatchCoords); + // clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du); + // clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset); + // clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride); + // clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv); + // clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset); + // clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride); + clSetKernelArg(_patchKernel, 5, sizeof(cl_mem), &patchCoordsBuffer); + clSetKernelArg(_patchKernel, 6, sizeof(cl_mem), &patchArrayBuffer); + clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &patchIndexBuffer); + clSetKernelArg(_patchKernel, 8, sizeof(cl_mem), &patchParamBuffer); + + cl_int errNum = clEnqueueNDRangeKernel( + _clCommandQueue, _patchKernel, 1, NULL, + &globalWorkSize, NULL, 0, NULL, NULL); + + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "ApplyPatchKernel (%d) ", errNum); + return false; + } + + clFinish(_clCommandQueue); + return true; +} + + + /* static */ void CLEvaluator::Synchronize(cl_command_queue clCommandQueue) { diff --git a/opensubdiv/osd/clEvaluator.h b/opensubdiv/osd/clEvaluator.h index 62780fe5..e2857110 100644 --- a/opensubdiv/osd/clEvaluator.h +++ b/opensubdiv/osd/clEvaluator.h @@ -28,6 +28,7 @@ #include "../version.h" #include "../osd/opencl.h" +#include "../osd/types.h" #include "../osd/vertexDescriptor.h" namespace OpenSubdiv { @@ -75,9 +76,6 @@ private: // --------------------------------------------------------------------------- -/// \brief OpenCL stencil kernel -/// -/// class CLEvaluator { public: typedef bool Instantiatable; @@ -107,6 +105,12 @@ public: return NULL; } + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + /// \brief Generic static compute function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. @@ -124,7 +128,7 @@ public: /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param stencilTable stencil table to be applied. The table must have - /// OpenCL memory interfaces. + /// SSBO interfaces. /// /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide @@ -137,25 +141,25 @@ public: /// cl_command_queue GetCommandQueue() /// methods. /// - template - static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable, - CLEvaluator const *instance, - DEVICE_CONTEXT deviceContext) { + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + CLEvaluator const *instance, + DEVICE_CONTEXT deviceContext) { + if (instance) { - return instance->EvalStencils(srcVertexBuffer, srcDesc, - dstVertexBuffer, dstDesc, + return instance->EvalStencils(srcBuffer, srcDesc, + dstBuffer, dstDesc, stencilTable); } else { // Create an instance on demand (slow) instance = Create(srcDesc, dstDesc, deviceContext); if (instance) { - bool r = instance->EvalStencils(srcVertexBuffer, srcDesc, - dstVertexBuffer, dstDesc, + bool r = instance->EvalStencils(srcBuffer, srcDesc, + dstBuffer, dstDesc, stencilTable); delete instance; return r; @@ -167,15 +171,14 @@ public: /// Generic compute function. /// Dispatch the CL compute kernel asynchronously. /// Returns false if the kernel hasn't been compiled yet. - template - bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable) const { - return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue), + template + bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const { + return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, - dstVertexBuffer->BindCLBuffer(_clCommandQueue), + dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, stencilTable->GetSizesBuffer(), stencilTable->GetOffsetsBuffer(), @@ -187,10 +190,8 @@ public: /// Dispatch the CL compute kernel asynchronously. /// returns false if the kernel hasn't been compiled yet. - bool EvalStencils(cl_mem src, - VertexBufferDescriptor const &srcDesc, - cl_mem dst, - VertexBufferDescriptor const &dstDesc, + bool EvalStencils(cl_mem src, VertexBufferDescriptor const &srcDesc, + cl_mem dst, VertexBufferDescriptor const &dstDesc, cl_mem sizes, cl_mem offsets, cl_mem indices, @@ -198,6 +199,278 @@ public: int start, int end) const; + /// ---------------------------------------------------------------------- + /// + /// Limit evaluations with PatchTable + /// + /// ---------------------------------------------------------------------- + /// + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCLBuffer() method returning a CL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCLBuffer() method returning a CL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindCLBuffer() method returning an + /// array of PatchCoord struct. + /// + /// @param patchTable CLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext client providing context class which supports + /// cL_context GetContext() + /// cl_command_queue GetCommandQueue() + /// methods. + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + CLEvaluator const *instance, + DEVICE_CONTEXT deviceContext) { + + if (instance) { + return instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + numPatchCoords, patchCoords, + patchTable); + } else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, deviceContext); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + numPatchCoords, patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCLBuffer() method returning a CL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCLBuffer() method returning a CL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer + /// + /// @param duDesc + /// + /// @param dvBuffer + /// + /// @param dvDesc + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindCLBuffer() method returning an + /// array of PatchCoord struct + /// + /// @param patchTable CLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext client providing context class which supports + /// cL_context GetContext() + /// cl_command_queue GetCommandQueue() + /// methods. + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + CLEvaluator const *instance, + DEVICE_CONTEXT deviceContext) { + + if (instance) { + return instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + duBuffer, duDesc, + dvBuffer, dvDesc, + numPatchCoords, patchCoords, + patchTable); + } else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, deviceContext); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + duBuffer, duDesc, + dvBuffer, dvDesc, + numPatchCoords, patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCLBuffer() method returning a CL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCLBuffer() method returning a CL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindCLBuffer() method returning an + /// array of PatchCoord struct. + /// + /// @param patchTable CLPatchTable or equivalent + /// + template + bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const { + + return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, + dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, + 0, VertexBufferDescriptor(), + 0, VertexBufferDescriptor(), + numPatchCoords, + patchCoords->BindCLBuffer(_clCommandQueue), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCLBuffer() method returning a CL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCLBuffer() method returning a CL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output U-derivatives buffer + /// must have BindCLBuffer() method returning a CL + /// buffer object of destination data of Du + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output V-derivatives buffer + /// must have BindCLBuffer() method returning a CL + /// buffer object of destination data of Dv + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable CLPatchTable or equivalent + /// + template + bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const { + + return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc, + dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc, + duBuffer->BindCLBuffer(_clCommandQueue), duDesc, + dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc, + numPatchCoords, + patchCoords->BindCLBuffer(_clCommandQueue), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + bool EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc, + cl_mem dst, VertexBufferDescriptor const &dstDesc, + cl_mem du, VertexBufferDescriptor const &duDesc, + cl_mem dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + cl_mem patchCoordsBuffer, + cl_mem patchArrayBuffer, + cl_mem patchIndexBuffer, + cl_mem patchParamsBuffer) const; + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- + /// Configure OpenCL kernel. /// Returns false if it fails to compile the kernel. bool Compile(VertexBufferDescriptor const &srcDesc, @@ -215,7 +488,8 @@ private: cl_context _clContext; cl_command_queue _clCommandQueue; cl_program _program; - cl_kernel _stencilsKernel; + cl_kernel _stencilKernel; + cl_kernel _patchKernel; }; diff --git a/opensubdiv/osd/clKernel.cl b/opensubdiv/osd/clKernel.cl index 38c2c51e..372887e5 100644 --- a/opensubdiv/osd/clKernel.cl +++ b/opensubdiv/osd/clKernel.cl @@ -85,3 +85,118 @@ __kernel void computeStencils(__global float * src, writeVertex(dst, current, &v); } + +// --------------------------------------------------------------------------- + +struct PatchArray { + int patchType; + int numPatches; + int indexBase; // an offset within the index buffer + int primitiveIdBase; // an offset within the patch param buffer +}; + +struct PatchCoord { + int arrayIndex; + int patchIndex; + int vertIndex; + float s; + float t; +}; + +struct PatchParam { + int faceIndex; + uint patchBits; + float sharpness; +}; + +static void getBSplineWeights(float t, float *point, float *deriv) { + // The four uniform cubic B-Spline basis functions evaluated at t: + float one6th = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3); + point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3); + point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3)); + point[3] = one6th * ( t3); + + // Derivatives of the above four basis functions at t: + deriv[0] = -0.5f*t2 + t - 0.5f; + deriv[1] = 1.5f*t2 - 2.0f*t; + deriv[2] = -1.5f*t2 + t + 0.5f; + deriv[3] = 0.5f*t2; +} + +__kernel void computePatches(__global float *src, int srcOffset, + __global float *dst, int dstOffset, +// __global float *du, int duOffset, int duStride, +// __global float *dv, int dvOffset, int dvStride, + int numPatchCoords, + __global struct PatchCoord *patchCoords, + __global struct PatchArray *patchArrayBuffer, + __global int *patchIndexBuffer, + __global struct PatchParam *patchParamBuffer) { + int current = get_global_id(0); + + if (current > numPatchCoords) return; + + src += srcOffset; + dst += dstOffset; + // du += duOffset; + // dv += dvOffset; + + struct PatchCoord coord = patchCoords[current]; + int patchIndex = coord.patchIndex; +// struct PatchArray array = patchArrayBuffer[coord.arrayIndex]; + struct PatchArray array = patchArrayBuffer[0]; + + int patchType = 6; // array.x XXX: REGULAR only for now. + int numControlVertices = 16; + + uint patchBits = patchParamBuffer[patchIndex].patchBits; +// vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t)); + float dScale = 1.0f;//float(1 << getDepth(patchBits)); + + float uv[2] = {coord.s, coord.t}; + + float wP[20], wDs[20], wDt[20]; + if (patchType == 6) { // REGULAR + float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4]; + getBSplineWeights(uv[0], sWeights, dsWeights); + getBSplineWeights(uv[1], tWeights, dtWeights); + +// adjustBoundaryWeights(patchBits, sWeights, tWeights); +// adjustBoundaryWeights(patchBits, dsWeights, dtWeights); + + for (int k = 0; k < 4; ++k) { + for (int l = 0; l < 4; ++l) { + wP[4*k+l] = sWeights[l] * tWeights[k]; + wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale; + wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale; + } + } + } else { + // TODO: GREGORY BASIS + } + + struct Vertex v; + clear(&v); + +#if 1 + // debug + v.v[0] = uv[0]; + v.v[1] = uv[1]; + v.v[2] = patchIndexBuffer[current] * 0.1; + writeVertex(dst, current, &v); + return; +#endif + + int indexBase = array.indexBase + coord.vertIndex; + for (int i = 0; i < numControlVertices; ++i) { + int index = patchIndexBuffer[indexBase + i]; + if (index < 0) index = 0; + addWithWeight(&v, src, index, wP[i]); + } + writeVertex(dst, current, &v); +} diff --git a/opensubdiv/osd/clPatchTable.cpp b/opensubdiv/osd/clPatchTable.cpp new file mode 100644 index 00000000..2fb03730 --- /dev/null +++ b/opensubdiv/osd/clPatchTable.cpp @@ -0,0 +1,99 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/clPatchTable.h" + +#include "../far/error.h" +#include "../far/patchTable.h" +#include "../osd/opencl.h" +#include "../osd/cpuPatchTable.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +CLPatchTable::CLPatchTable() : + _patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) { +} + +CLPatchTable::~CLPatchTable() { + if (_patchArrays) clReleaseMemObject(_patchArrays); + if (_indexBuffer) clReleaseMemObject(_indexBuffer); + if (_patchParamBuffer) clReleaseMemObject(_patchParamBuffer); +} + +CLPatchTable * +CLPatchTable::Create(Far::PatchTable const *farPatchTable, + cl_context clContext) { + CLPatchTable *instance = new CLPatchTable(); + if (instance->allocate(farPatchTable, clContext)) return instance; + delete instance; + return 0; +} + +bool +CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContext) { + CpuPatchTable patchTable(farPatchTable); + + size_t numPatchArrays = patchTable.GetNumPatchArrays(); + size_t indexSize = patchTable.GetPatchIndexSize(); + size_t patchParamSize = patchTable.GetPatchParamSize(); + + cl_int err = 0; + _patchArrays = clCreateBuffer(clContext, CL_MEM_READ_WRITE, + numPatchArrays * sizeof(Osd::PatchArray), + (void*)patchTable.GetPatchArrayBuffer(), + &err); + if (err != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err); + return false; + } + + _indexBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, + indexSize * sizeof(int), + (void*)patchTable.GetPatchIndexBuffer(), + &err); + if (err != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err); + return false; + } + + _patchParamBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE, + patchParamSize * sizeof(Osd::PatchParam), + (void*)patchTable.GetPatchParamBuffer(), + &err); + if (err != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err); + return false; + } + return true; +} + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv + diff --git a/opensubdiv/osd/clPatchTable.h b/opensubdiv/osd/clPatchTable.h new file mode 100644 index 00000000..1a90ec6a --- /dev/null +++ b/opensubdiv/osd/clPatchTable.h @@ -0,0 +1,91 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV3_OSD_CL_PATCH_TABLE_H +#define OPENSUBDIV3_OSD_CL_PATCH_TABLE_H + +#include "../version.h" + +#include "../osd/opencl.h" +#include "../osd/nonCopyable.h" +#include "../osd/types.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far{ + class PatchTable; +}; + +namespace Osd { + +/// \brief CL patch table +/// +/// This class is a CL buffer representation of Far::PatchTable. +/// +/// CLEvaluator consumes this table to evaluate on the patches. +/// +/// +class CLPatchTable : private NonCopyable { +public: + /// Creator. Returns NULL if error + static CLPatchTable *Create(Far::PatchTable const *patchTable, + cl_context clContext); + + template + static CLPatchTable * Create(Far::PatchTable const *patchTable, + DEVICE_CONTEXT context) { + return Create(patchTable, context->GetContext()); + } + + /// Destructor + ~CLPatchTable(); + + /// Returns the CL memory of the array of Osd::PatchArray buffer + cl_mem GetPatchArrayBuffer() const { return _patchArrays; } + + /// Returns the CL memory of the patch control vertices + cl_mem GetPatchIndexBuffer() const { return _indexBuffer; } + + /// Returns the CL memory of the array of Osd::PatchParam buffer + cl_mem GetPatchParamBuffer() const { return _patchParamBuffer; } + +protected: + CLPatchTable(); + + bool allocate(Far::PatchTable const *patchTable, cl_context clContext); + + cl_mem _patchArrays; + cl_mem _indexBuffer; + cl_mem _patchParamBuffer; +}; + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + +#endif // OPENSUBDIV3_OSD_CL_PATCH_TABLE_H diff --git a/opensubdiv/osd/clVertexBuffer.h b/opensubdiv/osd/clVertexBuffer.h index 0b8336e9..de203ab0 100644 --- a/opensubdiv/osd/clVertexBuffer.h +++ b/opensubdiv/osd/clVertexBuffer.h @@ -45,6 +45,12 @@ public: /// Creator. Returns NULL if error. static CLVertexBuffer * Create(int numElements, int numVertices, cl_context clContext); + template + static CLVertexBuffer * Create(int numElements, int numVertices, + DEVICE_CONTEXT context) { + return Create(numElements, numVertices, context->GetContext()); + } + /// Destructor. ~CLVertexBuffer(); @@ -52,6 +58,12 @@ public: /// vertices data to Osd. void UpdateData(const float *src, int startVertex, int numVertices, cl_command_queue clQueue); + template + void UpdateData(const float *src, int startVertex, int numVertices, + DEVICE_CONTEXT context) { + UpdateData(src, startVertex, numVertices, context->GetCommandQueue()); + } + /// Returns how many elements defined in this vertex buffer. int GetNumElements() const; diff --git a/opensubdiv/osd/cpuEvaluator.cpp b/opensubdiv/osd/cpuEvaluator.cpp index e15f09bd..3c8d761a 100644 --- a/opensubdiv/osd/cpuEvaluator.cpp +++ b/opensubdiv/osd/cpuEvaluator.cpp @@ -24,6 +24,7 @@ #include "../osd/cpuEvaluator.h" #include "../osd/cpuKernel.h" +#include "../far/patchBasis.h" #include @@ -34,15 +35,15 @@ namespace Osd { /* static */ bool -CpuEvaluator::EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, int end) { +CpuEvaluator::EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; if (srcDesc.length != dstDesc.length) return false; @@ -55,30 +56,28 @@ CpuEvaluator::EvalStencils(const float *src, /* static */ bool -CpuEvaluator::EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - float *dstDs, - VertexBufferDescriptor const &dstDsDesc, - float *dstDt, - VertexBufferDescriptor const &dstDtDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - const float * duWeights, - const float * dvWeights, - int start, int end) { +CpuEvaluator::EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end) { + if (end <= start) return true; if (srcDesc.length != dstDesc.length) return false; - if (srcDesc.length != dstDsDesc.length) return false; - if (srcDesc.length != dstDtDesc.length) return false; + if (srcDesc.length != duDesc.length) return false; + if (srcDesc.length != dvDesc.length) return false; CpuEvalStencils(src, srcDesc, dst, dstDesc, - dstDs, dstDsDesc, - dstDt, dstDtDesc, + du, duDesc, + dv, dvDesc, sizes, offsets, indices, weights, duWeights, dvWeights, start, end); @@ -123,10 +122,13 @@ CpuEvaluator::EvalPatches(const float *src, float *dst, VertexBufferDescriptor const &dstDesc, int numPatchCoords, - PatchCoord const *patchCoords, - Far::PatchTable const *patchTable) { + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer){ src += srcDesc.offset; if (dst) dst += dstDesc.offset; + else return false; BufferAdapter srcT(src, srcDesc.length, srcDesc.stride); BufferAdapter dstT(dst, dstDesc.length, dstDesc.stride); @@ -134,14 +136,38 @@ CpuEvaluator::EvalPatches(const float *src, float wP[20], wDs[20], wDt[20]; for (int i = 0; i < numPatchCoords; ++i) { - PatchCoord const &coords = patchCoords[i]; + PatchCoord const &coord = patchCoords[i]; + PatchArray const &array = patchArrays[coord.handle.arrayIndex]; - patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt); + int patchType = array.GetPatchType(); + // XXX: patchIndex is absolute. not sure it's consistent. + // (should be offsetted by array.primitiveIdBase?) + // patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex] + Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*) + &patchParamBuffer[coord.handle.patchIndex].patchBits; - Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle); + int numControlVertices = 0; + if (patchType == Far::PatchDescriptor::REGULAR) { + Far::internal::GetBSplineWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 16; + } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) { + Far::internal::GetGregoryWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 20; + } else if (patchType == Far::PatchDescriptor::QUADS) { + Far::internal::GetBilinearWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 4; + } else { + assert(0); + return false; + } + const int *cvs = + &patchIndexBuffer[array.indexBase + coord.handle.vertIndex]; dstT.Clear(); - for (int j = 0; j < cvs.size(); ++j) { + for (int j = 0; j < numControlVertices; ++j) { dstT.AddWithWeight(srcT[cvs[j]], wP[j]); } ++dstT; @@ -151,47 +177,67 @@ CpuEvaluator::EvalPatches(const float *src, /* static */ bool -CpuEvaluator::EvalPatches(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - float *dstDs, - VertexBufferDescriptor const &dstDsDesc, - float *dstDt, - VertexBufferDescriptor const &dstDtDesc, - int numPatchCoords, - PatchCoord const *patchCoords, - Far::PatchTable const *patchTable) { +CpuEvaluator::EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PatchCoord const *patchCoords, + PatchArray const *patchArrays, + const int *patchIndexBuffer, + PatchParam const *patchParamBuffer) { + src += srcDesc.offset; if (dst) dst += dstDesc.offset; - if (dstDs) dstDs += dstDsDesc.offset; - if (dstDt) dstDt += dstDtDesc.offset; + if (du) du += duDesc.offset; + if (dv) dv += dvDesc.offset; BufferAdapter srcT(src, srcDesc.length, srcDesc.stride); - BufferAdapter dstT(dst, dstDesc.length, dstDesc.stride); - BufferAdapter dstDsT(dstDs, dstDsDesc.length, dstDsDesc.stride); - BufferAdapter dstDtT(dstDt, dstDtDesc.length, dstDtDesc.stride); + BufferAdapter dstT(dst, dstDesc.length, dstDesc.stride); + BufferAdapter duT (du, duDesc.length, duDesc.stride); + BufferAdapter dvT (dv, dvDesc.length, dvDesc.stride); float wP[20], wDs[20], wDt[20]; for (int i = 0; i < numPatchCoords; ++i) { - PatchCoord const &coords = patchCoords[i]; + PatchCoord const &coord = patchCoords[i]; + PatchArray const &array = patchArrays[coord.handle.arrayIndex]; - patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt); + int patchType = array.GetPatchType(); + Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*) + &patchParamBuffer[coord.handle.patchIndex].patchBits; - Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle); + int numControlVertices = 0; + if (patchType == Far::PatchDescriptor::REGULAR) { + Far::internal::GetBSplineWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 16; + } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) { + Far::internal::GetGregoryWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 20; + } else if (patchType == Far::PatchDescriptor::QUADS) { + Far::internal::GetBilinearWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 4; + } else { + assert(0); + } + const int *cvs = + &patchIndexBuffer[array.indexBase + coord.handle.vertIndex]; dstT.Clear(); - dstDsT.Clear(); - dstDtT.Clear(); - for (int j = 0; j < cvs.size(); ++j) { + duT.Clear(); + dvT.Clear(); + for (int j = 0; j < numControlVertices; ++j) { dstT.AddWithWeight(srcT[cvs[j]], wP[j]); - dstDsT.AddWithWeight(srcT[cvs[j]], wDs[j]); - dstDtT.AddWithWeight(srcT[cvs[j]], wDt[j]); + duT.AddWithWeight (srcT[cvs[j]], wDs[j]); + dvT.AddWithWeight (srcT[cvs[j]], wDt[j]); } ++dstT; - ++dstDsT; - ++dstDtT; + ++duT; + ++dvT; } return true; } diff --git a/opensubdiv/osd/cpuEvaluator.h b/opensubdiv/osd/cpuEvaluator.h index e03be1ee..5c402485 100644 --- a/opensubdiv/osd/cpuEvaluator.h +++ b/opensubdiv/osd/cpuEvaluator.h @@ -29,33 +29,14 @@ #include #include +#include "../osd/types.h" #include "../osd/vertexDescriptor.h" -#include "../far/patchTable.h" namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { -/// \brief Coordinates set on a patch table -/// XXX: this is a temporary structure, exists during Osd refactoring work. -/// -struct PatchCoord { - /// \brief Constructor - /// - /// @param p patch handle - /// - /// @param s parametric location on the patch - /// - /// @param t parametric location on the patch - /// - PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) : - handle(handle), s(s), t(t) { } - - Far::PatchTable::PatchHandle handle; ///< patch handle - float s, t; ///< parametric location on patch -}; - class CpuEvaluator { public: /// ---------------------------------------------------------------------- @@ -80,7 +61,7 @@ public: /// /// @param dstDesc vertex buffer descriptor for the output buffer /// - /// @param stencilTable stencil table to be applied. + /// @param stencilTable Far::StencilTable or equivalent /// /// @param instance not used in the cpu kernel /// (declared as a typed pointer to prevent @@ -89,20 +70,18 @@ public: /// @param deviceContext not used in the cpu kernel /// template - static bool EvalStencils(SRC_BUFFER *srcBuffer, - VertexBufferDescriptor const &srcDesc, - DST_BUFFER *dstBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable, - const CpuEvaluator *instance = NULL, - void * deviceContext = NULL) { - (void)instance; // unused - (void)deviceContext; // unused + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + const CpuEvaluator *instance = NULL, + void * deviceContext = NULL) { - return EvalStencils(srcBuffer->BindCpuBuffer(), - srcDesc, - dstBuffer->BindCpuBuffer(), - dstDesc, + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, &stencilTable->GetSizes()[0], &stencilTable->GetOffsets()[0], &stencilTable->GetControlIndices()[0], @@ -125,24 +104,27 @@ public: /// /// @param dstDesc vertex buffer descriptor for the output buffer /// - /// @param stencilTable stencil table to be applied. + /// @param sizes pointer to the sizes buffer of the stencil table + /// to apply for the range [start, end) /// - /// @param instance not used in the cpu kernel - /// (declared as a typed pointer to prevent - /// undesirable template resolution) + /// @param offsets pointer to the offsets buffer of the stencil table /// - /// @param deviceContext not used in the cpu kernel + /// @param indices pointer to the indices buffer of the stencil table /// - static bool EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, - int end); + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end); /// \brief Generic static eval stencils function with derivatives. /// This function has a same signature as other device kernels @@ -161,19 +143,19 @@ public: /// /// @param dstDesc vertex buffer descriptor for the output buffer /// - /// @param dstDsBuffer Output s-derivative buffer + /// @param duBuffer Output U-derivative buffer /// must have BindCpuBuffer() method returning a /// float pointer for write /// - /// @param dstDsDesc vertex buffer descriptor for the output buffer + /// @param duDesc vertex buffer descriptor for the output buffer /// - /// @param dstDtBuffer Output t-derivative buffer + /// @param dvBuffer Output V-derivative buffer /// must have BindCpuBuffer() method returning a /// float pointer for write /// - /// @param dstDtDesc vertex buffer descriptor for the output buffer + /// @param dvDesc vertex buffer descriptor for the output buffer /// - /// @param stencilTable stencil table to be applied. + /// @param stencilTable Far::StencilTable or equivalent /// /// @param instance not used in the cpu kernel /// (declared as a typed pointer to prevent @@ -182,28 +164,22 @@ public: /// @param deviceContext not used in the cpu kernel /// template - static bool EvalStencils(SRC_BUFFER *srcBuffer, - VertexBufferDescriptor const &srcDesc, - DST_BUFFER *dstBuffer, - VertexBufferDescriptor const &dstDesc, - DST_BUFFER *dstDsBuffer, - VertexBufferDescriptor const &dstDsDesc, - DST_BUFFER *dstDtBuffer, - VertexBufferDescriptor const &dstDtDesc, - STENCIL_TABLE const *stencilTable, - const CpuEvaluator *evaluator = NULL, - void * deviceContext = NULL) { - (void)evaluator; // unused - (void)deviceContext; // unused + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable, + const CpuEvaluator *instance = NULL, + void * deviceContext = NULL) { - return EvalStencils(srcBuffer->BindCpuBuffer(), - srcDesc, - dstBuffer->BindCpuBuffer(), - dstDesc, - dstDsBuffer->BindCpuBuffer(), - dstDsDesc, - dstDtBuffer->BindCpuBuffer(), - dstDtDesc, + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, + duBuffer->BindCpuBuffer(), duDesc, + dvBuffer->BindCpuBuffer(), dvDesc, &stencilTable->GetSizes()[0], &stencilTable->GetOffsets()[0], &stencilTable->GetControlIndices()[0], @@ -228,40 +204,44 @@ public: /// /// @param dstDesc vertex buffer descriptor for the output buffer /// - /// @param dstDs Output s-derivatives pointer. An offset of - /// dstDsDesc will be applied internally. + /// @param du Output U-derivatives pointer. An offset of + /// duDesc will be applied internally. /// - /// @param dstDsDesc vertex buffer descriptor for the output buffer + /// @param duDesc vertex buffer descriptor for the output buffer /// - /// @param dstDt Output t-derivatives pointer. An offset of - /// dstDtDesc will be applied internally. + /// @param dv Output V-derivatives pointer. An offset of + /// dvDesc will be applied internally. /// - /// @param dstDtDesc vertex buffer descriptor for the output buffer + /// @param dvDesc vertex buffer descriptor for the output buffer /// - /// @param stencilTable stencil table to be applied. + /// @param sizes pointer to the sizes buffer of the stencil table /// - /// @param instance not used in the cpu kernel - /// (declared as a typed pointer to prevent - /// undesirable template resolution) + /// @param offsets pointer to the offsets buffer of the stencil table /// - /// @param deviceContext not used in the cpu kernel + /// @param indices pointer to the indices buffer of the stencil table /// - static bool EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - float *dstDs, - VertexBufferDescriptor const &dstDsDesc, - float *dstDt, - VertexBufferDescriptor const &dstDtDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - const float * duWeights, - const float * dvWeights, - int start, - int end); + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param duWeights pointer to the du-weights buffer of the stencil table + /// + /// @param dvWeights pointer to the dv-weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end); /// ---------------------------------------------------------------------- /// @@ -289,32 +269,35 @@ public: /// /// @param patchCoords array of locations to be evaluated. /// - /// @param patchTable Far::PatchTable + /// @param patchTable CpuPatchTable or equivalent + /// XXX: currently Far::PatchTable can't be used + /// due to interface mismatch /// /// @param instance not used in the cpu evaluator /// /// @param deviceContext not used in the cpu evaluator /// - template - static bool EvalPatches(SRC_BUFFER *srcBuffer, - VertexBufferDescriptor const &srcDesc, - DST_BUFFER *dstBuffer, - VertexBufferDescriptor const &dstDesc, - int numPatchCoords, - PatchCoord const *patchCoords, - Far::PatchTable const *patchTable, - CpuEvaluator const *instance, - void * deviceContext = NULL) { - (void)instance; // unused - (void)deviceContext; // unused + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + CpuEvaluator const *instance = NULL, + void * deviceContext = NULL) { - return EvalPatches(srcBuffer->BindCpuBuffer(), - srcDesc, - dstBuffer->BindCpuBuffer(), - dstDesc, + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, numPatchCoords, - patchCoords, - patchTable); + (const PatchCoord*)patchCoords->BindCpuBuffer(), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); } /// \brief Generic limit eval function with derivatives. This function has @@ -333,56 +316,59 @@ public: /// /// @param dstDesc vertex buffer descriptor for the output buffer /// - /// @param dstDsBuffer Output s-derivatives buffer + /// @param duBuffer Output U-derivatives buffer /// must have BindCpuBuffer() method returning a /// float pointer for write /// - /// @param dstDsDesc vertex buffer descriptor for the dstDsBuffer + /// @param duDesc vertex buffer descriptor for the duBuffer /// - /// @param dstDtBuffer Output t-derivatives buffer + /// @param dvBuffer Output V-derivatives buffer /// must have BindCpuBuffer() method returning a /// float pointer for write /// - /// @param dstDtDesc vertex buffer descriptor for the dstDtBuffer + /// @param dvDesc vertex buffer descriptor for the dvBuffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// - /// @param patchTable Far::PatchTable + /// @param patchTable CpuPatchTable or equivalent + /// XXX: currently Far::PatchTable can't be used + /// due to interface mismatch /// /// @param instance not used in the cpu evaluator /// /// @param deviceContext not used in the cpu evaluator /// - template - static bool EvalPatches(SRC_BUFFER *srcBuffer, - VertexBufferDescriptor const &srcDesc, - DST_BUFFER *dstBuffer, - VertexBufferDescriptor const &dstDesc, - DST_BUFFER *dstDsBuffer, - VertexBufferDescriptor const &dstDsDesc, - DST_BUFFER *dstDtBuffer, - VertexBufferDescriptor const &dstDtDesc, - int numPatchCoords, - PatchCoord const *patchCoords, - Far::PatchTable const *patchTable, - CpuEvaluator const *instance, - void * deviceContext = NULL) { - (void)instance; // unused - (void)deviceContext; // unused + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + CpuEvaluator const *instance = NULL, + void * deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused - return EvalPatches(srcBuffer->BindCpuBuffer(), - srcDesc, - dstBuffer->BindCpuBuffer(), - dstDesc, - dstDsBuffer->BindCpuBuffer(), - dstDsDesc, - dstDtBuffer->BindCpuBuffer(), - dstDtDesc, + // XXX: PatchCoords is somewhat abusing vertex primvar buffer interop. + // ideally all buffer classes should have templated by datatype + // so that downcast isn't needed there. + // (e.g. Osd::CpuBuffer ) + // + return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, + duBuffer->BindCpuBuffer(), duDesc, + dvBuffer->BindCpuBuffer(), dvDesc, numPatchCoords, - patchCoords, - patchTable); + (const PatchCoord*)patchCoords->BindCpuBuffer(), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); } /// \brief Static limit eval function. It takes an array of PatchCoord @@ -403,20 +389,23 @@ public: /// /// @param patchCoords array of locations to be evaluated. /// - /// @param patchTable Far::PatchTable on which primvars are evaluated - /// for the patchCoords + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex /// - /// @param instance not used in the cpu evaluator + /// @param patchIndexBuffer an array of patch indices + /// indexed by PatchCoord::vertIndex /// - /// @param deviceContext not used in the cpu evaluator + /// @param patchParamBuffer an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex /// - static bool EvalPatches(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - int numPatchCoords, - PatchCoord const *patchCoords, - Far::PatchTable const *patchTable); + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer); /// \brief Static limit eval function. It takes an array of PatchCoord /// and evaluate limit values on given PatchTable. @@ -432,38 +421,45 @@ public: /// /// @param dstDesc vertex buffer descriptor for the output buffer /// - /// @param dstDs Output s-derivatives pointer. An offset of - /// dstDsDesc will be applied internally. + /// @param du Output U-derivatives pointer. An offset of + /// duDesc will be applied internally. /// - /// @param dstDsDesc vertex buffer descriptor for the dstDs buffer + /// @param duDesc vertex buffer descriptor for the du buffer /// - /// @param dstDt Output t-derivatives pointer. An offset of - /// dstDtDesc will be applied internally. + /// @param dv Output V-derivatives pointer. An offset of + /// dvDesc will be applied internally. /// - /// @param dstDtDesc vertex buffer descriptor for the dstDt buffer + /// @param dvDesc vertex buffer descriptor for the dv buffer /// /// @param numPatchCoords number of patchCoords. /// /// @param patchCoords array of locations to be evaluated. /// - /// @param patchTable Far::PatchTable on which primvars are evaluated - /// for the patchCoords + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex /// - /// @param instance not used in the cpu evaluator + /// @param patchIndexBuffer an array of patch indices + /// indexed by PatchCoord::vertIndex /// - /// @param deviceContext not used in the cpu evaluator + /// @param patchParamBuffer an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex /// - static bool EvalPatches(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - float *dstDs, - VertexBufferDescriptor const &dstDsDesc, - float *dstDt, - VertexBufferDescriptor const &dstDtDesc, - int numPatchCoords, - PatchCoord const *patchCoords, - Far::PatchTable const *patchTable); + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PatchCoord const *patchCoords, + PatchArray const *patchArrays, + const int *patchIndexBuffer, + PatchParam const *patchParamBuffer); + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- /// \brief synchronize all asynchronous computation invoked on this device. static void Synchronize(void * /*deviceContext = NULL*/) { diff --git a/opensubdiv/osd/cpuPatchTable.cpp b/opensubdiv/osd/cpuPatchTable.cpp new file mode 100644 index 00000000..a59dd8e8 --- /dev/null +++ b/opensubdiv/osd/cpuPatchTable.cpp @@ -0,0 +1,106 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/cpuPatchTable.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +CpuPatchTable::CpuPatchTable(const Far::PatchTable *farPatchTable) { + int nPatchArrays = farPatchTable->GetNumPatchArrays(); + + // count + int numPatches = 0; + int numIndices = 0; + for (int j = 0; j < nPatchArrays; ++j) { + int nPatch = farPatchTable->GetNumPatches(j); + int nCV = farPatchTable->GetPatchArrayDescriptor(j).GetNumControlVertices(); + numPatches += nPatch; + numIndices += nPatch * nCV; + } + _patchArrays.reserve(nPatchArrays); + _indexBuffer.reserve(numIndices); + _patchParamBuffer.reserve(numPatches); + + // for each patchArray + for (int j = 0; j < nPatchArrays; ++j) { + PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j), + farPatchTable->GetNumPatches(j), + (int)_indexBuffer.size(), + (int)_patchParamBuffer.size()); + _patchArrays.push_back(patchArray); + + // indices + Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j); + for (int k = 0; k < indices.size(); ++k) { + _indexBuffer.push_back(indices[k]); + } + + // patchParams bundling + // XXX: this process won't be needed if Far::PatchParam includes + // sharpness. +#if 0 + // XXX: we need sharpness interface for patcharray or put sharpness + // into patchParam. + Far::ConstPatchParamArray patchParams = + farPatchTable->GetPatchParams(j); + for (int k = 0; k < patchParams.size(); ++k) { + float sharpness = 0.0; + _patchParamBuffer.push_back(patchParams[k].faceIndex); + _patchParamBuffer.push_back(patchParams[k].bitField.field); + _patchParamBuffer.push_back(*((unsigned int *)&sharpness)); + } +#else + // XXX: workaround. GetPatchParamTable() will be deprecated though. + Far::PatchParamTable const & patchParamTable = + farPatchTable->GetPatchParamTable(); + std::vector const &sharpnessIndexTable = + farPatchTable->GetSharpnessIndexTable(); + int numPatches = farPatchTable->GetNumPatches(j); + for (int k = 0; k < numPatches; ++k) { + float sharpness = 0.0; + int patchIndex = (int)_patchParamBuffer.size(); + if (patchIndex < (int)sharpnessIndexTable.size()) { + int sharpnessIndex = sharpnessIndexTable[patchIndex]; + if (sharpnessIndex >= 0) + sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex]; + } + PatchParam param; + //param.patchParam = patchParamTable[patchIndex]; + param.faceIndex = patchParamTable[patchIndex].faceIndex; + param.patchBits = patchParamTable[patchIndex].bitField.field; + param.sharpness = sharpness; + _patchParamBuffer.push_back(param); + } +#endif + } +} + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv + diff --git a/opensubdiv/osd/cpuPatchTable.h b/opensubdiv/osd/cpuPatchTable.h new file mode 100644 index 00000000..05151a30 --- /dev/null +++ b/opensubdiv/osd/cpuPatchTable.h @@ -0,0 +1,102 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H +#define OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H + +#include "../version.h" + +#include +#include "../far/patchDescriptor.h" +#include "../osd/nonCopyable.h" +#include "../osd/opengl.h" +#include "../osd/types.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far{ + class PatchTable; +}; + +namespace Osd { + +/// \brief Cpu patch table +/// +/// XXX: We can use just Far::PatchTable for typical CpuEval use cases. +/// +/// Currently this class exists because of the template resolution +/// for the CpuEvaluator's generic interface functions +/// (glEvalLimit example uses), and +/// device-specific patch tables such as GLPatchTables internally use +/// as a staging buffer to splice patcharray and interleave sharpnesses. +/// +/// Ideally Far::PatchTables should have the same data representation +/// and accessors so that we don't have to copy data unnecessarily. +/// +class CpuPatchTable { +public: + static CpuPatchTable *Create(const Far::PatchTable *patchTable, + void *deviceContext = NULL) { + (void)deviceContext; // unused + return new CpuPatchTable(patchTable); + } + + explicit CpuPatchTable(const Far::PatchTable *patchTable); + ~CpuPatchTable() {} + + const PatchArray *GetPatchArrayBuffer() const { + return &_patchArrays[0]; + } + const int *GetPatchIndexBuffer() const { + return &_indexBuffer[0]; + } + const PatchParam *GetPatchParamBuffer() const { + return &_patchParamBuffer[0]; + } + + size_t GetNumPatchArrays() const { + return _patchArrays.size(); + } + size_t GetPatchIndexSize() const { + return _indexBuffer.size(); + } + size_t GetPatchParamSize() const { + return _patchParamBuffer.size(); + } + +protected: + PatchArrayVector _patchArrays; + std::vector _indexBuffer; + PatchParamVector _patchParamBuffer; +}; + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + +#endif // OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H diff --git a/opensubdiv/osd/cpuVertexBuffer.cpp b/opensubdiv/osd/cpuVertexBuffer.cpp index 714b783e..9f354da5 100644 --- a/opensubdiv/osd/cpuVertexBuffer.cpp +++ b/opensubdiv/osd/cpuVertexBuffer.cpp @@ -52,7 +52,8 @@ CpuVertexBuffer::Create(int numElements, int numVertices, } void -CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) { +CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices, + void * /*deviceContext*/) { memcpy(_cpuBuffer + startVertex * _numElements, src, GetNumElements() * numVertices * sizeof(float)); diff --git a/opensubdiv/osd/cpuVertexBuffer.h b/opensubdiv/osd/cpuVertexBuffer.h index ff0302b4..da24680a 100644 --- a/opensubdiv/osd/cpuVertexBuffer.h +++ b/opensubdiv/osd/cpuVertexBuffer.h @@ -50,7 +50,8 @@ public: /// This method is meant to be used in client code in order to provide /// coarse vertices data to Osd. - void UpdateData(const float *src, int startVertex, int numVertices); + void UpdateData(const float *src, int startVertex, int numVertices, + void *deviceContext = NULL); /// Returns how many elements defined in this vertex buffer. int GetNumElements() const; diff --git a/opensubdiv/osd/cudaEvaluator.cpp b/opensubdiv/osd/cudaEvaluator.cpp index 74907acd..3335fb88 100644 --- a/opensubdiv/osd/cudaEvaluator.cpp +++ b/opensubdiv/osd/cudaEvaluator.cpp @@ -28,6 +28,7 @@ #include #include "../far/stencilTable.h" +#include "../osd/types.h" extern "C" { void CudaEvalStencils(const float *src, @@ -41,6 +42,25 @@ extern "C" { const float * weights, int start, int end); + + void CudaEvalPatches( + const float *src, float *dst, + int length, int srcStride, int dstStride, + int numPatchCoords, + const void *patchCoords, + const void *patchArrays, + const int *patchIndices, + const void *patchParams); + + void CudaEvalPatchesWithDerivatives( + const float *src, float *dst, float *du, float *dv, + int length, + int srcStride, int dstStride, int dvStride, int duStride, + int numPatchCoords, + const void *patchCoords, + const void *patchArrays, + const int *patchIndices, + const void *patchParams); } namespace OpenSubdiv { @@ -102,6 +122,8 @@ CudaEvaluator::EvalStencils(const float *src, const float * weights, int start, int end) { + if (dst == NULL) return false; + CudaEvalStencils(src + srcDesc.offset, dst + dstDesc.offset, srcDesc.length, @@ -112,6 +134,105 @@ CudaEvaluator::EvalStencils(const float *src, return true; } +/* static */ +bool +CudaEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + float *dstDu, + VertexBufferDescriptor const &dstDuDesc, + float *dstDv, + VertexBufferDescriptor const &dstDvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, + int end) { + // PERFORMANCE: need to combine 3 launches together + if (dst) { + CudaEvalStencils(src + srcDesc.offset, + dst + dstDesc.offset, + srcDesc.length, + srcDesc.stride, + dstDesc.stride, + sizes, offsets, indices, weights, + start, end); + } + if (dstDu) { + CudaEvalStencils(src + srcDesc.offset, + dstDu + dstDuDesc.offset, + srcDesc.length, + srcDesc.stride, + dstDuDesc.stride, + sizes, offsets, indices, duWeights, + start, end); + } + if (dstDv) { + CudaEvalStencils(src + srcDesc.offset, + dstDv + dstDvDesc.offset, + srcDesc.length, + srcDesc.stride, + dstDvDesc.stride, + sizes, offsets, indices, dvWeights, + start, end); + } + return true; +} + +/* static */ +bool +CudaEvaluator::EvalPatches(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndices, + const PatchParam *patchParams) { + src += srcDesc.offset; + if (dst) + dst += dstDesc.offset; + + CudaEvalPatches(src, dst, + srcDesc.length, srcDesc.stride, dstDesc.stride, + numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams); + + return true; +} + +/* static */ +bool +CudaEvaluator::EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndices, + const PatchParam *patchParams) { + + if (src) src += srcDesc.offset; + if (dst) dst += dstDesc.offset; + if (du) du += duDesc.offset; + if (dv) dv += dvDesc.offset; + + CudaEvalPatchesWithDerivatives( + src, dst, du, dv, + srcDesc.length, srcDesc.stride, + dstDesc.stride, duDesc.stride, dvDesc.stride, + numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams); + return true; +} + + + /* static */ void CudaEvaluator::Synchronize(void * /*deviceContext*/) { diff --git a/opensubdiv/osd/cudaEvaluator.h b/opensubdiv/osd/cudaEvaluator.h index ee36e0b6..c57f4a22 100644 --- a/opensubdiv/osd/cudaEvaluator.h +++ b/opensubdiv/osd/cudaEvaluator.h @@ -29,11 +29,13 @@ #include #include "../osd/vertexDescriptor.h" +#include "../osd/types.h" namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Far { + class PatchTable; class StencilTable; } @@ -43,7 +45,7 @@ namespace Osd { /// /// This class is a cuda buffer representation of Far::StencilTable. /// -/// CudaComputeKernel consumes this table to apply stencils +/// CudaEvaluator consumes this table to apply stencils /// /// class CudaStencilTable { @@ -72,10 +74,14 @@ private: int _numStencils; }; -// --------------------------------------------------------------------------- - class CudaEvaluator { public: + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + /// \brief Generic static compute function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. @@ -99,21 +105,18 @@ public: /// /// @param deviceContext not used in the CudaEvaluator /// - template - static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable, - const void *instance = NULL, - void * deviceContext = NULL) { + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + const void *instance = NULL, + void * deviceContext = NULL) { (void)instance; // unused (void)deviceContext; // unused - return EvalStencils(srcVertexBuffer->BindCudaBuffer(), - srcDesc, - dstVertexBuffer->BindCudaBuffer(), - dstDesc, + return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc, + dstBuffer->BindCudaBuffer(), dstDesc, (int const *)stencilTable->GetSizesBuffer(), (int const *)stencilTable->GetOffsetsBuffer(), (int const *)stencilTable->GetIndicesBuffer(), @@ -122,17 +125,369 @@ public: /*end = */ stencilTable->GetNumStencils()); } - static bool EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, - int end); + /// \brief Static eval stencils function which takes raw cuda buffers for + /// input and output. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param sizes pointer to the sizes buffer of the stencil table + /// + /// @param offsets pointer to the offsets buffer of the stencil table + /// + /// @param indices pointer to the indices buffer of the stencil table + /// + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end); + /// \brief Generic static eval stencils function with derivatives. + /// This function has a same signature as other device kernels + /// have so that it can be called in the same way from OsdMesh + /// template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCudaBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output U-derivative buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the output buffer + /// + /// @param dvBuffer Output V-derivative buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. + /// + /// @param instance not used in the cuda kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the cuda kernel + /// + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable, + const CudaEvaluator *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc, + dstBuffer->BindCudaBuffer(), dstDesc, + duBuffer->BindCudaBuffer(), duDesc, + dvBuffer->BindCudaBuffer(), dvDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + &stencilTable->GetDuWeights()[0], + &stencilTable->GetDvWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + /// \brief Static eval stencils function with derivatives, which takes + /// raw cuda pointers for input and output. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param du Output U-derivatives pointer. An offset of + /// duDesc will be applied internally. + /// + /// @param duDesc vertex buffer descriptor for the output buffer + /// + /// @param dv Output V-derivatives pointer. An offset of + /// dvDesc will be applied internally. + /// + /// @param dvDesc vertex buffer descriptor for the output buffer + /// + /// @param sizes pointer to the sizes buffer of the stencil table + /// + /// @param offsets pointer to the offsets buffer of the stencil table + /// + /// @param indices pointer to the indices buffer of the stencil table + /// + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param duWeights pointer to the du-weights buffer of the stencil table + /// + /// @param dvWeights pointer to the dv-weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end); + + /// ---------------------------------------------------------------------- + /// + /// Limit evaluations with PatchTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCudaBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindCudaBuffer() method returning an + /// array of PatchCoord struct in cuda memory. + /// + /// @param patchTable CudaPatchTable or equivalent + /// + /// @param instance not used in the cuda evaluator + /// + /// @param deviceContext not used in the cuda evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + CudaEvaluator const *instance, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc, + dstBuffer->BindCudaBuffer(), dstDesc, + numPatchCoords, + (const PatchCoord *)patchCoords->BindCudaBuffer(), + (const PatchArray *)patchTable->GetPatchArrayBuffer(), + (const int *)patchTable->GetPatchIndexBuffer(), + (const PatchParam *)patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCudaBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output U-derivatives buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output V-derivatives buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable CudaPatchTable or equivalent + /// + /// @param instance not used in the cuda evaluator + /// + /// @param deviceContext not used in the cuda evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + CudaEvaluator const *instance, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc, + dstBuffer->BindCudaBuffer(), dstDesc, + duBuffer->BindCudaBuffer(), duDesc, + dvBuffer->BindCudaBuffer(), dvDesc, + numPatchCoords, + (const PatchCoord *)patchCoords->BindCudaBuffer(), + (const PatchArray *)patchTable->GetPatchArrayBuffer(), + (const int *)patchTable->GetPatchIndexBuffer(), + (const PatchParam *)patchTable->GetPatchParamBuffer()); + } + + /// \brief Static limit eval function. It takes an array of PatchCoord + /// and evaluate limit values on given PatchTable. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex + /// + /// @param patchIndices an array of patch indices + /// indexed by PatchCoord::vertIndex + /// + /// @param patchParams an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex + /// + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndices, + const PatchParam *patchParams); + + /// \brief Static limit eval function. It takes an array of PatchCoord + /// and evaluate limit values on given PatchTable. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param du Output U-derivatives pointer. An offset of + /// duDesc will be applied internally. + /// + /// @param duDesc vertex buffer descriptor for the du buffer + /// + /// @param dv Output V-derivatives pointer. An offset of + /// dvDesc will be applied internally. + /// + /// @param dvDesc vertex buffer descriptor for the dv buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex + /// + /// @param patchIndices an array of patch indices + /// indexed by PatchCoord::vertIndex + /// + /// @param patchParams an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex + /// + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndices, + const PatchParam *patchParams); + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- static void Synchronize(void *deviceContext = NULL); }; diff --git a/opensubdiv/osd/cudaKernel.cu b/opensubdiv/osd/cudaKernel.cu index daa041e3..3ca69382 100644 --- a/opensubdiv/osd/cudaKernel.cu +++ b/opensubdiv/osd/cudaKernel.cu @@ -238,6 +238,204 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs, // ----------------------------------------------------------------------------- +// Osd::PatchCoord osd/types.h +struct PatchCoord { + int arrayIndex; + int patchIndex; + int vertIndex; + float s; + float t; +}; +struct PatchArray { + int patchType; // Far::PatchDescriptor::Type + int numPatches; + int indexBase; // offset in the index buffer + int primitiveIdBase; // offset in the patch param buffer +}; +struct PatchParam { + int faceIndex; + unsigned int bitField; + float sharpness; +}; + +__device__ void +getBSplineWeights(float t, float point[4], float deriv[4]) { + // The four uniform cubic B-Spline basis functions evaluated at t: + float const one6th = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3); + point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3); + point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3)); + point[3] = one6th * ( t3); + + // Derivatives of the above four basis functions at t: + if (deriv) { + deriv[0] = -0.5f*t2 + t - 0.5f; + deriv[1] = 1.5f*t2 - 2.0f*t; + deriv[2] = -1.5f*t2 + t + 0.5f; + deriv[3] = 0.5f*t2; + } +} + +__device__ void +adjustBoundaryWeights(unsigned int bits, float sWeights[4], float tWeights[4]) { + int boundary = ((bits >> 4) & 0xf); // far/patchParam.h + + if (boundary & 1) { + tWeights[2] -= tWeights[0]; + tWeights[1] += 2*tWeights[0]; + tWeights[0] = 0; + } + if (boundary & 2) { + sWeights[1] -= sWeights[3]; + sWeights[2] += 2*sWeights[3]; + sWeights[3] = 0; + } + if (boundary & 4) { + tWeights[1] -= tWeights[3]; + tWeights[2] += 2*tWeights[3]; + tWeights[3] = 0; + } + if (boundary & 8) { + sWeights[2] -= sWeights[0]; + sWeights[1] += 2*sWeights[0]; + sWeights[0] = 0; + } +} + +__device__ +int getDepth(unsigned int patchBits) { + return (patchBits & 0x7); +} + +__device__ +float getParamFraction(unsigned int patchBits) { + bool nonQuadRoot = (patchBits >> 3) & 0x1; + int depth = getDepth(patchBits); + if (nonQuadRoot) { + return 1.0f / float( 1 << (depth-1) ); + } else { + return 1.0f / float( 1 << depth ); + } +} + +__device__ +void normalizePatchCoord(unsigned int patchBits, float *u, float *v) { + float frac = getParamFraction(patchBits); + + int iu = (patchBits >> 22) & 0x3ff; + int iv = (patchBits >> 12) & 0x3ff; + + // top left corner + float pu = (float)iu*frac; + float pv = (float)iv*frac; + + // normalize u,v coordinates + *u = (*u - pu) / frac, + *v = (*v - pv) / frac; +} + +// Far::PatchDescriptor::Type +enum Type { + NON_PATCH = 0, ///< undefined + POINTS, ///< points (useful for cage drawing) + LINES, ///< lines (useful for cage drawing) + QUADS, ///< bilinear quads-only patches + TRIANGLES, ///< bilinear triangles-only mesh + LOOP, ///< Loop patch + REGULAR, ///< feature-adaptive bicubic patches + GREGORY, + GREGORY_BOUNDARY, + GREGORY_BASIS +}; + +__global__ void +computePatches(const float *src, float *dst, float *dstDu, float *dstDv, + int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride, + int numPatchCoords, const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer) { + + int first = threadIdx.x + blockIdx.x * blockDim.x; + + // PERFORMANCE: not yet optimized + + float wP[20], wDs[20], wDt[20]; + + for (int i = first; i < numPatchCoords; i += blockDim.x * gridDim.x) { + + PatchCoord const &coord = patchCoords[i]; + PatchArray const &array = patchArrayBuffer[coord.arrayIndex]; + + int patchType = array.patchType; + int numControlVertices = 16; + // note: patchIndex is absolute. + unsigned int patchBits = patchParamBuffer[coord.patchIndex].bitField; + + // normalize + float s = coord.s; + float t = coord.t; + normalizePatchCoord(patchBits, &s, &t); + float dScale = (float)(1 << getDepth(patchBits)); + + if (patchType == REGULAR) { + float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4]; + getBSplineWeights(s, sWeights, dsWeights); + getBSplineWeights(t, tWeights, dtWeights); + + // Compute the tensor product weight of the (s,t) basis function + // corresponding to each control vertex: + adjustBoundaryWeights(patchBits, sWeights, tWeights); + adjustBoundaryWeights(patchBits, dsWeights, dtWeights); + + for (int k = 0; k < 4; ++k) { + for (int l = 0; l < 4; ++l) { + wP[4*k+l] = sWeights[l] * tWeights[k]; + wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale; + wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale; + } + } + } else if (patchType == GREGORY_BASIS) { + // XXX: not yet implemented. + continue; + } else { + // unknown patchType + continue; + } + const int *cvs = + &patchIndexBuffer[array.indexBase + coord.vertIndex]; + + float * dstVert = dst + i * dstStride; + clear(dstVert, length); + for (int j = 0; j < numControlVertices; ++j) { + const float * srcVert = src + cvs[j] * srcStride; + addWithWeight(dstVert, srcVert, wP[j], length); + } + if (dstDu) { + float *d = dstDu + i * dstDuStride; + clear(d, length); + for (int j = 0; j < numControlVertices; ++j) { + const float * srcVert = src + cvs[j] * srcStride; + addWithWeight(d, srcVert, wDs[j], length); + } + } + if (dstDv) { + float *d = dstDv + i * dstDvStride; + clear(d, length); + for (int j = 0; j < numControlVertices; ++j) { + const float * srcVert = src + cvs[j] * srcStride; + addWithWeight(d, srcVert, wDt[j], length); + } + } + } +} + +// ----------------------------------------------------------------------------- + #include "../version.h" #define OPT_KERNEL(NUM_ELEMENTS, KERNEL, X, Y, ARG) \ @@ -257,20 +455,12 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs, extern "C" { -void CudaEvalStencils(const float *src, - float *dst, - int length, - int srcStride, - int dstStride, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, - int end) -{ -// assert(cvs and dst and sizes and offsets and indices and weights and (end>=start)); - +void CudaEvalStencils( + const float *src, float *dst, + int length, int srcStride, int dstStride, + const int * sizes, const int * offsets, const int * indices, + const float * weights, + int start, int end) { if (length == 0 or srcStride == 0 or dstStride == 0 or (end <= start)) { return; } @@ -301,4 +491,36 @@ void CudaEvalStencils(const float *src, // ----------------------------------------------------------------------------- +void CudaEvalPatches( + const float *src, float *dst, + int length, int srcStride, int dstStride, + int numPatchCoords, const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer) { + + // PERFORMANCE: not optimized at all + + computePatches <<<512, 32>>>( + src, dst, NULL, NULL, length, srcStride, dstStride, 0, 0, + numPatchCoords, patchCoords, + patchArrayBuffer, patchIndexBuffer, patchParamBuffer); +} + +void CudaEvalPatchesWithDerivatives( + const float *src, float *dst, float *dstDu, float *dstDv, + int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride, + int numPatchCoords, const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer) { + + // PERFORMANCE: not optimized at all + + computePatches <<<512, 32>>>( + src, dst, dstDu, dstDv, length, srcStride, dstStride, dstDuStride, dstDvStride, + numPatchCoords, patchCoords, + patchArrayBuffer, patchIndexBuffer, patchParamBuffer); +} + } /* extern "C" */ diff --git a/opensubdiv/osd/cudaPatchTable.cpp b/opensubdiv/osd/cudaPatchTable.cpp new file mode 100644 index 00000000..f90dcc1a --- /dev/null +++ b/opensubdiv/osd/cudaPatchTable.cpp @@ -0,0 +1,103 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/cudaPatchTable.h" + +#include + +#include "../far/patchTable.h" +#include "../osd/cpuPatchTable.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +CudaPatchTable::CudaPatchTable() : + _patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) { +} + +CudaPatchTable::~CudaPatchTable() { + if (_patchArrays) cudaFree(_patchArrays); + if (_indexBuffer) cudaFree(_indexBuffer); + if (_patchParamBuffer) cudaFree(_patchParamBuffer); +} + +CudaPatchTable * +CudaPatchTable::Create(Far::PatchTable const *farPatchTable, + void * /*deviceContext*/) { + CudaPatchTable *instance = new CudaPatchTable(); + if (instance->allocate(farPatchTable)) return instance; + delete instance; + return 0; +} + +bool +CudaPatchTable::allocate(Far::PatchTable const *farPatchTable) { + CpuPatchTable patchTable(farPatchTable); + + size_t numPatchArrays = patchTable.GetNumPatchArrays(); + size_t indexSize = patchTable.GetPatchIndexSize(); + size_t patchParamSize = patchTable.GetPatchParamSize(); + + cudaError_t err; + err = cudaMalloc(&_patchArrays, numPatchArrays * sizeof(Osd::PatchArray)); + if (err != cudaSuccess) return false; + + err = cudaMalloc(&_indexBuffer, indexSize * sizeof(int)); + if (err != cudaSuccess) return false; + + err = cudaMalloc(&_patchParamBuffer, patchParamSize * sizeof(Osd::PatchParam)); + if (err != cudaSuccess) return false; + + // copy patch array + err = cudaMemcpy(_patchArrays, + patchTable.GetPatchArrayBuffer(), + numPatchArrays * sizeof(Osd::PatchArray), + cudaMemcpyHostToDevice); + if (err != cudaSuccess) return false; + + // copy index buffer + err = cudaMemcpy(_indexBuffer, + patchTable.GetPatchIndexBuffer(), + indexSize * sizeof(int), + cudaMemcpyHostToDevice); + if (err != cudaSuccess) return false; + + // patch param buffer + err = cudaMemcpy(_patchParamBuffer, + patchTable.GetPatchParamBuffer(), + patchParamSize * sizeof(Osd::PatchParam), + cudaMemcpyHostToDevice); + if (err != cudaSuccess) return false; + + return true; +} + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv + diff --git a/opensubdiv/osd/cudaPatchTable.h b/opensubdiv/osd/cudaPatchTable.h new file mode 100644 index 00000000..50f2a8ca --- /dev/null +++ b/opensubdiv/osd/cudaPatchTable.h @@ -0,0 +1,83 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H +#define OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H + +#include "../version.h" + +#include "../osd/nonCopyable.h" +#include "../osd/types.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far{ + class PatchTable; +}; + +namespace Osd { + +/// \brief CUDA patch table +/// +/// This class is a cuda buffer representation of Far::PatchTable. +/// +/// CudaEvaluator consumes this table to evaluate on the patches. +/// +/// +class CudaPatchTable : private NonCopyable { +public: + /// Creator. Returns NULL if error + static CudaPatchTable *Create(Far::PatchTable const *patchTable, + void *deviceContext = NULL); + /// Destructor + ~CudaPatchTable(); + + /// Returns the cuda memory of the array of Osd::PatchArray buffer + void *GetPatchArrayBuffer() const { return _patchArrays; } + + /// Returns the cuda memory of the patch control vertices + void *GetPatchIndexBuffer() const { return _indexBuffer; } + + /// Returns the cuda memory of the array of Osd::PatchParam buffer + void *GetPatchParamBuffer() const { return _patchParamBuffer; } + +protected: + CudaPatchTable(); + + bool allocate(Far::PatchTable const *patchTable); + + void *_patchArrays; + void *_indexBuffer; + void *_patchParamBuffer; +}; + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + +#endif // OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H diff --git a/opensubdiv/osd/cudaVertexBuffer.cpp b/opensubdiv/osd/cudaVertexBuffer.cpp index fb3e72b7..38ac62db 100644 --- a/opensubdiv/osd/cudaVertexBuffer.cpp +++ b/opensubdiv/osd/cudaVertexBuffer.cpp @@ -43,7 +43,8 @@ CudaVertexBuffer::~CudaVertexBuffer() { } CudaVertexBuffer * -CudaVertexBuffer::Create(int numElements, int numVertices) { +CudaVertexBuffer::Create(int numElements, int numVertices, + void * /*deviceContext */) { CudaVertexBuffer *instance = new CudaVertexBuffer(numElements, numVertices); if (instance->allocate()) return instance; @@ -52,7 +53,8 @@ CudaVertexBuffer::Create(int numElements, int numVertices) { } void -CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) { +CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices, + void * /*deviceContext*/) { size_t size = _numElements * numVertices * sizeof(float); diff --git a/opensubdiv/osd/cudaVertexBuffer.h b/opensubdiv/osd/cudaVertexBuffer.h index 6a660142..df1d2dec 100644 --- a/opensubdiv/osd/cudaVertexBuffer.h +++ b/opensubdiv/osd/cudaVertexBuffer.h @@ -27,6 +27,8 @@ #include "../version.h" +#include + namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { @@ -41,14 +43,16 @@ class CudaVertexBuffer { public: /// Creator. Returns NULL if error. - static CudaVertexBuffer * Create(int numElements, int numVertices); + static CudaVertexBuffer * Create(int numElements, int numVertices, + void *deviceContext = NULL); /// Destructor. ~CudaVertexBuffer(); /// This method is meant to be used in client code in order to provide coarse /// vertices data to Osd. - void UpdateData(const float *src, int startVertex, int numVertices); + void UpdateData(const float *src, int startVertex, int numVertices, + void *deviceContext=NULL); /// Returns how many elements defined in this vertex buffer. int GetNumElements() const; diff --git a/opensubdiv/osd/d3d11PatchTable.h b/opensubdiv/osd/d3d11PatchTable.h index 5fed93b7..69b3aa47 100644 --- a/opensubdiv/osd/d3d11PatchTable.h +++ b/opensubdiv/osd/d3d11PatchTable.h @@ -30,6 +30,7 @@ #include #include "../far/patchDescriptor.h" #include "../osd/nonCopyable.h" +#include "../osd/types.h" struct ID3D11Buffer; struct ID3D11ShaderResourceView; @@ -49,33 +50,6 @@ class D3D11PatchTable : private NonCopyable { public: typedef ID3D11Buffer * VertexBufferBinding; - // XXX: this struct will be further refactored. - class PatchArray { - public: - PatchArray(Far::PatchDescriptor desc, int numPatches, - int indexBase, int primitiveIdBase) : - desc(desc), numPatches(numPatches), indexBase(indexBase), - primitiveIdBase(primitiveIdBase) {} - Far::PatchDescriptor const &GetDescriptor() const { - return desc; - } - int GetNumPatches() const { - return numPatches; - } - int GetIndexBase() const { - return indexBase; - } - int GetPrimitiveIdBase() const { - return primitiveIdBase; - } - private: - Far::PatchDescriptor desc; - int numPatches; - int indexBase; // an offset within the index buffer - int primitiveIdBase; // an offset within the patch param buffer - }; - typedef std::vector PatchArrayVector; - D3D11PatchTable(); ~D3D11PatchTable(); diff --git a/opensubdiv/osd/glComputeEvaluator.cpp b/opensubdiv/osd/glComputeEvaluator.cpp index a43bd99b..800df867 100644 --- a/opensubdiv/osd/glComputeEvaluator.cpp +++ b/opensubdiv/osd/glComputeEvaluator.cpp @@ -88,16 +88,66 @@ GLStencilTableSSBO::~GLStencilTableSSBO() { // --------------------------------------------------------------------------- -GLComputeEvaluator::GLComputeEvaluator() : - _program(0), _workGroupSize(64) { +GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64) { + memset (&_stencilKernel, 0, sizeof(_stencilKernel)); + memset (&_patchKernel, 0, sizeof(_patchKernel)); } GLComputeEvaluator::~GLComputeEvaluator() { - if (_program) { - glDeleteProgram(_program); + if (_stencilKernel.program) { + glDeleteProgram(_stencilKernel.program); + } + if (_patchKernel.program) { + glDeleteProgram(_patchKernel.program); } } +static GLuint +compileKernel(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + const char *kernelDefine, + int workGroupSize) { + GLuint program = glCreateProgram(); + + GLuint shader = glCreateShader(GL_COMPUTE_SHADER); + + std::ostringstream defines; + defines << "#define LENGTH " << srcDesc.length << "\n" + << "#define SRC_STRIDE " << srcDesc.stride << "\n" + << "#define DST_STRIDE " << dstDesc.stride << "\n" + << "#define WORK_GROUP_SIZE " << workGroupSize << "\n" + << kernelDefine << "\n"; + std::string defineStr = defines.str(); + + const char *shaderSources[3] = {"#version 430\n", 0, 0}; + shaderSources[1] = defineStr.c_str(); + shaderSources[2] = shaderSource; + glShaderSource(shader, 3, shaderSources, NULL); + glCompileShader(shader); + glAttachShader(program, shader); + + GLint linked = 0; + glLinkProgram(program); + glGetProgramiv(program, GL_LINK_STATUS, &linked); + + if (linked == GL_FALSE) { + char buffer[1024]; + glGetShaderInfoLog(shader, 1024, NULL, buffer); + Far::Error(Far::FAR_RUNTIME_ERROR, buffer); + + glGetProgramInfoLog(program, 1024, NULL, buffer); + Far::Error(Far::FAR_RUNTIME_ERROR, buffer); + + glDeleteProgram(program); + return 0; + } + + glDeleteShader(shader); + + return program; +} + + bool GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc, VertexBufferDescriptor const &dstDesc) { @@ -108,58 +158,55 @@ GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc, return false; } - if (_program) { - glDeleteProgram(_program); - _program = 0; + // create stencil kernel + if (_stencilKernel.program) { + glDeleteProgram(_stencilKernel.program); } - _program = glCreateProgram(); - - GLuint shader = glCreateShader(GL_COMPUTE_SHADER); - - std::ostringstream defines; - defines << "#define LENGTH " << srcDesc.length << "\n" - << "#define SRC_STRIDE " << srcDesc.stride << "\n" - << "#define DST_STRIDE " << dstDesc.stride << "\n" - << "#define WORK_GROUP_SIZE " << _workGroupSize << "\n"; - std::string defineStr = defines.str(); - - const char *shaderSources[3] = {"#version 430\n", 0, 0}; - shaderSources[1] = defineStr.c_str(); - shaderSources[2] = shaderSource; - glShaderSource(shader, 3, shaderSources, NULL); - glCompileShader(shader); - glAttachShader(_program, shader); - - GLint linked = 0; - glLinkProgram(_program); - glGetProgramiv(_program, GL_LINK_STATUS, &linked); - - if (linked == GL_FALSE) { - char buffer[1024]; - glGetShaderInfoLog(shader, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glGetProgramInfoLog(_program, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glDeleteProgram(_program); - _program = 0; - return false; - } - - glDeleteShader(shader); + _stencilKernel.program = compileKernel( + srcDesc, dstDesc, + "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS", + _workGroupSize); + if (_stencilKernel.program == 0) return false; // store uniform locations for the compute kernel program. - _uniformSizes = glGetUniformLocation(_program, "stencilSizes"); - _uniformOffsets = glGetUniformLocation(_program, "stencilOffsets"); - _uniformIndices = glGetUniformLocation(_program, "stencilIndices"); - _uniformWeights = glGetUniformLocation(_program, "stencilIWeights"); + _stencilKernel.uniformSizes = + glGetUniformLocation(_stencilKernel.program, "stencilSizes"); + _stencilKernel.uniformOffsets = + glGetUniformLocation(_stencilKernel.program, "stencilOffsets"); + _stencilKernel.uniformIndices = + glGetUniformLocation(_stencilKernel.program, "stencilIndices"); + _stencilKernel.uniformWeights = + glGetUniformLocation(_stencilKernel.program, "stencilIWeights"); + _stencilKernel.uniformStart = + glGetUniformLocation(_stencilKernel.program, "batchStart"); + _stencilKernel.uniformEnd = + glGetUniformLocation(_stencilKernel.program, "batchEnd"); + _stencilKernel.uniformSrcOffset = + glGetUniformLocation(_stencilKernel.program, "srcOffset"); + _stencilKernel.uniformDstOffset = + glGetUniformLocation(_stencilKernel.program, "dstOffset"); - _uniformStart = glGetUniformLocation(_program, "batchStart"); - _uniformEnd = glGetUniformLocation(_program, "batchEnd"); + // create patch kernel + if (_patchKernel.program) { + glDeleteProgram(_patchKernel.program); + } + _patchKernel.program = compileKernel( + srcDesc, dstDesc, + "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES", + _workGroupSize); + if (_patchKernel.program == 0) return false; - _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset"); - _uniformDstOffset = glGetUniformLocation(_program, "dstOffset"); + // uniform locaitons + _patchKernel.uniformSrcOffset = + glGetUniformLocation(_patchKernel.program, "srcOffset"); + _patchKernel.uniformDstOffset = + glGetUniformLocation(_patchKernel.program, "dstOffset"); + _patchKernel.uniformPatchArray = + glGetUniformLocation(_patchKernel.program, "patchArray"); + _patchKernel.uniformDuDesc = + glGetUniformLocation(_patchKernel.program, "dstDuDesc"); + _patchKernel.uniformDvDesc = + glGetUniformLocation(_patchKernel.program, "dstDvDesc"); return true; } @@ -183,7 +230,7 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer, GLuint weightsBuffer, int start, int end) const { - if (!_program) return false; + if (!_stencilKernel.program) return false; int count = end - start; if (count <= 0) { return true; @@ -196,12 +243,12 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer, glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer); - glUseProgram(_program); + glUseProgram(_stencilKernel.program); - glUniform1i(_uniformStart, start); - glUniform1i(_uniformEnd, end); - glUniform1i(_uniformSrcOffset, srcDesc.offset); - glUniform1i(_uniformDstOffset, dstDesc.offset); + glUniform1i(_stencilKernel.uniformStart, start); + glUniform1i(_stencilKernel.uniformEnd, end); + glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset); + glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset); glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1); @@ -218,6 +265,52 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer, return true; } +bool +GLComputeEvaluator::EvalPatches( + GLuint srcBuffer, VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, VertexBufferDescriptor const &dstDesc, + GLuint duBuffer, VertexBufferDescriptor const &duDesc, + GLuint dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const { + + if (!_patchKernel.program) return false; + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, patchCoordsBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchIndexBuffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchParamsBuffer); + + glUseProgram(_patchKernel.program); + + glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset); + glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset); + glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(), + (const GLint*)&patchArrays[0]); + glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride); + glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride); + + glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1); + + glUseProgram(0); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0); + + return true; +} + } // end namespace Osd } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/glComputeEvaluator.h b/opensubdiv/osd/glComputeEvaluator.h index 1e469f6f..8b7f41a5 100644 --- a/opensubdiv/osd/glComputeEvaluator.h +++ b/opensubdiv/osd/glComputeEvaluator.h @@ -28,6 +28,7 @@ #include "../version.h" #include "../osd/opengl.h" +#include "../osd/types.h" #include "../osd/vertexDescriptor.h" namespace OpenSubdiv { @@ -92,26 +93,32 @@ public: /// Destructor. note that the GL context must be made current. ~GLComputeEvaluator(); + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + /// \brief Generic static compute function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// must have BindVBO() method returning a - /// const float pointer for read + /// GL buffer object of source data /// /// @param srcDesc vertex buffer descriptor for the input buffer /// /// @param dstBuffer Output primvar buffer /// must have BindVBO() method returning a - /// float pointer for write + /// GL buffer object of destination data /// /// @param dstDesc vertex buffer descriptor for the output buffer /// /// @param stencilTable stencil table to be applied. The table must have /// SSBO interfaces. /// - /// @param evaluator cached compiled instance. Clients are supposed to + /// @param instance cached compiled instance. Clients are supposed to /// pre-compile an instance of this class and provide /// to this function. If it's null the kernel still /// compute by instantiating on-demand kernel although @@ -119,25 +126,25 @@ public: /// /// @param deviceContext not used in the GLSL kernel /// - template - static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable, - GLComputeEvaluator const *instance, - void * deviceContext = NULL) { + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + GLComputeEvaluator const *instance, + void * deviceContext = NULL) { + if (instance) { - return instance->EvalStencils(srcVertexBuffer, srcDesc, - dstVertexBuffer, dstDesc, + return instance->EvalStencils(srcBuffer, srcDesc, + dstBuffer, dstDesc, stencilTable); } else { // Create a kernel on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc); if (instance) { - bool r = instance->EvalStencils(srcVertexBuffer, srcDesc, - dstVertexBuffer, dstDesc, + bool r = instance->EvalStencils(srcBuffer, srcDesc, + dstBuffer, dstDesc, stencilTable); delete instance; return r; @@ -148,15 +155,14 @@ public: /// Dispatch the GLSL compute kernel on GPU asynchronously. /// returns false if the kernel hasn't been compiled yet. - template - bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable) const { - return EvalStencils(srcVertexBuffer->BindVBO(), + template + bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const { + return EvalStencils(srcBuffer->BindVBO(), srcDesc, - dstVertexBuffer->BindVBO(), + dstBuffer->BindVBO(), dstDesc, stencilTable->GetSizesBuffer(), stencilTable->GetOffsetsBuffer(), @@ -168,10 +174,8 @@ public: /// Dispatch the GLSL compute kernel on GPU asynchronously. /// returns false if the kernel hasn't been compiled yet. - bool EvalStencils(GLuint srcBuffer, - VertexBufferDescriptor const &srcDesc, - GLuint dstBuffer, - VertexBufferDescriptor const &dstDesc, + bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, VertexBufferDescriptor const &dstDesc, GLuint sizesBuffer, GLuint offsetsBuffer, GLuint indicesBuffer, @@ -179,6 +183,271 @@ public: int start, int end) const; + + /// ---------------------------------------------------------------------- + /// + /// Limit evaluations with PatchTable + /// + /// ---------------------------------------------------------------------- + /// + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void * deviceContext = NULL) { + + if (instance) { + return instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + numPatchCoords, patchCoords, + patchTable); + } else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + numPatchCoords, patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer + /// + /// @param duDesc + /// + /// @param dvBuffer + /// + /// @param dvDesc + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLComputeEvaluator const *instance, + void * deviceContext = NULL) { + + if (instance) { + return instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + duBuffer, duDesc, + dvBuffer, dvDesc, + numPatchCoords, patchCoords, + patchTable); + } else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + duBuffer, duDesc, + dvBuffer, dvDesc, + numPatchCoords, patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBOBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template + bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const { + + return EvalPatches(srcBuffer->BindVBO(), srcDesc, + dstBuffer->BindVBO(), dstDesc, + 0, VertexBufferDescriptor(), + 0, VertexBufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output U-derivatives buffer + /// must have BindVBO() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output V-derivatives buffer + /// must have BindVBO() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template + bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const { + + return EvalPatches(srcBuffer->BindVBO(), srcDesc, + dstBuffer->BindVBO(), dstDesc, + duBuffer->BindVBO(), duDesc, + dvBuffer->BindVBO(), dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, VertexBufferDescriptor const &dstDesc, + GLuint duBuffer, VertexBufferDescriptor const &duDesc, + GLuint dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const; + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- + /// Configure GLSL kernel. A valid GL context must be made current before /// calling this function. Returns false if it fails to compile the kernel. bool Compile(VertexBufferDescriptor const &srcDesc, @@ -188,18 +457,27 @@ public: static void Synchronize(void *deviceContext); private: - GLuint _program; + struct _StencilKernel { + GLuint program; + GLuint uniformSizes; + GLuint uniformOffsets; + GLuint uniformIndices; + GLuint uniformWeights; + GLuint uniformStart; + GLuint uniformEnd; + GLuint uniformSrcOffset; + GLuint uniformDstOffset; + } _stencilKernel; - GLuint _uniformSizes, // stencil table - _uniformOffsets, - _uniformIndices, - _uniformWeights, + struct _PatchKernel { + GLuint program; + GLuint uniformSrcOffset; + GLuint uniformDstOffset; + GLuint uniformPatchArray; + GLuint uniformDuDesc; + GLuint uniformDvDesc; - _uniformStart, // range - _uniformEnd, - - _uniformSrcOffset, // src buffer offset (in elements) - _uniformDstOffset; // dst buffer offset (in elements) + } _patchKernel; int _workGroupSize; }; diff --git a/opensubdiv/osd/glPatchTable.cpp b/opensubdiv/osd/glPatchTable.cpp index 986133d3..aa9ba90d 100644 --- a/opensubdiv/osd/glPatchTable.cpp +++ b/opensubdiv/osd/glPatchTable.cpp @@ -26,6 +26,7 @@ #include "../far/patchTable.h" #include "../osd/opengl.h" +#include "../osd/cpuPatchTable.h" namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { @@ -33,11 +34,14 @@ namespace OPENSUBDIV_VERSION { namespace Osd { GLPatchTable::GLPatchTable() : - _indexBuffer(0), _patchParamTexture(0) { + _patchIndexBuffer(0), _patchParamBuffer(0), + _patchIndexTexture(0), _patchParamTexture(0) { } GLPatchTable::~GLPatchTable() { - if (_indexBuffer) glDeleteBuffers(1, &_indexBuffer); + if (_patchIndexBuffer) glDeleteBuffers(1, &_patchIndexBuffer); + if (_patchParamBuffer) glDeleteBuffers(1, &_patchParamBuffer); + if (_patchIndexTexture) glDeleteTextures(1, &_patchIndexTexture); if (_patchParamTexture) glDeleteTextures(1, &_patchParamTexture); } @@ -52,84 +56,56 @@ GLPatchTable::Create(Far::PatchTable const *farPatchTable, bool GLPatchTable::allocate(Far::PatchTable const *farPatchTable) { - glGenBuffers(1, &_indexBuffer); + glGenBuffers(1, &_patchIndexBuffer); + glGenBuffers(1, &_patchParamBuffer); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _indexBuffer); - std::vector buffer; - std::vector ppBuffer; + CpuPatchTable patchTable(farPatchTable); - // needs reserve? + size_t numPatchArrays = patchTable.GetNumPatchArrays(); + GLsizei indexSize = (GLsizei)patchTable.GetPatchIndexSize(); + GLsizei patchParamSize = (GLsizei)patchTable.GetPatchParamSize(); - int nPatchArrays = farPatchTable->GetNumPatchArrays(); - - // for each patchArray - for (int j = 0; j < nPatchArrays; ++j) { - PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j), - farPatchTable->GetNumPatches(j), - (int)buffer.size(), - (int)ppBuffer.size()/3); - _patchArrays.push_back(patchArray); - - // indices - Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j); - for (int k = 0; k < indices.size(); ++k) { - buffer.push_back(indices[k]); - } - - // patchParams -#if 0 - // XXX: we need sharpness interface for patcharray or put sharpness - // into patchParam. - Far::ConstPatchParamArray patchParams = - farPatchTable->GetPatchParams(j); - for (int k = 0; k < patchParams.size(); ++k) { - float sharpness = 0.0; - ppBuffer.push_back(patchParams[k].faceIndex); - ppBuffer.push_back(patchParams[k].bitField.field); - ppBuffer.push_back(*((unsigned int *)&sharpness)); - } -#else - // XXX: workaround. GetPatchParamTable() will be deprecated though. - Far::PatchParamTable const & patchParamTable = - farPatchTable->GetPatchParamTable(); - std::vector const &sharpnessIndexTable = - farPatchTable->GetSharpnessIndexTable(); - int numPatches = farPatchTable->GetNumPatches(j); - for (int k = 0; k < numPatches; ++k) { - float sharpness = 0.0; - int patchIndex = (int)ppBuffer.size()/3; - if (patchIndex < (int)sharpnessIndexTable.size()) { - int sharpnessIndex = sharpnessIndexTable[patchIndex]; - if (sharpnessIndex >= 0) - sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex]; - } - ppBuffer.push_back(patchParamTable[patchIndex].faceIndex); - ppBuffer.push_back(patchParamTable[patchIndex].bitField.field); - ppBuffer.push_back(*((unsigned int *)&sharpness)); - } -#endif - } + // copy patch array + _patchArrays.insert(_patchArrays.end(), + patchTable.GetPatchArrayBuffer(), + patchTable.GetPatchArrayBuffer() + numPatchArrays); + // copy index buffer + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _patchIndexBuffer); glBufferData(GL_ELEMENT_ARRAY_BUFFER, - (int)buffer.size()*sizeof(int), &buffer[0], GL_STATIC_DRAW); - - // patchParam is currently expected to be texture (it can be SSBO) - GLuint texBuffer = 0; - glGenBuffers(1, &texBuffer); - glBindBuffer(GL_ARRAY_BUFFER, texBuffer); - glBufferData(GL_ARRAY_BUFFER, ppBuffer.size()*sizeof(unsigned int), - &ppBuffer[0], GL_STATIC_DRAW); - - glGenTextures(1, &_patchParamTexture); - glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, texBuffer); - glBindTexture(GL_TEXTURE_BUFFER, 0); - - glDeleteBuffers(1, &texBuffer); - + indexSize * sizeof(GLint), + patchTable.GetPatchIndexBuffer(), + GL_STATIC_DRAW); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + // copy patchparam buffer + glBindBuffer(GL_ARRAY_BUFFER, _patchParamBuffer); + glBufferData(GL_ARRAY_BUFFER, + patchParamSize * sizeof(PatchParam), + patchTable.GetPatchParamBuffer(), + GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); + // make both buffer as texture buffers too. + glGenTextures(1, &_patchIndexTexture); + glGenTextures(1, &_patchParamTexture); + + GLuint buffer; + glGenBuffers(1, &buffer); + glBindBuffer(GL_ARRAY_BUFFER, buffer); + glBufferData(GL_ARRAY_BUFFER, + indexSize * sizeof(GLint), + patchTable.GetPatchIndexBuffer(), + GL_STATIC_DRAW); + + glBindTexture(GL_TEXTURE_BUFFER, _patchIndexTexture); +// glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, _patchIndexBuffer); + glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, buffer); + + glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, _patchParamBuffer); + glBindTexture(GL_TEXTURE_BUFFER, 0); + return true; } diff --git a/opensubdiv/osd/glPatchTable.h b/opensubdiv/osd/glPatchTable.h index 4fbf1b0b..e7e67861 100644 --- a/opensubdiv/osd/glPatchTable.h +++ b/opensubdiv/osd/glPatchTable.h @@ -27,10 +27,9 @@ #include "../version.h" -#include -#include "../far/patchDescriptor.h" #include "../osd/nonCopyable.h" #include "../osd/opengl.h" +#include "../osd/types.h" namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { @@ -45,34 +44,6 @@ class GLPatchTable : private NonCopyable { public: typedef GLuint VertexBufferBinding; - // XXX: this struct will be further refactored. - class PatchArray { - public: - PatchArray(Far::PatchDescriptor desc, int numPatches, - int indexBase, int primitiveIdBase) : - desc(desc), numPatches(numPatches), indexBase(indexBase), - primitiveIdBase(primitiveIdBase) {} - Far::PatchDescriptor const &GetDescriptor() const { - return desc; - } - int GetNumPatches() const { - return numPatches; - } - int GetIndexBase() const { - return indexBase; - } - int GetPrimitiveIdBase() const { - return primitiveIdBase; - } - private: - Far::PatchDescriptor desc; - int numPatches; - int indexBase; // an offset within the index buffer - int primitiveIdBase; // an offset within the patch param buffer - }; - typedef std::vector PatchArrayVector; - - GLPatchTable(); ~GLPatchTable(); static GLPatchTable *Create(Far::PatchTable const *farPatchTable, @@ -84,7 +55,17 @@ public: /// Returns the GL index buffer containing the patch control vertices GLuint GetPatchIndexBuffer() const { - return _indexBuffer; + return _patchIndexBuffer; + } + + /// Returns the GL index buffer containing the patch parameter + GLuint GetPatchParamBuffer() const { + return _patchParamBuffer; + } + + /// Returns the GL texture buffer containing the patch control vertices + GLuint GetPatchIndexTextureBuffer() const { + return _patchIndexTexture; } /// Returns the GL texture buffer containing the patch parameter @@ -93,11 +74,17 @@ public: } protected: + GLPatchTable(); + // allocate buffers from patchTable bool allocate(Far::PatchTable const *farPatchTable); PatchArrayVector _patchArrays; - GLuint _indexBuffer; + + GLuint _patchIndexBuffer; + GLuint _patchParamBuffer; + + GLuint _patchIndexTexture; GLuint _patchParamTexture; }; diff --git a/opensubdiv/osd/glXFBEvaluator.cpp b/opensubdiv/osd/glXFBEvaluator.cpp index 807a7ec6..6ff3c8d9 100644 --- a/opensubdiv/osd/glXFBEvaluator.cpp +++ b/opensubdiv/osd/glXFBEvaluator.cpp @@ -108,37 +108,36 @@ GLStencilTableTBO::~GLStencilTableTBO() { // --------------------------------------------------------------------------- -GLXFBEvaluator::GLXFBEvaluator() : - _program(0), _srcBufferTexture(0), - _uniformSrcBufferTexture(0), _uniformSizesTexture(0), - _uniformOffsetsTexture(0), _uniformIndicesTexture(0), - _uniformWeightsTexture(0), _uniformStart(0), _uniformEnd(0), - _uniformSrcOffset(0) { +GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) { + memset (&_stencilKernel, 0, sizeof(_stencilKernel)); + memset (&_patchKernel, 0, sizeof(_patchKernel)); } GLXFBEvaluator::~GLXFBEvaluator() { - if (_program) { - glDeleteProgram(_program); + if (_stencilKernel.program) { + glDeleteProgram(_stencilKernel.program); + } + if (_patchKernel.program) { + glDeleteProgram(_patchKernel.program); } if (_srcBufferTexture) { glDeleteTextures(1, &_srcBufferTexture); } } -bool -GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc, - VertexBufferDescriptor const &dstDesc) { - if (_program) { - glDeleteProgram(_program); - _program = 0; - } - _program = glCreateProgram(); +static GLuint +compileKernel(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + const char *kernelDefine) { + + GLuint program = glCreateProgram(); GLuint shader = glCreateShader(GL_VERTEX_SHADER); std::ostringstream defines; defines << "#define LENGTH " << srcDesc.length << "\n" - << "#define SRC_STRIDE " << srcDesc.stride << "\n"; + << "#define SRC_STRIDE " << srcDesc.stride << "\n" + << kernelDefine << "\n"; std::string defineStr = defines.str(); const char *shaderSources[3] = {"#version 410\n", NULL, NULL}; @@ -147,7 +146,7 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc, shaderSources[2] = shaderSource; glShaderSource(shader, 3, shaderSources, NULL); glCompileShader(shader); - glAttachShader(_program, shader); + glAttachShader(program, shader); std::vector outputs; std::vector pOutputs; @@ -183,40 +182,83 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc, } } - glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(), + glTransformFeedbackVaryings(program, (GLsizei)outputs.size(), &pOutputs[0], GL_INTERLEAVED_ATTRIBS); GLint linked = 0; - glLinkProgram(_program); - glGetProgramiv(_program, GL_LINK_STATUS, &linked); + glLinkProgram(program); + glGetProgramiv(program, GL_LINK_STATUS, &linked); if (linked == GL_FALSE) { char buffer[1024]; glGetShaderInfoLog(shader, 1024, NULL, buffer); Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - glGetProgramInfoLog(_program, 1024, NULL, buffer); + glGetProgramInfoLog(program, 1024, NULL, buffer); Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - glDeleteProgram(_program); - _program = 0; - return false; + glDeleteProgram(program); + program = 0; } glDeleteShader(shader); - // set uniform locations for compute kernels - _uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer"); + return program; +} - _uniformSizesTexture = glGetUniformLocation(_program, "sizes"); - _uniformOffsetsTexture = glGetUniformLocation(_program, "offsets"); - _uniformIndicesTexture = glGetUniformLocation(_program, "indices"); - _uniformWeightsTexture = glGetUniformLocation(_program, "weights"); +bool +GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc) { - _uniformStart = glGetUniformLocation(_program, "batchStart"); - _uniformEnd = glGetUniformLocation(_program, "batchEnd"); + // create stencil kernel + if (_stencilKernel.program) { + glDeleteProgram(_stencilKernel.program); + } + _stencilKernel.program = compileKernel( + srcDesc, dstDesc, + "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS"); + if (_stencilKernel.program == 0) return false; - _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset"); + // cache uniform locations + _stencilKernel.uniformSrcBufferTexture + = glGetUniformLocation(_stencilKernel.program, "vertexBuffer"); + _stencilKernel.uniformSrcOffset + = glGetUniformLocation(_stencilKernel.program, "srcOffset"); + + _stencilKernel.uniformSizesTexture + = glGetUniformLocation(_stencilKernel.program, "sizes"); + _stencilKernel.uniformOffsetsTexture + = glGetUniformLocation(_stencilKernel.program, "offsets"); + _stencilKernel.uniformIndicesTexture + = glGetUniformLocation(_stencilKernel.program, "indices"); + _stencilKernel.uniformWeightsTexture + = glGetUniformLocation(_stencilKernel.program, "weights"); + _stencilKernel.uniformStart + = glGetUniformLocation(_stencilKernel.program, "batchStart"); + _stencilKernel.uniformEnd + = glGetUniformLocation(_stencilKernel.program, "batchEnd"); + + // create patch kernel + if (_patchKernel.program) { + glDeleteProgram(_patchKernel.program); + } + _patchKernel.program = compileKernel( + srcDesc, dstDesc, + "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES"); + if (_patchKernel.program == 0) return false; + + // cache uniform locations + _patchKernel.uniformSrcBufferTexture + = glGetUniformLocation(_patchKernel.program, "vertexBuffer"); + _patchKernel.uniformSrcOffset + = glGetUniformLocation(_patchKernel.program, "srcOffset"); + + _patchKernel.uniformPatchArray + = glGetUniformLocation(_patchKernel.program, "patchArray"); + _patchKernel.uniformPatchParamTexture + = glGetUniformLocation(_patchKernel.program, "patchParamBuffer"); + _patchKernel.uniformPatchIndexTexture + = glGetUniformLocation(_patchKernel.program, "patchIndexBuffer"); // create a texture for input buffer if (!_srcBufferTexture) { @@ -255,7 +297,7 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer, GLuint weightsTexture, int start, int end) const { - if (!_program) return false; + if (!_stencilKernel.program) return false; int count = end - start; if (count <= 0) { return true; @@ -268,25 +310,25 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer, glBindVertexArray(vao); glEnable(GL_RASTERIZER_DISCARD); - glUseProgram(_program); + glUseProgram(_stencilKernel.program); // Set input VBO as a texture buffer. glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture); glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer); glBindTexture(GL_TEXTURE_BUFFER, 0); - bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0); + bindTexture(_stencilKernel.uniformSrcBufferTexture, _srcBufferTexture, 0); // bind stencil table textures. - bindTexture(_uniformSizesTexture, sizesTexture, 1); - bindTexture(_uniformOffsetsTexture, offsetsTexture, 2); - bindTexture(_uniformIndicesTexture, indicesTexture, 3); - bindTexture(_uniformWeightsTexture, weightsTexture, 4); + bindTexture(_stencilKernel.uniformSizesTexture, sizesTexture, 1); + bindTexture(_stencilKernel.uniformOffsetsTexture, offsetsTexture, 2); + bindTexture(_stencilKernel.uniformIndicesTexture, indicesTexture, 3); + bindTexture(_stencilKernel.uniformWeightsTexture, weightsTexture, 4); // set batch range - glUniform1i(_uniformStart, start); - glUniform1i(_uniformEnd, end); - glUniform1i(_uniformSrcOffset, srcDesc.offset); + glUniform1i(_stencilKernel.uniformStart, start); + glUniform1i(_stencilKernel.uniformEnd, end); + glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset); // The destination buffer is bound at vertex boundary. // @@ -347,6 +389,94 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer, return true; } +bool +GLXFBEvaluator::EvalPatches( + GLuint srcBuffer, VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, VertexBufferDescriptor const &dstDesc, + GLuint duBuffer, VertexBufferDescriptor const & /*duDesc*/, + GLuint dvBuffer, VertexBufferDescriptor const & /*dvDesc*/, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexTexture, + GLuint patchParamTexture) const { + if (!_patchKernel.program) return false; + + if (duBuffer != 0 || dvBuffer != 0) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "GLXFBEvaluator doesn't support derivative evaluation yet.\n"); + } + + // bind vertex array + // always create new one, to be safe with multiple contexts (slow though) + GLuint vao = 0; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + + glEnable(GL_RASTERIZER_DISCARD); + glUseProgram(_patchKernel.program); + + // Set input VBO as a texture buffer. + glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture); + glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer); + glBindTexture(GL_TEXTURE_BUFFER, 0); + + bindTexture(_patchKernel.uniformSrcBufferTexture, _srcBufferTexture, 0); + + // bind patch index and patch param textures. + bindTexture(_patchKernel.uniformPatchParamTexture, patchParamTexture, 1); + bindTexture(_patchKernel.uniformPatchIndexTexture, patchIndexTexture, 2); + + // set other uniforms + glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(), + (const GLint*)&patchArrays[0]); + glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset); + + // input patchcoords + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + int stride = sizeof(int) * 5; // patchcoord = int*5 struct + glBindBuffer(GL_ARRAY_BUFFER, patchCoordsBuffer); + glVertexAttribIPointer(0, 3, GL_UNSIGNED_INT, stride, (void*)0); + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, stride, (void*)(sizeof(int)*3)); + + int dstBufferBindOffset = + dstDesc.offset - (dstDesc.offset % dstDesc.stride); + + // bind destination buffer + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, + 0, dstBuffer, + dstBufferBindOffset * sizeof(float), + numPatchCoords * dstDesc.stride * sizeof(float)); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, numPatchCoords); + glEndTransformFeedback(); + + glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); + + // unbind textures + for (int i = 0; i < 3; ++i) { + glActiveTexture(GL_TEXTURE0 + i); + glBindTexture(GL_TEXTURE_BUFFER, 0); + } + + glDisable(GL_RASTERIZER_DISCARD); + glUseProgram(0); + glActiveTexture(GL_TEXTURE0); + + glDisableVertexAttribArray(0); + glDisableVertexAttribArray(1); + + // revert vao + glBindVertexArray(0); + glDeleteVertexArrays(1, &vao); + + + return true; +} + + } // end namespace Osd } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/glXFBEvaluator.h b/opensubdiv/osd/glXFBEvaluator.h index 65881670..e04d3bb7 100644 --- a/opensubdiv/osd/glXFBEvaluator.h +++ b/opensubdiv/osd/glXFBEvaluator.h @@ -28,6 +28,7 @@ #include "../version.h" #include "../osd/opengl.h" +#include "../osd/types.h" #include "../osd/vertexDescriptor.h" namespace OpenSubdiv { @@ -93,6 +94,12 @@ public: /// Destructor. note that the GL context must be made current. ~GLXFBEvaluator(); + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + /// \brief Generic static stencil function. This function has a same /// signature as other device kernels have so that it can be called /// transparently from OsdMesh template interface. @@ -120,25 +127,25 @@ public: /// /// @param deviceContext not used in the GLSLTransformFeedback kernel /// - template - static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable, - GLXFBEvaluator const *instance, - void * deviceContext = NULL) { + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + GLXFBEvaluator const *instance, + void * deviceContext = NULL) { + if (instance) { - return instance->EvalStencils(srcVertexBuffer, srcDesc, - dstVertexBuffer, dstDesc, + return instance->EvalStencils(srcBuffer, srcDesc, + dstBuffer, dstDesc, stencilTable); } else { // Create an instance on demand (slow) (void)deviceContext; // unused instance = Create(srcDesc, dstDesc); if (instance) { - bool r = instance->EvalStencils(srcVertexBuffer, srcDesc, - dstVertexBuffer, dstDesc, + bool r = instance->EvalStencils(srcBuffer, srcDesc, + dstBuffer, dstDesc, stencilTable); delete instance; return r; @@ -147,18 +154,30 @@ public: } } - /// Dispatch the GLSL compute kernel on GPU asynchronously. - /// returns false if the kernel hasn't been compiled yet. - template - bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable) const { - return EvalStencils(srcVertexBuffer->BindVBO(), - srcDesc, - dstVertexBuffer->BindVBO(), - dstDesc, + /// \brief dispatch eval stencils function. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object for destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. + /// + template + bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const { + + return EvalStencils(srcBuffer->BindVBO(), srcDesc, + dstBuffer->BindVBO(), dstDesc, stencilTable->GetSizesTexture(), stencilTable->GetOffsetsTexture(), stencilTable->GetIndicesTexture(), @@ -167,12 +186,31 @@ public: /* end = */ stencilTable->GetNumStencils()); } - /// Dispatch the GLSL compute kernel on GPU asynchronously. - /// returns false if the kernel hasn't been compiled yet. - bool EvalStencils(GLuint srcBuffer, - VertexBufferDescriptor const &srcDesc, - GLuint dstBuffer, - VertexBufferDescriptor const &dstDesc, + /// \brief Static eval stencils function, dispatch the GLSL XFB kernel on + /// on GPU asynchronously. + /// + /// @param srcBuffer GL buffer of input primvars. + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer GL buffer of output primvars. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param sizesBuffer GL buffer of the sizes in the stencil table + /// + /// @param offsetsBuffer GL buffer of the offsets in the stencil table + /// + /// @param indicesBuffer GL buffer of the indices in the stencil table + /// + /// @param weightsBuffer GL buffer of the weifgrs in the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, VertexBufferDescriptor const &dstDesc, GLuint sizesBuffer, GLuint offsetsBuffer, GLuint indicesBuffer, @@ -180,6 +218,270 @@ public: int start, int end) const; + /// ---------------------------------------------------------------------- + /// + /// Limit evaluations with PatchTable + /// + /// ---------------------------------------------------------------------- + /// + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLXFBEvaluator const *instance, + void * deviceContext = NULL) { + + if (instance) { + return instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + numPatchCoords, patchCoords, + patchTable); + } else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + numPatchCoords, patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a GL + /// buffer object of source data + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a GL + /// buffer object of destination data + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer + /// + /// @param duDesc + /// + /// @param dvBuffer + /// + /// @param dvDesc + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindVBO() method returning an + /// array of PatchCoord struct in VBO. + /// + /// @param patchTable GLPatchTable or equivalent + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLXFB evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + GLXFBEvaluator const *instance, + void * deviceContext = NULL) { + + if (instance) { + return instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + duBuffer, duDesc, + dvBuffer, dvDesc, + numPatchCoords, patchCoords, + patchTable); + } else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc); + if (instance) { + bool r = instance->EvalPatches(srcBuffer, srcDesc, + dstBuffer, dstDesc, + duBuffer, duDesc, + dvBuffer, dvDesc, + numPatchCoords, patchCoords, + patchTable); + delete instance; + return r; + } + return false; + } + } + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCudaBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// must have BindCudaBuffer() method returning an + /// array of PatchCoord struct in cuda memory. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template + bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const { + + return EvalPatches(srcBuffer->BindVBO(), srcDesc, + dstBuffer->BindVBO(), dstDesc, + 0, VertexBufferDescriptor(), + 0, VertexBufferDescriptor(), + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexTextureBuffer(), + patchTable->GetPatchParamTextureBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCudaBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output s-derivatives buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output t-derivatives buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable GLPatchTable or equivalent + /// + template + bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable) const { + + return EvalPatches(srcBuffer->BindVBO(), srcDesc, + dstBuffer->BindVBO(), dstDesc, + duBuffer->BindVBO(), duDesc, + dvBuffer->BindVBO(), dvDesc, + numPatchCoords, + patchCoords->BindVBO(), + patchTable->GetPatchArrays(), + patchTable->GetPatchIndexTextureBuffer(), + patchTable->GetPatchParamTextureBuffer()); + } + + bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, VertexBufferDescriptor const &dstDesc, + GLuint duBuffer, VertexBufferDescriptor const &duDesc, + GLuint dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + GLuint patchCoordsBuffer, + const PatchArrayVector &patchArrays, + GLuint patchIndexBuffer, + GLuint patchParamsBuffer) const; + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- + /// Configure GLSL kernel. A valid GL context must be made current before /// calling this function. Returns false if it fails to compile the kernel. bool Compile(VertexBufferDescriptor const &srcDesc, @@ -189,19 +491,31 @@ public: static void Synchronize(void *kernel); private: - GLuint _program; - GLuint _srcBufferTexture; - GLuint _uniformSrcBufferTexture; - GLuint _uniformSizesTexture; - GLuint _uniformOffsetsTexture; - GLuint _uniformIndicesTexture; - GLuint _uniformWeightsTexture; + struct _StencilKernel { + GLuint program; + GLuint uniformSrcBufferTexture; + GLuint uniformSrcOffset; // src buffer offset (in elements) + + GLuint uniformSizesTexture; + GLuint uniformOffsetsTexture; + GLuint uniformIndicesTexture; + GLuint uniformWeightsTexture; + GLuint uniformStart; // range + GLuint uniformEnd; + } _stencilKernel; + + struct _PatchKernel { + GLuint program; + GLuint uniformSrcBufferTexture; + GLuint uniformSrcOffset; // src buffer offset (in elements) + + GLuint uniformPatchArray; + GLuint uniformPatchParamTexture; + GLuint uniformPatchIndexTexture; + } _patchKernel; - GLuint _uniformStart; // range - GLuint _uniformEnd; - GLuint _uniformSrcOffset; // src buffer offset (in elements) }; } // end namespace Osd diff --git a/opensubdiv/osd/glslComputeKernel.glsl b/opensubdiv/osd/glslComputeKernel.glsl index 946a2773..617d4351 100644 --- a/opensubdiv/osd/glslComputeKernel.glsl +++ b/opensubdiv/osd/glslComputeKernel.glsl @@ -24,18 +24,10 @@ //------------------------------------------------------------------------------ -uniform int batchStart = 0; -uniform int batchEnd = 0; uniform int srcOffset = 0; uniform int dstOffset = 0; - layout(binding=0) buffer src_buffer { float srcVertexBuffer[]; }; layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; }; -layout(binding=2) buffer stencilSizes { int _sizes[]; }; -layout(binding=3) buffer stencilOffsets { int _offsets[]; }; -layout(binding=4) buffer stencilIndices { int _indices[]; }; -layout(binding=5) buffer stencilWeights { float _weights[]; }; - layout(local_size_x=WORK_GROUP_SIZE, local_size_y=1, local_size_z=1) in; //------------------------------------------------------------------------------ @@ -73,6 +65,15 @@ void addWithWeight(inout Vertex v, const Vertex src, float weight) { } //------------------------------------------------------------------------------ +#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS) + +uniform int batchStart = 0; +uniform int batchEnd = 0; +layout(binding=2) buffer stencilSizes { int _sizes[]; }; +layout(binding=3) buffer stencilOffsets { int _offsets[]; }; +layout(binding=4) buffer stencilIndices { int _indices[]; }; +layout(binding=5) buffer stencilWeights { float _weights[]; }; + void main() { int current = int(gl_GlobalInvocationID.x) + batchStart; @@ -94,4 +95,194 @@ void main() { writeVertex(current, dst); } +#endif + //------------------------------------------------------------------------------ +#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES) + +// PERFORMANCE: stride could be constant, but not as significant as length + +//struct PatchArray { +// int patchType; +// int numPatches; +// int indexBase; // an offset within the index buffer +// int primitiveIdBase; // an offset within the patch param buffer +//}; +// # of patcharrays is 1 or 2. + +uniform ivec4 patchArray[2]; +uniform ivec3 dstDuDesc; +uniform ivec3 dstDvDesc; +layout(binding=2) buffer du_buffer { float dstDuBuffer[]; }; +layout(binding=3) buffer dv_buffer { float dstDvBuffer[]; }; + +struct PatchCoord { + int arrayIndex; + int patchIndex; + int vertIndex; + float s; + float t; +}; + +struct PatchParam { + int faceIndex; + uint patchBits; + float sharpness; +}; + +layout(binding=4) buffer patchCoord_buffer { PatchCoord patchCoords[]; }; +layout(binding=5) buffer patchIndex_buffer { int patchIndexBuffer[]; }; +layout(binding=6) buffer patchParam_buffer { PatchParam patchParamBuffer[]; }; + +void writeDu(int index, Vertex du) { + int duIndex = dstDuDesc.x + index * dstDuDesc.z; + for (int i = 0; i < LENGTH; ++i) { + dstDuBuffer[duIndex + i] = du.vertexData[i]; + } +} + +void writeDv(int index, Vertex dv) { + int dvIndex = dstDvDesc.x + index * dstDvDesc.z; + for (int i = 0; i < LENGTH; ++i) { + dstDvBuffer[dvIndex + i] = dv.vertexData[i]; + } +} + +void getBSplineWeights(float t, inout vec4 point, inout vec4 deriv) { + // The four uniform cubic B-Spline basis functions evaluated at t: + float one6th = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point.x = one6th * (1.0f - 3.0f*(t - t2) - t3); + point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3); + point.z = one6th * (1.0f + 3.0f*(t + t2 - t3)); + point.w = one6th * ( t3); + + // Derivatives of the above four basis functions at t: + deriv.x = -0.5f*t2 + t - 0.5f; + deriv.y = 1.5f*t2 - 2.0f*t; + deriv.z = -1.5f*t2 + t + 0.5f; + deriv.w = 0.5f*t2; +} + +uint getDepth(uint patchBits) { + return (patchBits & 0x7); +} + +float getParamFraction(uint patchBits) { + uint nonQuadRoot = (patchBits >> 3) & 0x1; + uint depth = getDepth(patchBits); + if (nonQuadRoot == 1) { + return 1.0f / float( 1 << (depth-1) ); + } else { + return 1.0f / float( 1 << depth ); + } +} + +vec2 normalizePatchCoord(uint patchBits, vec2 uv) { + float frac = getParamFraction(patchBits); + + uint iu = (patchBits >> 22) & 0x3ff; + uint iv = (patchBits >> 12) & 0x3ff; + + // top left corner + float pu = float(iu*frac); + float pv = float(iv*frac); + + // normalize u,v coordinates + return vec2((uv.x - pu) / frac, (uv.y - pv) / frac); +} + +void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) { + uint boundary = ((bits >> 4) & 0xf); + + if ((boundary & 1) != 0) { + tWeights[2] -= tWeights[0]; + tWeights[1] += 2*tWeights[0]; + tWeights[0] = 0; + } + if ((boundary & 2) != 0) { + sWeights[1] -= sWeights[3]; + sWeights[2] += 2*sWeights[3]; + sWeights[3] = 0; + } + if ((boundary & 4) != 0) { + tWeights[1] -= tWeights[3]; + tWeights[2] += 2*tWeights[3]; + tWeights[3] = 0; + } + if ((boundary & 8) != 0) { + sWeights[2] -= sWeights[0]; + sWeights[1] += 2*sWeights[0]; + sWeights[0] = 0; + } +} + +void main() { + + int current = int(gl_GlobalInvocationID.x); + + PatchCoord coord = patchCoords[current]; + int patchIndex = coord.patchIndex; + + ivec4 array = patchArray[coord.arrayIndex]; + int patchType = 6; // array.x XXX: REGULAR only for now. + int numControlVertices = 16; + + uint patchBits = patchParamBuffer[patchIndex].patchBits; + vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t)); + float dScale = float(1 << getDepth(patchBits)); + + float wP[20], wDs[20], wDt[20]; + if (patchType == 6) { // REGULAR + vec4 sWeights, tWeights, dsWeights, dtWeights; + getBSplineWeights(uv.x, sWeights, dsWeights); + getBSplineWeights(uv.y, tWeights, dtWeights); + + adjustBoundaryWeights(patchBits, sWeights, tWeights); + adjustBoundaryWeights(patchBits, dsWeights, dtWeights); + + for (int k = 0; k < 4; ++k) { + for (int l = 0; l < 4; ++l) { + wP[4*k+l] = sWeights[l] * tWeights[k]; + wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale; + wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale; + } + } + } else { + // TODO: GREGORY BASIS + } + + Vertex dst; + clear(dst); + + int indexBase = array.z + coord.vertIndex; + for (int i = 0; i < numControlVertices; ++i) { + int index = patchIndexBuffer[indexBase + i]; + addWithWeight(dst, readVertex(index), wP[i]); + } + writeVertex(current, dst); + + if (dstDuDesc.y > 0) { // length + Vertex du; + clear(du); + for (int i = 0; i < numControlVertices; ++i) { + int index = patchIndexBuffer[indexBase + i]; + addWithWeight(du, readVertex(index), wDs[i]); + } + writeDu(current, du); + } + if (dstDvDesc.y > 0) { + Vertex dv; + clear(dv); + for (int i = 0; i < numControlVertices; ++i) { + int index = patchIndexBuffer[indexBase + i]; + addWithWeight(dv, readVertex(index), wDt[i]); + } + writeDv(current, dv); + } +} + +#endif diff --git a/opensubdiv/osd/glslXFBKernel.glsl b/opensubdiv/osd/glslXFBKernel.glsl index caba7e2a..24200a5e 100644 --- a/opensubdiv/osd/glslXFBKernel.glsl +++ b/opensubdiv/osd/glslXFBKernel.glsl @@ -25,18 +25,8 @@ //------------------------------------------------------------------------------ uniform samplerBuffer vertexBuffer; - -out float outVertexBuffer[LENGTH]; - -uniform usamplerBuffer sizes; -uniform isamplerBuffer offsets; -uniform isamplerBuffer indices; -uniform samplerBuffer weights; - -uniform int batchStart = 0; -uniform int batchEnd = 0; - uniform int srcOffset = 0; +out float outVertexBuffer[LENGTH]; //------------------------------------------------------------------------------ @@ -72,6 +62,16 @@ void writeVertex(Vertex v) { } //------------------------------------------------------------------------------ + +#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS) + +uniform usamplerBuffer sizes; +uniform isamplerBuffer offsets; +uniform isamplerBuffer indices; +uniform samplerBuffer weights; +uniform int batchStart = 0; +uniform int batchEnd = 0; + void main() { int current = gl_VertexID + batchStart; @@ -97,4 +97,146 @@ void main() { writeVertex(dst); } +#endif + //------------------------------------------------------------------------------ + +#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES) + +layout (location = 0) in ivec3 patchHandles; +layout (location = 1) in vec2 patchCoords; + +//struct PatchArray { +// int patchType; +// int numPatches; +// int indexBase; // an offset within the index buffer +// int primitiveIdBase; // an offset within the patch param buffer +//}; +// # of patcharrays is 1 or 2. + +uniform ivec4 patchArray[2]; +uniform isamplerBuffer patchParamBuffer; +uniform isamplerBuffer patchIndexBuffer; + +void getBSplineWeights(float t, inout vec4 point, vec4 deriv) { + // The four uniform cubic B-Spline basis functions evaluated at t: + float one6th = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; + + point.x = one6th * (1.0f - 3.0f*(t - t2) - t3); + point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3); + point.z = one6th * (1.0f + 3.0f*(t + t2 - t3)); + point.w = one6th * ( t3); + + // Derivatives of the above four basis functions at t: + /* if (deriv) { */ + /* deriv[0] = -0.5f*t2 + t - 0.5f; */ + /* deriv[1] = 1.5f*t2 - 2.0f*t; */ + /* deriv[2] = -1.5f*t2 + t + 0.5f; */ + /* deriv[3] = 0.5f*t2; */ + /* } */ +} + +uint getDepth(uint patchBits) { + return (patchBits & 0x7); +} + +float getParamFraction(uint patchBits) { + uint nonQuadRoot = (patchBits >> 3) & 0x1; + uint depth = getDepth(patchBits); + if (nonQuadRoot == 1) { + return 1.0f / float( 1 << (depth-1) ); + } else { + return 1.0f / float( 1 << depth ); + } +} + +vec2 normalizePatchCoord(uint patchBits, vec2 uv) { + float frac = getParamFraction(patchBits); + + uint iu = (patchBits >> 22) & 0x3ff; + uint iv = (patchBits >> 12) & 0x3ff; + + // top left corner + float pu = float(iu*frac); + float pv = float(iv*frac); + + // normalize u,v coordinates + return vec2((uv.x - pu) / frac, (uv.y - pv) / frac); +} + +void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) { + uint boundary = ((bits >> 4) & 0xf); + + if ((boundary & 1) != 0) { + tWeights[2] -= tWeights[0]; + tWeights[1] += 2*tWeights[0]; + tWeights[0] = 0; + } + if ((boundary & 2) != 0) { + sWeights[1] -= sWeights[3]; + sWeights[2] += 2*sWeights[3]; + sWeights[3] = 0; + } + if ((boundary & 4) != 0) { + tWeights[1] -= tWeights[3]; + tWeights[2] += 2*tWeights[3]; + tWeights[3] = 0; + } + if ((boundary & 8) != 0) { + sWeights[2] -= sWeights[0]; + sWeights[1] += 2*sWeights[0]; + sWeights[0] = 0; + } +} + +void main() { + int current = gl_VertexID; + + ivec3 handle = patchHandles; + int patchIndex = handle.y; + + vec2 coord = patchCoords; + ivec4 array = patchArray[handle.x]; + int patchType = array.x; + int numControlVertices = 16; + + uint patchBits = texelFetch(patchParamBuffer, patchIndex).y; + + // normalize + coord = normalizePatchCoord(patchBits, coord); + + // XXX: dScale for derivative + + // if regular + float wP[20]; + { + vec4 sWeights, tWeights, dsWeights, dtWeights; + getBSplineWeights(coord.s, sWeights, dsWeights); + getBSplineWeights(coord.t, tWeights, dtWeights); + + adjustBoundaryWeights(patchBits, sWeights, tWeights); + + for (int k = 0; k < 4; ++k) { + for (int l = 0; l < 4; ++l) { + wP[4*k+l] = sWeights[l] * tWeights[k]; + } + } + } + + Vertex dst; + clear(dst); + + int indexBase = array.z + handle.z; + for (int i = 0; i < numControlVertices; ++i) { + int index = texelFetch(patchIndexBuffer, indexBase + i).x; + addWithWeight(dst, readVertex(index), wP[i]); + } + + writeVertex(dst); +} + +#endif + diff --git a/opensubdiv/osd/mesh.h b/opensubdiv/osd/mesh.h index 00646512..d8253f69 100644 --- a/opensubdiv/osd/mesh.h +++ b/opensubdiv/osd/mesh.h @@ -171,7 +171,12 @@ public: for(typename Evaluators::iterator it = _evaluators.begin(); it != _evaluators.end(); ++it) { - if (it->srcDesc.length == srcDesc.length and + // Note: XFB kernel needs to be configured with the local offset + // of the dstDesc to skip preceding primvars. + int dstOffset1 = it->dstDesc.offset % it->dstDesc.stride; + int dstOffset2 = dstDesc.offset % dstDesc.stride; + if (dstOffset1 == dstOffset2 and + it->srcDesc.length == srcDesc.length and it->srcDesc.stride == srcDesc.stride and it->dstDesc.length == dstDesc.length and it->dstDesc.stride == dstDesc.stride) { diff --git a/opensubdiv/osd/ompEvaluator.cpp b/opensubdiv/osd/ompEvaluator.cpp index 13d07e19..8f117db3 100644 --- a/opensubdiv/osd/ompEvaluator.cpp +++ b/opensubdiv/osd/ompEvaluator.cpp @@ -24,6 +24,7 @@ #include "../osd/ompEvaluator.h" #include "../osd/ompKernel.h" +#include "../far/patchBasis.h" #include namespace OpenSubdiv { @@ -33,24 +34,212 @@ namespace Osd { /* static */ bool -OmpEvaluator::EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, int end) { - if (end <= start) return true; +OmpEvaluator::EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { - // we can probably expand cpuKernel.cpp to here. + if (end <= start) return true; + if (srcDesc.length != dstDesc.length) return false; + + // XXX: we can probably expand cpuKernel.cpp to here. OmpEvalStencils(src, srcDesc, dst, dstDesc, sizes, offsets, indices, weights, start, end); return true; } +/* static */ +bool +OmpEvaluator::EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end) { + + if (end <= start) return true; + if (srcDesc.length != dstDesc.length) return false; + if (srcDesc.length != duDesc.length) return false; + if (srcDesc.length != dvDesc.length) return false; + + OmpEvalStencils(src, srcDesc, + dst, dstDesc, + du, duDesc, + dv, dvDesc, + sizes, offsets, indices, + weights, duWeights, dvWeights, + start, end); + + return true; +} + +template +struct BufferAdapter { + BufferAdapter(T *p, int length, int stride) : + _p(p), _length(length), _stride(stride) { } + void Clear() { + for (int i = 0; i < _length; ++i) _p[i] = 0; + } + void AddWithWeight(T const *src, float w) { + if (_p) { + // TODO: derivatives. + for (int i = 0; i < _length; ++i) { + _p[i] += src[i] * w; + } + } + } + const T *operator[] (int index) const { + return _p + _stride * index; + } + BufferAdapter & operator ++() { + if (_p) { + _p += _stride; + } + return *this; + } + + T *_p; + int _length; + int _stride; +}; + +/* static */ +bool +OmpEvaluator::EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer){ + + src += srcDesc.offset; + if (dst) dst += dstDesc.offset; + else return false; + BufferAdapter srcT(src, srcDesc.length, srcDesc.stride); + +#pragma omp parallel for + for (int i = 0; i < numPatchCoords; ++i) { + BufferAdapter dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride); + + float wP[20], wDs[20], wDt[20]; + PatchCoord const &coord = patchCoords[i]; + PatchArray const &array = patchArrays[coord.handle.arrayIndex]; + + int patchType = array.GetPatchType(); + // XXX: patchIndex is absolute. not sure it's consistent. + // (should be offsetted by array.primitiveIdBase?) + // patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex] + Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*) + &patchParamBuffer[coord.handle.patchIndex].patchBits; + + int numControlVertices = 0; + if (patchType == Far::PatchDescriptor::REGULAR) { + Far::internal::GetBSplineWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 16; + } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) { + Far::internal::GetGregoryWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 20; + } else if (patchType == Far::PatchDescriptor::QUADS) { + Far::internal::GetBilinearWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 4; + } else { + continue; + } + const int *cvs = + &patchIndexBuffer[array.indexBase + coord.handle.vertIndex]; + + dstT.Clear(); + for (int j = 0; j < numControlVertices; ++j) { + dstT.AddWithWeight(srcT[cvs[j]], wP[j]); + } + } + return true; +} + +/* static */ +bool +OmpEvaluator::EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PatchCoord const *patchCoords, + PatchArray const *patchArrays, + const int *patchIndexBuffer, + PatchParam const *patchParamBuffer) { + + src += srcDesc.offset; + if (dst) dst += dstDesc.offset; + if (du) du += duDesc.offset; + if (dv) dv += dvDesc.offset; + + BufferAdapter srcT(src, srcDesc.length, srcDesc.stride); + +#pragma omp parallel for + for (int i = 0; i < numPatchCoords; ++i) { + float wP[20], wDs[20], wDt[20]; + BufferAdapter dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride); + BufferAdapter duT(du + duDesc.stride*i, duDesc.length, duDesc.stride); + BufferAdapter dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride); + + PatchCoord const &coord = patchCoords[i]; + PatchArray const &array = patchArrays[coord.handle.arrayIndex]; + + int patchType = array.GetPatchType(); + Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*) + &patchParamBuffer[coord.handle.patchIndex].patchBits; + + int numControlVertices = 0; + if (patchType == Far::PatchDescriptor::REGULAR) { + Far::internal::GetBSplineWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 16; + } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) { + Far::internal::GetGregoryWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 20; + } else if (patchType == Far::PatchDescriptor::QUADS) { + Far::internal::GetBilinearWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 4; + } else { + continue; + } + const int *cvs = + &patchIndexBuffer[array.indexBase + coord.handle.vertIndex]; + + dstT.Clear(); + duT.Clear(); + dvT.Clear(); + for (int j = 0; j < numControlVertices; ++j) { + dstT.AddWithWeight(srcT[cvs[j]], wP[j]); + duT.AddWithWeight(srcT[cvs[j]], wDs[j]); + dvT.AddWithWeight(srcT[cvs[j]], wDt[j]); + } + ++dstT; + ++duT; + ++dvT; + } + return true; +} + /* static */ void OmpEvaluator::Synchronize(void * /*deviceContext*/) { diff --git a/opensubdiv/osd/ompEvaluator.h b/opensubdiv/osd/ompEvaluator.h index a329eed1..9b30e7a6 100644 --- a/opensubdiv/osd/ompEvaluator.h +++ b/opensubdiv/osd/ompEvaluator.h @@ -28,7 +28,7 @@ #include "../version.h" #include - +#include "../osd/types.h" #include "../osd/vertexDescriptor.h" namespace OpenSubdiv { @@ -38,9 +38,15 @@ namespace Osd { class OmpEvaluator { public: - /// \brief Generic static compute function. This function has a same + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic static eval stencils function. This function has a same /// signature as other device kernels have so that it can be called - /// transparently from OsdMesh template interface. + /// in the same way from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// must have BindCpuBuffer() method returning a @@ -54,29 +60,27 @@ public: /// /// @param dstDesc vertex buffer descriptor for the output buffer /// - /// @param stencilTable stencil table to be applied. + /// @param stencilTable Far::StencilTable or equivalent /// - /// @param instance not used in the omp kernel + /// @param instance not used in the omp kernel /// (declared as a typed pointer to prevent /// undesirable template resolution) /// /// @param deviceContext not used in the omp kernel /// - template - static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable, - OmpEvaluator const * instance = NULL, - void * deviceContext = NULL) { - (void)instance; // unused; - (void)deviceContext; // unused; + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + const OmpEvaluator *instance = NULL, + void * deviceContext = NULL) { - return EvalStencils(srcVertexBuffer->BindCpuBuffer(), - srcDesc, - dstVertexBuffer->BindCpuBuffer(), - dstDesc, + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, &stencilTable->GetSizes()[0], &stencilTable->GetOffsets()[0], &stencilTable->GetControlIndices()[0], @@ -85,17 +89,376 @@ public: /*end = */ stencilTable->GetNumStencils()); } - /// stencil compute function. - static bool EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, - int end); + /// \brief Static eval stencils function which takes raw CPU pointers for + /// input and output. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param sizes pointer to the sizes buffer of the stencil table + /// to apply for the range [start, end) + /// + /// @param offsets pointer to the offsets buffer of the stencil table + /// + /// @param indices pointer to the indices buffer of the stencil table + /// + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end); + + /// \brief Generic static eval stencils function with derivatives. + /// This function has a same signature as other device kernels + /// have so that it can be called in the same way from OsdMesh + /// template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output U-derivative buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the output buffer + /// + /// @param dvBuffer Output V-derivative buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable Far::StencilTable or equivalent + /// + /// @param instance not used in the omp kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the omp kernel + /// + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable, + const OmpEvaluator *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, + duBuffer->BindCpuBuffer(), duDesc, + dvBuffer->BindCpuBuffer(), dvDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + &stencilTable->GetDuWeights()[0], + &stencilTable->GetDvWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + /// \brief Static eval stencils function with derivatives, which takes + /// raw CPU pointers for input and output. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param du Output U-derivatives pointer. An offset of + /// duDesc will be applied internally. + /// + /// @param duDesc vertex buffer descriptor for the output buffer + /// + /// @param dv Output V-derivatives pointer. An offset of + /// dvDesc will be applied internally. + /// + /// @param dvDesc vertex buffer descriptor for the output buffer + /// + /// @param sizes pointer to the sizes buffer of the stencil table + /// + /// @param offsets pointer to the offsets buffer of the stencil table + /// + /// @param indices pointer to the indices buffer of the stencil table + /// + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param duWeights pointer to the du-weights buffer of the stencil table + /// + /// @param dvWeights pointer to the dv-weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end); + + /// ---------------------------------------------------------------------- + /// + /// Limit evaluations with PatchTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable CpuPatchTable or equivalent + /// XXX: currently Far::PatchTable can't be used + /// due to interface mismatch + /// + /// @param instance not used in the omp evaluator + /// + /// @param deviceContext not used in the omp evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + OmpEvaluator const *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, + numPatchCoords, + (const PatchCoord*)patchCoords->BindCpuBuffer(), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output U-derivatives buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output V-derivatives buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable CpuPatchTable or equivalent + /// XXX: currently Far::PatchTable can't be used + /// due to interface mismatch + /// + /// @param instance not used in the omp evaluator + /// + /// @param deviceContext not used in the omp evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + OmpEvaluator const *instance = NULL, + void * deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused + + // XXX: PatchCoords is somewhat abusing vertex primvar buffer interop. + // ideally all buffer classes should have templated by datatype + // so that downcast isn't needed there. + // (e.g. Osd::CpuBuffer ) + // + return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, + duBuffer->BindCpuBuffer(), duDesc, + dvBuffer->BindCpuBuffer(), dvDesc, + numPatchCoords, + (const PatchCoord*)patchCoords->BindCpuBuffer(), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Static limit eval function. It takes an array of PatchCoord + /// and evaluate limit values on given PatchTable. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex + /// + /// @param patchIndexBuffer an array of patch indices + /// indexed by PatchCoord::vertIndex + /// + /// @param patchParamBuffer an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex + /// + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer); + + /// \brief Static limit eval function. It takes an array of PatchCoord + /// and evaluate limit values on given PatchTable. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param du Output U-derivatives pointer. An offset of + /// duDesc will be applied internally. + /// + /// @param duDesc vertex buffer descriptor for the du buffer + /// + /// @param dv Output V-derivatives pointer. An offset of + /// dvDesc will be applied internally. + /// + /// @param dvDesc vertex buffer descriptor for the dv buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex + /// + /// @param patchIndexBuffer an array of patch indices + /// indexed by PatchCoord::vertIndex + /// + /// @param patchParamBuffer an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex + /// + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PatchCoord const *patchCoords, + PatchArray const *patchArrays, + const int *patchIndexBuffer, + PatchParam const *patchParamBuffer); + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- static void Synchronize(void *deviceContext = NULL); diff --git a/opensubdiv/osd/ompKernel.cpp b/opensubdiv/osd/ompKernel.cpp index dc7ecbfe..2b727c4f 100644 --- a/opensubdiv/osd/ompKernel.cpp +++ b/opensubdiv/osd/ompKernel.cpp @@ -117,10 +117,82 @@ OmpEvalStencils(float const * src, copy(dst, i, threadResult, dstDesc); } +} + +void +OmpEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + float * dstDu, + VertexBufferDescriptor const &dstDuDesc, + float * dstDv, + VertexBufferDescriptor const &dstDvDesc, + int const * sizes, + int const * offsets, + int const * indices, + float const * weights, + float const * duWeights, + float const * dvWeights, + int start, int end) { + if (start > 0) { + sizes += start; + indices += offsets[start]; + weights += offsets[start]; + duWeights += offsets[start]; + dvWeights += offsets[start]; + } + + src += srcDesc.offset; + dst += dstDesc.offset; + dstDu += dstDuDesc.offset; + dstDv += dstDvDesc.offset; + + int numThreads = omp_get_max_threads(); + int n = end - start; + + float * result = (float*)alloca(srcDesc.length * numThreads * sizeof(float)); + float * resultDu = (float*)alloca(srcDesc.length * numThreads * sizeof(float)); + float * resultDv = (float*)alloca(srcDesc.length * numThreads * sizeof(float)); + +#pragma omp parallel for + for (int i = 0; i < n; ++i) { + + int index = i + (start > 0 ? start : 0); // Stencil index + + // Get thread-local pointers + int const * threadIndices = indices + offsets[index]; + float const * threadWeights = weights + offsets[index]; + float const * threadWeightsDu = duWeights + offsets[index]; + float const * threadWeightsDv = dvWeights + offsets[index]; + + int threadId = omp_get_thread_num(); + + float * threadResult = result + threadId*srcDesc.length; + float * threadResultDu = resultDu + threadId*srcDesc.length; + float * threadResultDv = resultDv + threadId*srcDesc.length; + + clear(threadResult, dstDesc); + clear(threadResultDu, dstDuDesc); + clear(threadResultDv, dstDvDesc); + + for (int j=0; j<(int)sizes[index]; ++j) { + addWithWeight(threadResult, src, + threadIndices[j], threadWeights[j], srcDesc); + addWithWeight(threadResultDu, src, + threadIndices[j], threadWeightsDu[j], srcDesc); + addWithWeight(threadResultDv, src, + threadIndices[j], threadWeightsDv[j], srcDesc); + } + + copy(dst, i, threadResult, dstDesc); + copy(dstDu, i, threadResultDu, dstDuDesc); + copy(dstDv, i, threadResultDv, dstDvDesc); + } } -} // end namespace Osd +} // end namespace Osd } // end namespace OPENSUBDIV_VERSION } // end namespace OpenSubdiv diff --git a/opensubdiv/osd/ompKernel.h b/opensubdiv/osd/ompKernel.h index 7a9770cb..f0d71aea 100644 --- a/opensubdiv/osd/ompKernel.h +++ b/opensubdiv/osd/ompKernel.h @@ -45,6 +45,23 @@ OmpEvalStencils(float const * src, float const * weights, int start, int end); +void +OmpEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + float * dstDu, + VertexBufferDescriptor const &dstDuDesc, + float * dstDv, + VertexBufferDescriptor const &dstDvDesc, + int const * sizes, + int const * offsets, + int const * indices, + float const * weights, + float const * duWeights, + float const * dvWeights, + int start, int end); + } // end namespace Osd } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/tbbEvaluator.cpp b/opensubdiv/osd/tbbEvaluator.cpp index 2d49e258..5bfce4b9 100644 --- a/opensubdiv/osd/tbbEvaluator.cpp +++ b/opensubdiv/osd/tbbEvaluator.cpp @@ -34,15 +34,15 @@ namespace Osd { /* static */ bool -TbbEvaluator::EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - const int * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, int end) { +TbbEvaluator::EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; TbbEvalStencils(src, srcDesc, dst, dstDesc, @@ -51,6 +51,82 @@ TbbEvaluator::EvalStencils(const float *src, return true; } +/* static */ +bool +TbbEvaluator::EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end) { + + if (end <= start) return true; + if (srcDesc.length != dstDesc.length) return false; + if (srcDesc.length != duDesc.length) return false; + if (srcDesc.length != dvDesc.length) return false; + + TbbEvalStencils(src, srcDesc, + dst, dstDesc, + du, duDesc, + dv, dvDesc, + sizes, offsets, indices, + weights, duWeights, dvWeights, + start, end); + + return true; +} + +/* static */ +bool +TbbEvaluator::EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer) { + + if (srcDesc.length != dstDesc.length) return false; + + TbbEvalPatches(src, srcDesc, dst, dstDesc, + NULL, VertexBufferDescriptor(), + NULL, VertexBufferDescriptor(), + numPatchCoords, patchCoords, + patchArrayBuffer, patchIndexBuffer, patchParamBuffer); + + return true; +} + +/* static */ +bool +TbbEvaluator::EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer) { + + if (srcDesc.length != dstDesc.length) return false; + + TbbEvalPatches(src, srcDesc, dst, dstDesc, + du, duDesc, dv, dvDesc, + numPatchCoords, patchCoords, + patchArrayBuffer, patchIndexBuffer, patchParamBuffer); + + return true; +} + /* static */ void TbbEvaluator::Synchronize(void *) { diff --git a/opensubdiv/osd/tbbEvaluator.h b/opensubdiv/osd/tbbEvaluator.h index 58cc02f6..37eaf9a2 100644 --- a/opensubdiv/osd/tbbEvaluator.h +++ b/opensubdiv/osd/tbbEvaluator.h @@ -26,7 +26,9 @@ #define OPENSUBDIV3_OSD_TBB_EVALUATOR_H #include "../version.h" +#include "../osd/types.h" #include "../osd/vertexDescriptor.h" +#include "../far/patchTable.h" #include @@ -37,9 +39,15 @@ namespace Osd { class TbbEvaluator { public: - /// \brief Generic static stencil eval function. This function has a same + /// ---------------------------------------------------------------------- + /// + /// Stencil evaluations with StencilTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic static eval stencils function. This function has a same /// signature as other device kernels have so that it can be called - /// transparently from OsdMesh template interface. + /// in the same way from OsdMesh template interface. /// /// @param srcBuffer Input primvar buffer. /// must have BindCpuBuffer() method returning a @@ -55,27 +63,25 @@ public: /// /// @param stencilTable stencil table to be applied. /// - /// @param instance not used in the tbb kernel + /// @param instance not used in the tbb kernel /// (declared as a typed pointer to prevent /// undesirable template resolution) /// /// @param deviceContext not used in the tbb kernel /// - template - static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, - VertexBufferDescriptor const &srcDesc, - VERTEX_BUFFER *dstVertexBuffer, - VertexBufferDescriptor const &dstDesc, - STENCIL_TABLE const *stencilTable, - TbbEvaluator const *instance = NULL, - void *deviceContext = NULL) { + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + TbbEvaluator const *instance = NULL, + void *deviceContext = NULL) { + (void)instance; // unused (void)deviceContext; // unused - return EvalStencils(srcVertexBuffer->BindCpuBuffer(), - srcDesc, - dstVertexBuffer->BindCpuBuffer(), - dstDesc, + return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, &stencilTable->GetSizes()[0], &stencilTable->GetOffsets()[0], &stencilTable->GetControlIndices()[0], @@ -84,19 +90,381 @@ public: /*end = */ stencilTable->GetNumStencils()); } - static bool EvalStencils(const float *src, - VertexBufferDescriptor const &srcDesc, - float *dst, - VertexBufferDescriptor const &dstDesc, - const int *sizes, - const int *offsets, - const int *indices, - const float *weights, - int start, - int end); + /// \brief Static eval stencils function which takes raw CPU pointers for + /// input and output. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param sizes pointer to the sizes buffer of the stencil table + /// to apply for the range [start, end) + /// + /// @param offsets pointer to the offsets buffer of the stencil table + /// + /// @param indices pointer to the indices buffer of the stencil table + /// + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + const int *sizes, + const int *offsets, + const int *indices, + const float *weights, + int start, int end); + /// \brief Generic static eval stencils function with derivatives. + /// This function has a same signature as other device kernels + /// have so that it can be called in the same way from OsdMesh + /// template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output U-derivative buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the output buffer + /// + /// @param dvBuffer Output V-derivative buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. + /// + /// @param instance not used in the tbb kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the tbb kernel + /// + template + static bool EvalStencils( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + STENCIL_TABLE const *stencilTable, + const TbbEvaluator *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, + duBuffer->BindCpuBuffer(), duDesc, + dvBuffer->BindCpuBuffer(), dvDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + &stencilTable->GetDuWeights()[0], + &stencilTable->GetDvWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + /// \brief Static eval stencils function with derivatives, which takes + /// raw CPU pointers for input and output. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param du Output s-derivatives pointer. An offset of + /// duDesc will be applied internally. + /// + /// @param duDesc vertex buffer descriptor for the output buffer + /// + /// @param dv Output t-derivatives pointer. An offset of + /// dvDesc will be applied internally. + /// + /// @param dvDesc vertex buffer descriptor for the output buffer + /// + /// @param sizes pointer to the sizes buffer of the stencil table + /// to apply for the range [start, end) + /// + /// @param offsets pointer to the offsets buffer of the stencil table + /// + /// @param indices pointer to the indices buffer of the stencil table + /// + /// @param weights pointer to the weights buffer of the stencil table + /// + /// @param duWeights pointer to the u-weights buffer of the stencil table + /// + /// @param dvWeights pointer to the v-weights buffer of the stencil table + /// + /// @param start start index of stencil table + /// + /// @param end end index of stencil table + /// + static bool EvalStencils( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + const int * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end); + + /// ---------------------------------------------------------------------- + /// + /// Limit evaluations with PatchTable + /// + /// ---------------------------------------------------------------------- + + /// \brief Generic limit eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable Far::PatchTable + /// + /// @param instance not used in the cpu evaluator + /// + /// @param deviceContext not used in the cpu evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + TbbEvaluator const *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches(srcBuffer->BindCpuBuffer(), + srcDesc, + dstBuffer->BindCpuBuffer(), + dstDesc, + numPatchCoords, + (const PatchCoord*)patchCoords->BindCpuBuffer(), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Generic limit eval function with derivatives. This function has + /// a same signature as other device kernels have so that it can be + /// called in the same way. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param duBuffer Output s-derivatives buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param duDesc vertex buffer descriptor for the duBuffer + /// + /// @param dvBuffer Output t-derivatives buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dvDesc vertex buffer descriptor for the dvBuffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable Far::PatchTable + /// + /// @param instance not used in the cpu evaluator + /// + /// @param deviceContext not used in the cpu evaluator + /// + template + static bool EvalPatches( + SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc, + DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc, + DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + PATCHCOORD_BUFFER *patchCoords, + PATCH_TABLE *patchTable, + TbbEvaluator const *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches( + srcBuffer->BindCpuBuffer(), srcDesc, + dstBuffer->BindCpuBuffer(), dstDesc, + duBuffer->BindCpuBuffer(), duDesc, + dvBuffer->BindCpuBuffer(), dvDesc, + numPatchCoords, + (const PatchCoord*)patchCoords->BindCpuBuffer(), + patchTable->GetPatchArrayBuffer(), + patchTable->GetPatchIndexBuffer(), + patchTable->GetPatchParamBuffer()); + } + + /// \brief Static limit eval function. It takes an array of PatchCoord + /// and evaluate limit values on given PatchTable. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex + /// + /// @param patchIndexBuffer an array of patch indices + /// indexed by PatchCoord::vertIndex + /// + /// @param patchParamBuffer an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex + /// + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer); + + /// \brief Static limit eval function. It takes an array of PatchCoord + /// and evaluate limit values on given PatchTable. + /// + /// @param src Input primvar pointer. An offset of srcDesc + /// will be applied internally (i.e. the pointer + /// should not include the offset) + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dst Output primvar pointer. An offset of dstDesc + /// will be applied internally. + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param du Output s-derivatives pointer. An offset of + /// duDesc will be applied internally. + /// + /// @param duDesc vertex buffer descriptor for the du buffer + /// + /// @param dv Output t-derivatives pointer. An offset of + /// dvDesc will be applied internally. + /// + /// @param dvDesc vertex buffer descriptor for the dv buffer + /// + /// @param numPatchCoords number of patchCoords. + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchArrays an array of Osd::PatchArray struct + /// indexed by PatchCoord::arrayIndex + /// + /// @param patchIndexBuffer an array of patch indices + /// indexed by PatchCoord::vertIndex + /// + /// @param patchParamBuffer an array of Osd::PatchParam struct + /// indexed by PatchCoord::patchIndex + /// + static bool EvalPatches( + const float *src, VertexBufferDescriptor const &srcDesc, + float *dst, VertexBufferDescriptor const &dstDesc, + float *du, VertexBufferDescriptor const &duDesc, + float *dv, VertexBufferDescriptor const &dvDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrays, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer); + + /// ---------------------------------------------------------------------- + /// + /// Other methods + /// + /// ---------------------------------------------------------------------- + + /// \brief synchronize all asynchronous computation invoked on this device. static void Synchronize(void *deviceContext = NULL); + /// \brief initialize tbb task schedular + /// (optional: client may use tbb::task_scheduler_init) + /// + /// @param numThreads how many threads + /// static void SetNumThreads(int numThreads); }; diff --git a/opensubdiv/osd/tbbKernel.cpp b/opensubdiv/osd/tbbKernel.cpp index a15f765c..6b45f078 100644 --- a/opensubdiv/osd/tbbKernel.cpp +++ b/opensubdiv/osd/tbbKernel.cpp @@ -24,7 +24,9 @@ #include "../osd/cpuKernel.h" #include "../osd/tbbKernel.h" +#include "../osd/types.h" #include "../osd/vertexDescriptor.h" +#include "../far/patchBasis.h" #include #include @@ -187,6 +189,274 @@ TbbEvalStencils(float const * src, tbb::parallel_for(range, kernel); } +void +TbbEvalStencils(float const * src, VertexBufferDescriptor const &srcDesc, + float * dst, VertexBufferDescriptor const &dstDesc, + float * du, VertexBufferDescriptor const &duDesc, + float * dv, VertexBufferDescriptor const &dvDesc, + int const * sizes, + int const * offsets, + int const * indices, + float const * weights, + float const * duWeights, + float const * dvWeights, + int start, int end) { + if (start > 0) { + sizes += start; + indices += offsets[start]; + weights += offsets[start]; + duWeights += offsets[start]; + dvWeights += offsets[start]; + } + + if (src) src += srcDesc.offset; + if (dst) dst += dstDesc.offset; + if (du) du += duDesc.offset; + if (dv) dv += dvDesc.offset; + + // PERFORMANCE: need to combine 3 launches together + if (dst) { + TBBStencilKernel kernel(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights); + tbb::blocked_range range(start, end, grain_size); + tbb::parallel_for(range, kernel); + } + + if (du) { + TBBStencilKernel kernel(src, srcDesc, du, duDesc, + sizes, offsets, indices, duWeights); + tbb::blocked_range range(start, end, grain_size); + tbb::parallel_for(range, kernel); + } + + if (dv) { + TBBStencilKernel kernel(src, srcDesc, dv, dvDesc, + sizes, offsets, indices, dvWeights); + tbb::blocked_range range(start, end, grain_size); + tbb::parallel_for(range, kernel); + } +} + +// --------------------------------------------------------------------------- + +template +struct BufferAdapter { + BufferAdapter(T *p, int length, int stride) : + _p(p), _length(length), _stride(stride) { } + void Clear() { + for (int i = 0; i < _length; ++i) _p[i] = 0; + } + void AddWithWeight(T const *src, float w) { + if (_p) { + for (int i = 0; i < _length; ++i) { + _p[i] += src[i] * w; + } + } + } + const T *operator[] (int index) const { + return _p + _stride * index; + } + BufferAdapter & operator ++() { + if (_p) { + _p += _stride; + } + return *this; + } + + T *_p; + int _length; + int _stride; +}; + +class TbbEvalPatchesKernel { + VertexBufferDescriptor _srcDesc; + VertexBufferDescriptor _dstDesc; + VertexBufferDescriptor _dstDuDesc; + VertexBufferDescriptor _dstDvDesc; + float const * _src; + float * _dst; + float * _dstDu; + float * _dstDv; + int _numPatchCoords; + const PatchCoord *_patchCoords; + const PatchArray *_patchArrayBuffer; + const int *_patchIndexBuffer; + const PatchParam *_patchParamBuffer; + +public: + TbbEvalPatchesKernel(float const *src, + VertexBufferDescriptor srcDesc, + float *dst, + VertexBufferDescriptor dstDesc, + float *dstDu, + VertexBufferDescriptor dstDuDesc, + float *dstDv, + VertexBufferDescriptor dstDvDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer) : + _srcDesc(srcDesc), _dstDesc(dstDesc), + _dstDuDesc(dstDuDesc), _dstDvDesc(dstDvDesc), + _src(src), _dst(dst), _dstDu(dstDu), _dstDv(dstDv), + _numPatchCoords(numPatchCoords), + _patchCoords(patchCoords), + _patchArrayBuffer(patchArrayBuffer), + _patchIndexBuffer(patchIndexBuffer), + _patchParamBuffer(patchParamBuffer) { + } + + void operator() (tbb::blocked_range const &r) const { + if (_dstDu == NULL && _dstDv == NULL) { + compute(r); + } else { + computeWithDerivative(r); + } + } + + void compute(tbb::blocked_range const &r) const { + float wP[20], wDs[20], wDt[20]; + BufferAdapter srcT(_src + _srcDesc.offset, + _srcDesc.length, + _srcDesc.stride); + BufferAdapter dstT(_dst + _dstDesc.offset + + r.begin() * _dstDesc.stride, + _dstDesc.length, + _dstDesc.stride); + + BufferAdapter dstDuT(_dstDu, + _dstDuDesc.length, + _dstDuDesc.stride); + BufferAdapter dstDvT(_dstDv, + _dstDvDesc.length, + _dstDvDesc.stride); + + for (int i = r.begin(); i < r.end(); ++i) { + PatchCoord const &coord = _patchCoords[i]; + PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex]; + + int patchType = array.GetPatchType(); + Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*) + &_patchParamBuffer[coord.handle.patchIndex].patchBits; + + int numControlVertices = 0; + if (patchType == Far::PatchDescriptor::REGULAR) { + Far::internal::GetBSplineWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 16; + } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) { + Far::internal::GetGregoryWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 20; + } else if (patchType == Far::PatchDescriptor::QUADS) { + Far::internal::GetBilinearWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 4; + } else { + assert(0); + } + + const int *cvs = + &_patchIndexBuffer[array.indexBase + coord.handle.vertIndex]; + + dstT.Clear(); + for (int j = 0; j < numControlVertices; ++j) { + dstT.AddWithWeight(srcT[cvs[j]], wP[j]); + } + ++dstT; + } + } + + void computeWithDerivative(tbb::blocked_range const &r) const { + float wP[20], wDs[20], wDt[20]; + BufferAdapter srcT(_src + _srcDesc.offset, + _srcDesc.length, + _srcDesc.stride); + BufferAdapter dstT(_dst + _dstDesc.offset + + r.begin() * _dstDesc.stride, + _dstDesc.length, + _dstDesc.stride); + BufferAdapter dstDuT(_dstDu + _dstDuDesc.offset + + r.begin() * _dstDuDesc.stride, + _dstDuDesc.length, + _dstDuDesc.stride); + BufferAdapter dstDvT(_dstDv + _dstDvDesc.offset + + r.begin() * _dstDvDesc.stride, + _dstDvDesc.length, + _dstDvDesc.stride); + + for (int i = r.begin(); i < r.end(); ++i) { + PatchCoord const &coord = _patchCoords[i]; + PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex]; + + int patchType = array.GetPatchType(); + Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*) + &_patchParamBuffer[coord.handle.patchIndex].patchBits; + + int numControlVertices = 0; + if (patchType == Far::PatchDescriptor::REGULAR) { + Far::internal::GetBSplineWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 16; + } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) { + Far::internal::GetGregoryWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 20; + } else if (patchType == Far::PatchDescriptor::QUADS) { + Far::internal::GetBilinearWeights(patchBits, + coord.s, coord.t, wP, wDs, wDt); + numControlVertices = 4; + } else { + assert(0); + } + + const int *cvs = + &_patchIndexBuffer[array.indexBase + coord.handle.vertIndex]; + + dstT.Clear(); + dstDuT.Clear(); + dstDvT.Clear(); + for (int j = 0; j < numControlVertices; ++j) { + dstT.AddWithWeight(srcT[cvs[j]], wP[j]); + dstDuT.AddWithWeight(srcT[cvs[j]], wDs[j]); + dstDvT.AddWithWeight(srcT[cvs[j]], wDt[j]); + } + ++dstT; + ++dstDuT; + ++dstDvT; + } + } +}; + + +void +TbbEvalPatches(float const *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + float *dstDu, + VertexBufferDescriptor const &dstDuDesc, + float *dstDv, + VertexBufferDescriptor const &dstDvDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer) { + + TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc, + dstDu, dstDuDesc, dstDv, dstDvDesc, + numPatchCoords, patchCoords, + patchArrayBuffer, + patchIndexBuffer, + patchParamBuffer); + + tbb::blocked_range range(0, numPatchCoords, grain_size); + tbb::parallel_for(range, kernel); + +} + } // end namespace Osd } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/tbbKernel.h b/opensubdiv/osd/tbbKernel.h index c44dd732..01a6389d 100644 --- a/opensubdiv/osd/tbbKernel.h +++ b/opensubdiv/osd/tbbKernel.h @@ -32,6 +32,9 @@ namespace OPENSUBDIV_VERSION { namespace Osd { +struct PatchArray; +struct PatchCoord; +struct PatchParam; struct VertexBufferDescriptor; void @@ -45,6 +48,38 @@ TbbEvalStencils(float const * src, float const * weights, int start, int end); +void +TbbEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + float * dstDu, + VertexBufferDescriptor const &dstDuDesc, + float * dstDv, + VertexBufferDescriptor const &dstDvDesc, + int const * sizes, + int const * offsets, + int const * indices, + float const * weights, + float const * duWeights, + float const * dvWeights, + int start, int end); + +void +TbbEvalPatches(float const *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + float *dstDu, + VertexBufferDescriptor const &dstDuDesc, + float *dstDv, + VertexBufferDescriptor const &dstDvDesc, + int numPatchCoords, + const PatchCoord *patchCoords, + const PatchArray *patchArrayBuffer, + const int *patchIndexBuffer, + const PatchParam *patchParamBuffer); + } // end namespace Osd } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/types.h b/opensubdiv/osd/types.h new file mode 100644 index 00000000..b2279b3d --- /dev/null +++ b/opensubdiv/osd/types.h @@ -0,0 +1,111 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV3_OSD_TYPES_H +#define OPENSUBDIV3_OSD_TYPES_H + +#include "../version.h" +#include "../far/patchTable.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +/// \brief Coordinates set on a patch table +/// +/// XXX: this class may be moved into Far +/// +struct PatchCoord { + // 5-ints struct. + + /// \brief Constructor + /// + /// @param handle patch handle + /// + /// @param s parametric location on the patch + /// + /// @param t parametric location on the patch + /// + PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) : + handle(handle), s(s), t(t) { } + + PatchCoord() : s(0), t(0) { + handle.arrayIndex = 0; + handle.patchIndex = 0; + handle.vertIndex = 0; + } + + Far::PatchTable::PatchHandle handle; ///< patch handle + float s, t; ///< parametric location on patch +}; + +struct PatchArray { + // 4-ints struct. + PatchArray(Far::PatchDescriptor desc, int numPatches, + int indexBase, int primitiveIdBase) : + desc(desc), numPatches(numPatches), indexBase(indexBase), + primitiveIdBase(primitiveIdBase) {} + + Far::PatchDescriptor const &GetDescriptor() const { + return desc; + } + + int GetPatchType() const { + return desc.GetType(); + } + int GetNumPatches() const { + return numPatches; + } + int GetIndexBase() const { + return indexBase; + } + int GetPrimitiveIdBase() const { + return primitiveIdBase; + } + Far::PatchDescriptor desc; + int numPatches; + int indexBase; // an offset within the index buffer + int primitiveIdBase; // an offset within the patch param buffer +}; + +struct PatchParam { + // int3 struct. + int faceIndex; + unsigned int patchBits; + float sharpness; +}; + +typedef std::vector PatchArrayVector; +typedef std::vector PatchParamVector; + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV3_OSD_TYPES_H