diff --git a/examples/dxPtexViewer/dxPtexViewer.cpp b/examples/dxPtexViewer/dxPtexViewer.cpp index 2aec7259..39b32562 100644 --- a/examples/dxPtexViewer/dxPtexViewer.cpp +++ b/examples/dxPtexViewer/dxPtexViewer.cpp @@ -30,41 +30,28 @@ #include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL; +#include #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - + #include #include "../common/clDeviceContext.h" - CLD3D11DeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA #include - #include - #include - + #include #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL; #endif #include -#include -#include -OpenSubdiv::Osd::D3D11ComputeController * g_d3d11ComputeController = NULL; +#include #include OpenSubdiv::Osd::D3D11MeshInterface *g_mesh; @@ -635,6 +622,8 @@ createPtex(const char *filename) { //------------------------------------------------------------------------------ void createOsdMesh(int level, int kernel) { + + using namespace OpenSubdiv; Ptex::String ptexError; PtexTexture *ptexColor = PtexTexture::open(g_ptexColorFilename, ptexError, true); if (ptexColor == NULL) { @@ -691,79 +680,81 @@ createOsdMesh(int level, int kernel) { int numVertexElements = 6; //g_adaptive ? 
3 : 6; int numVaryingElements = 0; - if (kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + if (g_kernel == kCPU) { + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); + #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); +#endif +#ifdef OPENSUBDIV_HAS_TBB + } else if (kernel == kTBB) { + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif #ifdef OPENSUBDIV_HAS_OPENCL - } else if (kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, &g_clDeviceContext); + } else if(kernel == kCL) { + static Osd::EvaluatorCacheT clEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA - } else if (kernel == kCUDA) { - if (not g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cudaComputeController, - refiner, - 
numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + } else if (g_kernel == kCUDA) { + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif } else if (g_kernel == kDirectCompute) { - if (not g_d3d11ComputeController) { - g_d3d11ComputeController = new OpenSubdiv::Osd::D3D11ComputeController(g_pd3dDeviceContext); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_d3d11ComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + static Osd::EvaluatorCacheT d3d11ComputeEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &d3d11ComputeEvaluatorCache, + g_pd3dDeviceContext); } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); } @@ -1122,22 +1113,6 @@ quit() { SAFE_RELEASE(g_pd3dDeviceContext); SAFE_RELEASE(g_pd3dDevice); - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif - - delete g_d3d11ComputeController; - PostQuitMessage(0); exit(0); } diff --git a/examples/dxViewer/dxviewer.cpp b/examples/dxViewer/dxviewer.cpp index 89cdaecd..4699a0a4 100644 --- a/examples/dxViewer/dxviewer.cpp +++ b/examples/dxViewer/dxviewer.cpp @@ -30,46 +30,32 @@ #include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL; +#include #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - + #include #include "../common/clDeviceContext.h" - CLD3D11DeviceContext 
g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA #include - #include - #include - + #include #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL; #endif #include -#include -#include -OpenSubdiv::Osd::D3D11ComputeController * g_d3d11ComputeController = NULL; +#include #include OpenSubdiv::Osd::D3D11MeshInterface *g_mesh; @@ -272,17 +258,18 @@ getKernelName(int kernel) { static void createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=kCatmark) { - typedef OpenSubdiv::Far::ConstIndexArray IndexArray; + using namespace OpenSubdiv; + typedef Far::ConstIndexArray IndexArray; Shape * shape = Shape::parseObj(shapeDesc.data.c_str(), shapeDesc.scheme); // create Vtr mesh (topology) - OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape); - OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape); + Sdc::SchemeType sdctype = GetSdcType(*shape); + Sdc::Options sdcoptions = GetSdcOptions(*shape); - OpenSubdiv::Far::TopologyRefiner * refiner = - OpenSubdiv::Far::TopologyRefinerFactory::Create(*shape, - OpenSubdiv::Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); + Far::TopologyRefiner * refiner = + Far::TopologyRefinerFactory::Create(*shape, + Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); // save coarse topology (used for coarse mesh drawing) int nedges = refiner->GetNumEdges(0), @@ -316,104 +303,90 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme= bool doAdaptive = (g_adaptive!=0 and g_scheme==kCatmark), doSingleCreasePatch = (g_singleCreasePatch!=0 and g_scheme==kCatmark); - OpenSubdiv::Osd::MeshBitset bits; - bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); - bits.set(OpenSubdiv::Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); + Osd::MeshBitset bits; + bits.set(Osd::MeshAdaptive, doAdaptive); + 
bits.set(Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); // gregory basis hasn't supported yet in D3D11Mesh - bits.set(OpenSubdiv::Osd::MeshEndCapLegacyGregory, true); + bits.set(Osd::MeshEndCapLegacyGregory, true); int numVertexElements = 6; int numVaryingElements = 0; if (g_kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); + #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == kTBB) { - if (not g_tbbComputeController) { - g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_tbbComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if(kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, - &g_clDeviceContext); + 
static Osd::EvaluatorCacheT clEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if (g_kernel == kCUDA) { - if (not g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cudaComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif } else if (g_kernel == kDirectCompute) { - if (not g_d3d11ComputeController) { - g_d3d11ComputeController = new OpenSubdiv::Osd::D3D11ComputeController(g_pd3dDeviceContext); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_d3d11ComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + static Osd::EvaluatorCacheT d3d11ComputeEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &d3d11ComputeEvaluatorCache, + g_pd3dDeviceContext); } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); } @@ -995,26 +968,6 @@ quit() { SAFE_RELEASE(g_pd3dDeviceContext); SAFE_RELEASE(g_pd3dDevice); - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif - - delete g_d3d11ComputeController; - PostQuitMessage(0); exit(0); } diff --git a/examples/glEvalLimit/glEvalLimit.cpp b/examples/glEvalLimit/glEvalLimit.cpp index 45285d6a..5ecb89d6 100755 --- a/examples/glEvalLimit/glEvalLimit.cpp +++ b/examples/glEvalLimit/glEvalLimit.cpp @@ -42,19 +42,18 @@ GLFWwindow* g_window=0; 
GLFWmonitor* g_primary=0; -#include -#include -#include -#include +#include #include #include #include #include + #include #include #include #include #include +#include #include @@ -198,13 +197,12 @@ Far::TopologyRefiner * g_topologyRefiner = 0; Osd::CpuVertexBuffer * g_vertexData = 0, * g_varyingData = 0; -Osd::CpuComputeContext * g_computeCtx = 0; +Far::StencilTables const * g_vertexStencils = NULL; +Far::StencilTables const * g_varyingStencils = NULL; -Osd::CpuComputeController g_computeCtrl; - -Osd::CpuEvalLimitContext * g_evalCtx = 0; - -Osd::CpuEvalLimitController g_evalCtrl; +Far::PatchTables const * g_patchTables = NULL; +Far::PatchMap const * g_patchMap = NULL; +Osd::PatchCoordArray g_patchCoords; Osd::VertexBufferDescriptor g_idesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 3 ), g_odesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6 ), @@ -245,7 +243,25 @@ updateGeom() { g_vertexData->UpdateData( &g_positions[0], 0, nverts); - g_computeCtrl.Compute(g_computeCtx, g_vertexData, g_varyingData); + if (! g_topologyRefiner) return; + + // note that for patch eval we need coarse+refined combined buffer. 
+ int nCoarseVertices = g_topologyRefiner->GetNumVertices(0); + Osd::CpuEvaluator::EvalStencils(g_vertexData, + Osd::VertexBufferDescriptor(0, 3, 3), + g_vertexData, + Osd::VertexBufferDescriptor( + nCoarseVertices*3, 3, 3), + g_vertexStencils); + + if (g_varyingData) { + Osd::CpuEvaluator::EvalStencils(g_varyingData, + Osd::VertexBufferDescriptor(0, 3, 3), + g_varyingData, + Osd::VertexBufferDescriptor( + nCoarseVertices*3, 3, 3), + g_varyingStencils); + } s.Stop(); g_computeTime = float(s.GetElapsed() * 1000.0f); @@ -255,66 +271,37 @@ updateGeom() { s.Start(); - // The varying data ends-up interleaved in the same g_Q output buffer because - // g_Q has a stride of 6 and g_vdesc sets the offset to 3, while g_odesc sets - // the offset to 0 - switch (g_drawMode) { - case kVARYING : g_evalCtrl.BindVaryingBuffers( g_idesc, g_varyingData, g_vdesc, g_Q ); break; - - case kFACEVARYING : //g_evalCtrl.BindFacevaryingBuffers( g_fvidesc, g_fvodesc, g_Q ); break; - case kRANDOM : - case kUV : - default : g_evalCtrl.Unbind(); break; - } - - // Bind/Unbind of the vertex buffers to the context needs to happen - // outside of the parallel loop - g_evalCtrl.BindVertexBuffers( g_idesc, g_vertexData, g_odesc, g_Q, g_dQs, g_dQt ); - - // Apply 'dynamics' update assert(g_particles); g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time... 
- // Evaluate the positions of the samples on the limit surface - g_nsamplesFound=0; -#define USE_OPENMP -#if defined(OPENSUBDIV_HAS_OPENMP) and defined(USE_OPENMP) - #pragma omp parallel for -#endif - for (int i=0; iGetPositions()[i]; - - int n = g_evalCtrl.EvalLimitSample( coord, g_evalCtx, i ); - - if (n) { - // point colors - switch (g_drawMode) { - case kUV : { float * color = g_Q->BindCpuBuffer() + i*g_Q->GetNumElements() + 3; - color[0] = coord.s; - color[1] = 0.0f; - color[2] = coord.t; } break; - - case kRANDOM : // no update needed - case kVARYING : - case kFACEVARYING : break; - - default : break; - } -#if defined(OPENSUBDIV_HAS_OPENMP) and defined(USE_OPENMP) - #pragma omp atomic -#endif - g_nsamplesFound += n; - } else { - // "hide" unfound samples (hole tags...) as a black dot at the origin - float * sample = g_Q->BindCpuBuffer() + i*g_Q->GetNumElements(); - memset(sample, 0, g_Q->GetNumElements() * sizeof(float)); + // resolve particle positions into patch handles + // XXX: this process should be handled by OsdKernel in parallel + g_patchCoords.clear(); + for (int i = 0; i < g_particles->GetNumParticles(); ++i) { + STParticles::Position const &position = g_particles->GetPositions()[i]; + Far::PatchTables::PatchHandle const *handle = + g_patchMap->FindPatch(position.ptexIndex, position.s, position.t); + if (handle) { + g_patchCoords.push_back(Osd::PatchCoord( + *handle, position.s, position.t)); } } - g_evalCtrl.Unbind(); + // Evaluate the positions of the samples on the limit surface + g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(g_vertexData, g_idesc, + g_Q, g_odesc, + g_patchCoords, + g_patchTables, NULL); + + // varying + if (g_drawMode == kVARYING) { + Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc, + g_Q, g_vdesc, + g_patchCoords, + g_patchTables, NULL); + } g_Q->BindVBO(); @@ -335,7 +322,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape); delete g_topologyRefiner; 
- OpenSubdiv::Far::TopologyRefiner * g_topologyRefiner = + g_topologyRefiner = OpenSubdiv::Far::TopologyRefinerFactory::Create(*shape, OpenSubdiv::Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); @@ -409,18 +396,17 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { nverts = vertexStencils->GetNumControlVertices() + vertexStencils->GetNumStencils(); - // Create an Osd Compute context, used to "pose" the vertices with - // the stencils tables - delete g_computeCtx; - g_computeCtx = Osd::CpuComputeContext::Create(vertexStencils, - varyingStencils); + if (g_vertexStencils) delete g_vertexStencils; + g_vertexStencils = vertexStencils; + if (g_varyingStencils) delete g_varyingStencils; + g_varyingStencils = varyingStencils; - // Create a limit Eval context with the patch tables - delete g_evalCtx; - g_evalCtx = Osd::CpuEvalLimitContext::Create(*patchTables); + if (g_patchTables) delete g_patchTables; + g_patchTables = patchTables; - delete vertexStencils; - delete varyingStencils; + // Create a far patch map + if (g_patchMap) delete g_patchMap; + g_patchMap = new Far::PatchMap(*g_patchTables); } { // Create vertex primvar buffer for the CVs diff --git a/examples/glEvalLimit/particles.h b/examples/glEvalLimit/particles.h index 80f7c2b6..ea1f798d 100644 --- a/examples/glEvalLimit/particles.h +++ b/examples/glEvalLimit/particles.h @@ -25,7 +25,6 @@ #ifndef ST_PARTICLES_H #define ST_PARTICLES_H -#include #include #include @@ -54,8 +53,25 @@ class STParticles { public: + /// \brief Coordinates set on a limit surface + /// + struct Position { + Position() { } + + /// \brief Constructor + /// + /// @param f Ptex face id + /// + /// @param x parametric location on face + /// + /// @param y parametric location on face + /// + Position(int f, float x, float y) : ptexIndex(f), s(x), t(y) { } + + int ptexIndex; ///< ptex face index + float s, t; ///< parametric location on face + }; - typedef OpenSubdiv::Osd::LimitLocation Position; typedef 
OpenSubdiv::Far::TopologyRefiner Refiner; STParticles(Refiner const & refiner, int nparticles, bool centered=false); diff --git a/examples/glFVarViewer/glFVarViewer.cpp b/examples/glFVarViewer/glFVarViewer.cpp index d41ad1b0..f20758d2 100644 --- a/examples/glFVarViewer/glFVarViewer.cpp +++ b/examples/glFVarViewer/glFVarViewer.cpp @@ -46,10 +46,8 @@ GLFWmonitor* g_primary = 0; #include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #include OpenSubdiv::Osd::GLMeshInterface *g_mesh = NULL; @@ -358,20 +356,15 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, Scheme scheme = kCatmark) int numVertexElements = 3; int numVaryingElements = 0; - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - delete g_mesh; - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::CpuEvaluator, + OpenSubdiv::Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); std::vector fvarData; @@ -1008,8 +1001,6 @@ uninitGL() { if (g_mesh) delete g_mesh; - - delete g_cpuComputeController; } //------------------------------------------------------------------------------ diff --git a/examples/glImaging/glImaging.cpp b/examples/glImaging/glImaging.cpp index 4078a15a..ea6547d8 100755 --- a/examples/glImaging/glImaging.cpp +++ b/examples/glImaging/glImaging.cpp @@ -46,55 +46,40 @@ #include #include +#include #include -#include -#include - OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL + #include 
#include - #include - #include #include "../common/clDeviceContext.h" - CLDeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA + #include #include - #include - #include - #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include + #include #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslTransformFeedbackComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include + #include #include - OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL; #endif #include @@ -244,103 +229,82 @@ createOsdMesh(std::string const &kernel, Osd::MeshBitset bits) { if (kernel == "CPU") { - if (not g_cpuComputeController) { - g_cpuComputeController = new Osd::CpuComputeController(); - } return new Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Far::StencilTables, + Osd::CpuEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == "OPENMP") { - if (not g_ompComputeController) { - g_ompComputeController = new Osd::OmpComputeController(); - } return new Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Far::StencilTables, + Osd::OmpEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == "TBB") { - if (not g_tbbComputeController) { - g_tbbComputeController = new Osd::TbbComputeController(); - } return new Osd::Mesh( - g_tbbComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Far::StencilTables, + Osd::TbbEvaluator, + 
Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if(kernel == "CL") { - if (not g_clComputeController) { - g_clComputeController = new Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } return new Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, &g_clDeviceContext); + Osd::CLStencilTables, + Osd::CLEvaluator, + Osd::GLDrawContext, + CLDeviceContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + NULL, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if(kernel == "CUDA") { - if (not g_cudaComputeController) { - g_cudaComputeController = new Osd::CudaComputeController(); - } return new Osd::Mesh( - g_cudaComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Osd::CudaStencilTables, + Osd::CudaEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if(kernel == "XFB") { - if (not g_glslTransformFeedbackComputeController) { - g_glslTransformFeedbackComputeController = new Osd::GLSLTransformFeedbackComputeController(); - } return new Osd::Mesh( - g_glslTransformFeedbackComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Osd::GLStencilTablesTBO, + Osd::GLXFBEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if(kernel == "GLSL") { - if (not g_glslComputeController) { - g_glslComputeController = new Osd::GLSLComputeController(); - } return new Osd::Mesh( - g_glslComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Osd::GLStencilTablesSSBO, + Osd::GLComputeEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + 
numVaryingElements, + level, bits); #endif } diff --git a/examples/glPaintTest/glPaintTest.cpp b/examples/glPaintTest/glPaintTest.cpp index da86255d..5828a974 100644 --- a/examples/glPaintTest/glPaintTest.cpp +++ b/examples/glPaintTest/glPaintTest.cpp @@ -47,10 +47,8 @@ GLFWmonitor* g_primary=0; #include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #include OpenSubdiv::Osd::GLMeshInterface *g_mesh; @@ -240,14 +238,11 @@ createOsdMesh() { bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); bits.set(OpenSubdiv::Osd::MeshPtexData, true); - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, 3, 0, g_level, bits); + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::CpuEvaluator, + OpenSubdiv::Osd::GLDrawContext>( + refiner, 3, 0, g_level, bits); // compute model bounding float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX}; @@ -1095,8 +1090,6 @@ uninitGL() { if (g_mesh) delete g_mesh; - - delete g_cpuComputeController; } //------------------------------------------------------------------------------ diff --git a/examples/glPtexViewer/glPtexViewer.cpp b/examples/glPtexViewer/glPtexViewer.cpp index 7f43bf15..18c8aab7 100644 --- a/examples/glPtexViewer/glPtexViewer.cpp +++ b/examples/glPtexViewer/glPtexViewer.cpp @@ -54,56 +54,39 @@ GLFWmonitor* g_primary = 0; #include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL; #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL + #include #include - #include - #include - #include "../common/clDeviceContext.h" - CLDeviceContext 
g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA + #include #include - #include - #include - #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include + #include #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController - *g_glslTransformFeedbackComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include + #include #include - OpenSubdiv::Osd::GLSLComputeController * g_glslComputeController = NULL; #endif #include @@ -1029,26 +1012,20 @@ createOsdMesh(int level, int kernel) { int numVaryingElements = 0; if (kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::CpuEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, level, bits); #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::OmpEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, @@ -1056,13 +1033,10 @@ createOsdMesh(int level, int kernel) { #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == kTBB) { - if (not g_tbbComputeController) { - g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_tbbComputeController, + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::TbbEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, @@ 
-1070,30 +1044,25 @@ createOsdMesh(int level, int kernel) { #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if (kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } + static OpenSubdiv::Osd::EvaluatorCacheT clEvaluatorCache; g_mesh = new OpenSubdiv::Osd::Mesh( - g_clComputeController, + OpenSubdiv::Osd::CLStencilTables, + OpenSubdiv::Osd::CLEvaluator, + OpenSubdiv::Osd::GLDrawContext, + CLDeviceContext>( refiner, numVertexElements, numVaryingElements, - level, bits, &g_clDeviceContext); + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if (kernel == kCUDA) { - if (not g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_cudaComputeController, + OpenSubdiv::Osd::CudaStencilTables, + OpenSubdiv::Osd::CudaEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, @@ -1101,32 +1070,29 @@ createOsdMesh(int level, int kernel) { #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if (kernel == kGLSL) { - if (not g_glslTransformFeedbackComputeController) { - g_glslTransformFeedbackComputeController = - new OpenSubdiv::Osd::GLSLTransformFeedbackComputeController(); - } + static OpenSubdiv::Osd::EvaluatorCacheT glXFBEvaluatorCache; g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslTransformFeedbackComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + OpenSubdiv::Osd::GLStencilTablesTBO, + OpenSubdiv::Osd::GLXFBEvaluator, + OpenSubdiv::Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &glXFBEvaluatorCache); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if (kernel == kGLSLCompute) { - if (not g_glslComputeController) { - g_glslComputeController = new 
OpenSubdiv::Osd::GLSLComputeController(); - } + static OpenSubdiv::Osd::EvaluatorCacheT glComputeEvaluatorCache; g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslComputeController, + OpenSubdiv::Osd::GLStencilTablesSSBO, + OpenSubdiv::Osd::GLComputeEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, - level, bits); + level, bits, + &glComputeEvaluatorCache); #endif } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); @@ -1977,32 +1943,6 @@ void uninitGL() { if (g_mesh) delete g_mesh; - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - delete g_glslTransformFeedbackComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - delete g_glslComputeController; -#endif - if (g_diffuseEnvironmentMap) glDeleteTextures(1, &g_diffuseEnvironmentMap); if (g_specularEnvironmentMap) diff --git a/examples/glShareTopology/glShareTopology.cpp b/examples/glShareTopology/glShareTopology.cpp index 14847af7..1ca2e1a0 100644 --- a/examples/glShareTopology/glShareTopology.cpp +++ b/examples/glShareTopology/glShareTopology.cpp @@ -46,55 +46,42 @@ GLFWmonitor* g_primary=0; #include #include #include +#include #include +#include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; +#include #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - OpenSubdiv::Osd::CLComputeController *g_clComputeController = 
NULL; - + #include #include "../common/clDeviceContext.h" CLDeviceContext g_clDeviceContext; #endif #ifdef OPENSUBDIV_HAS_CUDA #include - #include - #include - OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL; - + #include #include "../common/cudaDeviceContext.h" CudaDeviceContext g_cudaDeviceContext; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include - #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslXFBComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include - #include - OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL; + #include #endif @@ -171,7 +158,6 @@ public: if (interleaved) { assert(vertexDesc.stride == varyingDesc.stride); - _vertexBuffer = createVertexBuffer( vertexDesc.stride, numInstances * numVertices); } else { @@ -259,9 +245,12 @@ public: return _restPosition; } - int GetNumVertices() const { + int GetNumVertices() const { // total (control + refined) return _numVertices; } + int GetNumControlVertices() const { + return _numControlVertices; + } protected: @@ -281,40 +270,47 @@ protected: } int _numVertices; + int _numControlVertices; private: Osd::GLDrawContext *_drawContext; std::vector _restPosition; }; -template class Topology : public TopologyBase { - public: - - typedef COMPUTE_CONTROLLER ComputeController; - typedef typename COMPUTE_CONTROLLER::ComputeContext ComputeContext; + typedef EVALUATOR Evaluator; + typedef STENCIL_TABLES StencilTables; typedef DEVICE_CONTEXT DeviceContext; + typedef Osd::EvaluatorCacheT EvaluatorCache; - Topology(ComputeController * computeController, - Far::PatchTables const * patchTables, - Far::StencilTables const * vertexStencils, + Topology(Far::PatchTables const * patchTables, + Far::StencilTables const * vertexStencils, //XXX: takes ownership Far::StencilTables const * varyingStencils, + int numControlVertices, + EvaluatorCache * evaluatorCache = NULL, DeviceContext * 
deviceContext = NULL) : TopologyBase(patchTables), - _computeController(computeController), + _evaluatorCache(evaluatorCache), _deviceContext(deviceContext) { - _computeContext = ComputeContext::Create( - vertexStencils, varyingStencils, deviceContext); + _numControlVertices = numControlVertices; + _numVertices = numControlVertices + vertexStencils->GetNumStencils(); + + _vertexStencils = Osd::convertToCompatibleStencilTables( + vertexStencils, deviceContext); + _varyingStencils = Osd::convertToCompatibleStencilTables( + varyingStencils, deviceContext); - _numVertices = vertexStencils->GetNumStencils() + - vertexStencils->GetNumControlVertices(); } ~Topology() { - delete _computeContext; + delete _vertexStencils; + delete _varyingStencils; } void Refine(InstancesBase *instance, int numInstances) { @@ -329,21 +325,59 @@ public: for (int i = 0; i < numInstances; ++i) { - Osd::VertexBufferDescriptor vertexDesc( - globalVertexDesc.offset + _numVertices*globalVertexDesc.stride*i, + Osd::VertexBufferDescriptor vertexSrcDesc( + globalVertexDesc.offset + _numVertices*i*globalVertexDesc.stride, globalVertexDesc.length, globalVertexDesc.stride); - Osd::VertexBufferDescriptor varyingDesc( - globalVaryingDesc.offset + _numVertices*globalVaryingDesc.stride*i, - globalVaryingDesc.length, - globalVaryingDesc.stride); + Osd::VertexBufferDescriptor vertexDstDesc( + globalVertexDesc.offset + (_numVertices*i + _numControlVertices)*globalVertexDesc.stride, + globalVertexDesc.length, + globalVertexDesc.stride); - _computeController->Compute(_computeContext, - typedInstance->GetVertexBuffer(), - typedInstance->GetVaryingBuffer(), - &vertexDesc, - &varyingDesc); + // vertex + Evaluator const *evalInstance = Osd::GetEvaluator( + _evaluatorCache, vertexSrcDesc, vertexDstDesc, _deviceContext); + + Evaluator::EvalStencils(typedInstance->GetVertexBuffer(), vertexSrcDesc, + typedInstance->GetVertexBuffer(), vertexDstDesc, + _vertexStencils, + evalInstance, + _deviceContext); + + // varying + 
if (_varyingStencils) { + Osd::VertexBufferDescriptor varyingSrcDesc( + globalVaryingDesc.offset + _numVertices*i*globalVaryingDesc.stride, + globalVaryingDesc.length, + globalVaryingDesc.stride); + + Osd::VertexBufferDescriptor varyingDstDesc( + globalVaryingDesc.offset + (_numVertices*i + _numControlVertices)*globalVaryingDesc.stride, + globalVaryingDesc.length, + globalVaryingDesc.stride); + + evalInstance = Osd::GetEvaluator( + _evaluatorCache, varyingSrcDesc, varyingDstDesc, _deviceContext); + + if (typedInstance->GetVaryingBuffer()) { + // non interleaved + Evaluator::EvalStencils( + typedInstance->GetVaryingBuffer(), varyingSrcDesc, + typedInstance->GetVaryingBuffer(), varyingDstDesc, + _varyingStencils, + evalInstance, + _deviceContext); + } else { + // interleaved + Evaluator::EvalStencils( + typedInstance->GetVertexBuffer(), varyingSrcDesc, + typedInstance->GetVertexBuffer(), varyingDstDesc, + _varyingStencils, + evalInstance, + _deviceContext); + } + } } } @@ -359,7 +393,7 @@ public: } virtual void Synchronize() { - _computeController->Synchronize(); + Evaluator::Synchronize(_deviceContext); } virtual void UpdateVertexTexture(InstancesBase *instances) { @@ -371,8 +405,9 @@ public: } private: - ComputeController *_computeController; - ComputeContext *_computeContext; + StencilTables const *_vertexStencils; + StencilTables const *_varyingStencils; + EvaluatorCache * _evaluatorCache; DeviceContext *_deviceContext; }; @@ -644,81 +679,85 @@ createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark ) } } + int numControlVertices = refiner->GetNumVertices(0); // create partitioned patcharray TopologyBase *topology = NULL; if (g_kernel == kCPU) { - if (not g_cpuComputeController) - g_cpuComputeController = new Osd::CpuComputeController(); - topology = new Topology(g_cpuComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + vertexStencils, varyingStencils, + numControlVertices); #ifdef 
OPENSUBDIV_HAS_OPENMP } else if (g_kernel == kOPENMP) { - if (not g_ompComputeController) - g_ompComputeController = new Osd::OmpComputeController(); - topology = new Topology(g_ompComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + vertexStencils, varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (g_kernel == kTBB) { - if (not g_tbbComputeController) - g_tbbComputeController = new Osd::TbbComputeController(); - topology = new Topology(g_tbbComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + vertexStencils, varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if (g_kernel == kCUDA) { - if (not g_cudaComputeController) - g_cudaComputeController = new Osd::CudaComputeController(); - topology = new Topology(g_cudaComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + vertexStencils, varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if (g_kernel == kCL) { - if (not g_clComputeController) - g_clComputeController = new Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - topology = new Topology(g_clComputeController, - patchTables, - vertexStencils, varyingStencils, - &g_clDeviceContext); + static Osd::EvaluatorCacheT clEvaluatorCache; + topology = new Topology( + patchTables, + vertexStencils, varyingStencils, + numControlVertices, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if (g_kernel == kGLSL) { - if (not g_glslXFBComputeController) - g_glslXFBComputeController = new Osd::GLSLTransformFeedbackComputeController(); - topology = new Topology(g_glslXFBComputeController, - patchTables, - vertexStencils, varyingStencils); + static Osd::EvaluatorCacheT glXFBEvaluatorCache; + topology = new Topology( + patchTables, + vertexStencils, 
varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if (g_kernel == kGLSLCompute) { - if (not g_glslComputeController) - g_glslComputeController = new Osd::GLSLComputeController(); - topology = new Topology(g_glslComputeController, - patchTables, - vertexStencils, varyingStencils); + static Osd::EvaluatorCacheT glComputeEvaluatorCache; + topology = new Topology( + patchTables, + vertexStencils, varyingStencils, + numControlVertices); #endif } else { } delete refiner; - delete vertexStencils; - delete varyingStencils; + // XXX: Weired API. think again.. +/// delete vertexStencils; +/// delete varyingStencils; delete patchTables; // centering rest position @@ -1291,28 +1330,6 @@ uninitGL() { delete g_instances; if (g_topology) delete g_topology; - - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - delete g_glslXFBComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - delete g_glslComputeController; -#endif } //------------------------------------------------------------------------------ @@ -1453,7 +1470,7 @@ static void callbackDisplayStyle(int b) { g_displayStyle = b; - rebuildInstances(); + rebuildOsdMesh(); } static void diff --git a/examples/glStencilViewer/glStencilViewer.cpp b/examples/glStencilViewer/glStencilViewer.cpp index 63fdeca0..ef240eb4 100644 --- a/examples/glStencilViewer/glStencilViewer.cpp +++ b/examples/glStencilViewer/glStencilViewer.cpp @@ -54,8 +54,7 @@ GLFWmonitor* g_primary=0; #include #include -#include -#include +#include #include #include @@ -154,18 +153,12 @@ Osd::VertexBufferDescriptor g_controlDesc( /*offset*/ 0, /*legnth*/ 3, /*stride* g_outputDuDesc( /*offset*/ 3, /*legnth*/ 3, 
/*stride*/ 18 ), g_outputDvDesc( /*offset*/ 9, /*legnth*/ 3, /*stride*/ 18 ); -Osd::CpuEvalStencilsContext * g_evalCtx=0; - -Osd::CpuEvalStencilsController g_evalCpuCtrl; - #if defined(OPENSUBDIV_HAS_OPENMP) - #include - Osd::OmpEvalStencilsController g_evalOmpCtrl; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - Osd::TbbEvalStencilsController g_evalTbbCtrl; + #include #endif @@ -200,48 +193,41 @@ updateGeom() { float * ptr = g_stencilValues->BindCpuBuffer(); memset(ptr, 0, g_controlStencils->GetNumStencils() * 18 * sizeof(float)); - // Uppdate random points by applying point & tangent stencils + // Update random points by applying point & tangent stencils switch (g_kernel) { case kCPU: { - g_evalCpuCtrl.UpdateValues( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDataDesc, g_stencilValues ); - - g_evalCpuCtrl.UpdateDerivs( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDuDesc, g_stencilValues, - g_outputDvDesc, g_stencilValues ); + Osd::CpuEvaluator::EvalStencils( + g_controlValues, g_controlDesc, // input + g_stencilValues, g_outputDataDesc, // position + g_stencilValues, g_outputDuDesc, // Du + g_stencilValues, g_outputDvDesc, // Dv + // Normals will be filled afterwards + g_controlStencils); } break; #if defined(OPENSUBDIV_HAS_OPENMP) case kOPENMP: { - g_evalOmpCtrl.UpdateValues( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDataDesc, g_stencilValues ); - - g_evalOmpCtrl.UpdateDerivs( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDuDesc, g_stencilValues, - g_outputDvDesc, g_stencilValues ); +// FIXME: implements OmpEvaluator + Osd::CpuEvaluator::EvalStencils( + g_controlValues, g_controlDesc, // input + g_stencilValues, g_outputDataDesc, // position + g_stencilValues, g_outputDuDesc, // Du + g_stencilValues, g_outputDvDesc, // Dv + // Normals will be filled afterwards + g_controlStencils); } break; #endif #if defined(OPENSUBDIV_HAS_TBB) +// FIXME: implements TbbEvaluator case kTBB: { - 
g_evalTbbCtrl.UpdateValues( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDataDesc, g_stencilValues ); - - g_evalTbbCtrl.UpdateDerivs( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDuDesc, g_stencilValues, - g_outputDvDesc, g_stencilValues ); + Osd::CpuEvaluator::EvalStencils( + g_controlValues, g_controlDesc, // input + g_stencilValues, g_outputDataDesc, // position + g_stencilValues, g_outputDuDesc, // Du + g_stencilValues, g_outputDvDesc, // Dv + // Normals will be filled afterwards + g_controlStencils); } break; #endif default: @@ -364,9 +350,6 @@ createMesh(ShapeDesc const & shapeDesc, int level) { g_controlValues = Osd::CpuVertexBuffer::Create(3, nverts); // Create eval context & data buffers - delete g_evalCtx; - g_evalCtx = Osd::CpuEvalStencilsContext::Create(g_controlStencils); - delete g_stencilValues; g_stencilValues = Osd::CpuGLVertexBuffer::Create(3, g_controlStencils->GetNumStencils() * 6 ); @@ -674,6 +657,7 @@ drawStencils() { g_samplesProgram.EnableVertexAttributes(); + glDrawArrays(GL_POINTS, 0, numEdges*2); glDrawArrays(GL_LINES, 0, numEdges*2); glBindVertexArray(0); diff --git a/examples/glViewer/glViewer.cpp b/examples/glViewer/glViewer.cpp index d91dc5ae..f127cd33 100644 --- a/examples/glViewer/glViewer.cpp +++ b/examples/glViewer/glViewer.cpp @@ -46,55 +46,41 @@ GLFWmonitor* g_primary=0; #include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - + #include #include "../common/clDeviceContext.h" CLDeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA #include - 
#include - #include - + #include #include "../common/cudaDeviceContext.h" CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include + #include #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslTransformFeedbackComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include + #include #include - OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL; #endif #include @@ -438,13 +424,7 @@ updateGeom() { Stopwatch s; s.Start(); - if (g_displayStyle == kInterleavedVaryingColor) { - OpenSubdiv::Osd::VertexBufferDescriptor vertexDesc(0, 3, 7); - OpenSubdiv::Osd::VertexBufferDescriptor varyingDesc(3, 4, 7); - g_mesh->Refine(&vertexDesc, &varyingDesc, true); - } else { - g_mesh->Refine(); - } + g_mesh->Refine(); s.Stop(); g_cpuTime = float(s.GetElapsed() * 1000.0f); @@ -481,7 +461,8 @@ getKernelName(int kernel) { static void createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=kCatmark) { - typedef OpenSubdiv::Far::ConstIndexArray IndexArray; + using namespace OpenSubdiv; + typedef Far::ConstIndexArray IndexArray; bool doAnim = g_objAnim and g_currentShape==0; @@ -493,12 +474,12 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme= } // create Vtr mesh (topology) - OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape); - OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape); + Sdc::SchemeType sdctype = GetSdcType(*shape); + Sdc::Options sdcoptions = GetSdcOptions(*shape); - OpenSubdiv::Far::TopologyRefiner * refiner = - OpenSubdiv::Far::TopologyRefinerFactory::Create(*shape, - OpenSubdiv::Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); + Far::TopologyRefiner * refiner = + Far::TopologyRefinerFactory::Create(*shape, + Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); // save coarse topology (used 
for coarse mesh drawing) int nedges = refiner->GetNumEdges(0), @@ -533,117 +514,104 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme= interleaveVarying = g_displayStyle == kInterleavedVaryingColor, doSingleCreasePatch = (g_singleCreasePatch!=0 and g_scheme==kCatmark); - OpenSubdiv::Osd::MeshBitset bits; - bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); - bits.set(OpenSubdiv::Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); - bits.set(OpenSubdiv::Osd::MeshInterleaveVarying, interleaveVarying); - bits.set(OpenSubdiv::Osd::MeshFVarData, g_displayStyle == kFaceVaryingColor); - bits.set(OpenSubdiv::Osd::MeshEndCapBSplineBasis, g_endCap == kEndCapBSplineBasis); - bits.set(OpenSubdiv::Osd::MeshEndCapGregoryBasis, g_endCap == kEndCapGregoryBasis); - bits.set(OpenSubdiv::Osd::MeshEndCapLegacyGregory, g_endCap == kEndCapLegacyGregory); + Osd::MeshBitset bits; + bits.set(Osd::MeshAdaptive, doAdaptive); + bits.set(Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); + bits.set(Osd::MeshInterleaveVarying, interleaveVarying); + bits.set(Osd::MeshFVarData, g_displayStyle == kFaceVaryingColor); + bits.set(Osd::MeshEndCapBSplineBasis, g_endCap == kEndCapBSplineBasis); + bits.set(Osd::MeshEndCapGregoryBasis, g_endCap == kEndCapGregoryBasis); + bits.set(Osd::MeshEndCapLegacyGregory, g_endCap == kEndCapLegacyGregory); int numVertexElements = 3; int numVaryingElements = (g_displayStyle == kVaryingColor or interleaveVarying) ? 
4 : 0; if (kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == kTBB) { - if (not g_tbbComputeController) { - g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_tbbComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if(kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, &g_clDeviceContext); + // CLKernel + static Osd::EvaluatorCacheT clEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if(kernel == kCUDA) { - if (not g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - 
g_cudaComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if(kernel == kGLSL) { - if (not g_glslTransformFeedbackComputeController) { - g_glslTransformFeedbackComputeController = new OpenSubdiv::Osd::GLSLTransformFeedbackComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslTransformFeedbackComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + static Osd::EvaluatorCacheT glXFBEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &glXFBEvaluatorCache); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if(kernel == kGLSLCompute) { - if (not g_glslComputeController) { - g_glslComputeController = new OpenSubdiv::Osd::GLSLComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + static Osd::EvaluatorCacheT glComputeEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &glComputeEvaluatorCache); + + #endif } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); @@ -1384,28 +1352,6 @@ uninitGL() { if (g_mesh) delete g_mesh; - - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - delete g_glslTransformFeedbackComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - delete g_glslComputeController; -#endif } //------------------------------------------------------------------------------ diff 
--git a/examples/mayaPolySmooth/mayaPolySmooth.cpp b/examples/mayaPolySmooth/mayaPolySmooth.cpp index 1b90c98c..97a2712a 100644 --- a/examples/mayaPolySmooth/mayaPolySmooth.cpp +++ b/examples/mayaPolySmooth/mayaPolySmooth.cpp @@ -64,8 +64,6 @@ #include #include -#include -#include #include diff --git a/opensubdiv/osd/CMakeLists.txt b/opensubdiv/osd/CMakeLists.txt index 2fa33a88..88381328 100644 --- a/opensubdiv/osd/CMakeLists.txt +++ b/opensubdiv/osd/CMakeLists.txt @@ -27,20 +27,11 @@ #------------------------------------------------------------------------------- # source & headers set(CPU_SOURCE_FILES + cpuEvaluator.cpp cpuKernel.cpp - cpuComputeController.cpp - cpuComputeContext.cpp - cpuEvalLimitContext.cpp - cpuEvalLimitController.cpp - cpuEvalLimitKernel.cpp - cpuEvalStencilsContext.cpp - cpuEvalStencilsController.cpp - cpuSmoothNormalContext.cpp - cpuSmoothNormalController.cpp cpuVertexBuffer.cpp drawContext.cpp drawRegistry.cpp - evalLimitContext.cpp ) set(GPU_SOURCE_FILES ) @@ -50,21 +41,11 @@ set(INC_FILES ) set(PRIVATE_HEADER_FILES debug.h cpuKernel.h - cpuEvalLimitKernel.h ) set(PUBLIC_HEADER_FILES - computeController.h - cpuComputeContext.h - cpuComputeController.h - cpuEvalLimitContext.h - cpuEvalLimitController.h - cpuEvalStencilsContext.h - cpuEvalStencilsController.h - cpuSmoothNormalContext.h - cpuSmoothNormalController.h + cpuEvaluator.h cpuVertexBuffer.h - evalLimitContext.h mesh.h nonCopyable.h opengl.h @@ -75,21 +56,16 @@ set(PUBLIC_HEADER_FILES set(DOXY_HEADER_FILES ${PUBLIC_HEADER_FILES}) - #------------------------------------------------------------------------------- set(OPENMP_PUBLIC_HEADERS + ompEvaluator.h ompKernel.h - ompComputeController.h - ompEvalStencilsController.h - ompSmoothNormalController.h ) if(OPENMP_FOUND ) list(APPEND CPU_SOURCE_FILES + ompEvaluator.cpp ompKernel.cpp - ompComputeController.cpp - ompEvalStencilsController.cpp - ompSmoothNormalController.cpp ) list(APPEND PUBLIC_HEADER_FILES ${OPENMP_PUBLIC_HEADERS}) @@ 
-103,20 +79,16 @@ list(APPEND DOXY_HEADER_FILES ${OPENMP_PUBLIC_HEADERS}) #------------------------------------------------------------------------------- set(TBB_PUBLIC_HEADERS + tbbEvaluator.h tbbKernel.h - tbbComputeController.h - tbbEvalStencilsController.h - tbbSmoothNormalController.h ) if( TBB_FOUND ) include_directories("${TBB_INCLUDE_DIR}") list(APPEND CPU_SOURCE_FILES + tbbEvaluator.cpp tbbKernel.cpp - tbbComputeController.cpp - tbbEvalStencilsController.cpp - tbbSmoothNormalController.cpp ) list(APPEND PUBLIC_HEADER_FILES ${TBB_PUBLIC_HEADERS}) @@ -166,18 +138,16 @@ list(APPEND DOXY_HEADER_FILES ${GL_PUBLIC_HEADERS}) # OpenGL 4.2 dependencies # note : (GLSL transform feedback kernels require GL 4.2) set(GL_4_2_PUBLIC_HEADERS - glslTransformFeedbackComputeContext.h - glslTransformFeedbackComputeController.h + glXFBEvaluator.h ) if( OPENGL_4_2_FOUND ) list(APPEND GPU_SOURCE_FILES - glslTransformFeedbackComputeContext.cpp - glslTransformFeedbackComputeController.cpp + glXFBEvaluator.cpp ) list(APPEND PUBLIC_HEADER_FILES ${GL_4_2_PUBLIC_HEADERS}) list(APPEND KERNEL_FILES - glslTransformFeedbackKernel.glsl + glslXFBKernel.glsl ) list(APPEND PLATFORM_GPU_LIBRARIES ${GLEW_LIBRARY} @@ -191,14 +161,12 @@ list(APPEND DOXY_HEADER_FILES ${GL_4_2_PUBLIC_HEADERS}) # OpenGL 4.3 dependencies # note : (GLSL compute shader kernels require GL 4.3) set(GL_4_3_PUBLIC_HEADERS - glslComputeContext.h - glslComputeController.h + glComputeEvaluator.h ) if( OPENGL_4_3_FOUND ) list(APPEND GPU_SOURCE_FILES - glslComputeContext.cpp - glslComputeController.cpp + glComputeEvaluator.cpp ) list(APPEND PUBLIC_HEADER_FILES ${GL_4_3_PUBLIC_HEADERS}) list(APPEND KERNEL_FILES @@ -215,8 +183,7 @@ list(APPEND DOXY_HEADER_FILES ${GL_4_3_PUBLIC_HEADERS}) # DX11 code & dependencies set(DXSDK_PUBLIC_HEADERS cpuD3D11VertexBuffer.h - d3d11ComputeContext.h - d3d11ComputeController.h + d3d11ComputeEvaluator.h d3d11DrawContext.h d3d11DrawRegistry.h d3d11VertexBuffer.h @@ -225,8 +192,7 @@ 
set(DXSDK_PUBLIC_HEADERS if( DXSDK_FOUND ) list(APPEND GPU_SOURCE_FILES cpuD3D11VertexBuffer.cpp - d3d11ComputeContext.cpp - d3d11ComputeController.cpp + d3d11ComputeEvaluator.cpp d3d11DrawContext.cpp d3d11DrawRegistry.cpp d3d11VertexBuffer.cpp @@ -248,16 +214,14 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS}) #------------------------------------------------------------------------------- # OpenCL code & dependencies set(OPENCL_PUBLIC_HEADERS - clComputeContext.h - clComputeController.h + clEvaluator.h clVertexBuffer.h opencl.h ) if ( OPENCL_FOUND ) list(APPEND GPU_SOURCE_FILES - clComputeContext.cpp - clComputeController.cpp + clEvaluator.cpp clVertexBuffer.cpp ) list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS}) @@ -288,15 +252,13 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS}) #------------------------------------------------------------------------------- # CUDA code & dependencies set(CUDA_PUBLIC_HEADERS - cudaComputeContext.h - cudaComputeController.h + cudaEvaluator.h cudaVertexBuffer.h ) if( CUDA_FOUND ) list(APPEND GPU_SOURCE_FILES - cudaComputeController.cpp - cudaComputeContext.cpp + cudaEvaluator.cpp cudaVertexBuffer.cpp ) list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS}) diff --git a/opensubdiv/osd/clComputeContext.cpp b/opensubdiv/osd/clComputeContext.cpp deleted file mode 100644 index 674a1a4a..00000000 --- a/opensubdiv/osd/clComputeContext.cpp +++ /dev/null @@ -1,233 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/clComputeContext.h" - -#include - -#include "../far/stencilTables.h" -#include "../far/error.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ----------------------------------------------------------------------------- - -template cl_mem -createCLBuffer(std::vector const & src, cl_context clContext) { - - cl_mem devicePtr = 0; - cl_int errNum = 0; - - devicePtr = clCreateBuffer(clContext, - CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, - src.size()*sizeof(T), - (void*)(&src.at(0)), - &errNum); - - if (errNum != CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", errNum); - } - - return devicePtr; -} - -// ----------------------------------------------------------------------------- - -class CLComputeContext::CLStencilTables { - -public: - CLStencilTables(Far::StencilTables const & stencilTables, - cl_context clContext) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createCLBuffer(stencilTables.GetSizes(), clContext); - _offsets = createCLBuffer(stencilTables.GetOffsets(), clContext); - _indices = createCLBuffer(stencilTables.GetControlIndices(), - clContext); - _weights = 
createCLBuffer(stencilTables.GetWeights(), clContext); - } else { - _sizes = _offsets = _indices = _weights = NULL; - } - } - - ~CLStencilTables() { - if (_sizes) clReleaseMemObject(_sizes); - if (_offsets) clReleaseMemObject(_offsets); - if (_indices) clReleaseMemObject(_indices); - if (_weights) clReleaseMemObject(_weights); - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - cl_mem GetSizes() const { - return _sizes; - } - - cl_mem GetOffsets() const { - return _offsets; - } - - cl_mem GetIndices() const { - return _indices; - } - - cl_mem GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - -private: - cl_mem _sizes, - _offsets, - _indices, - _weights; - int _numStencils; -}; - -// ----------------------------------------------------------------------------- - -CLComputeContext::CLComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new CLStencilTables(*vertexStencilTables, - clContext); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = new CLStencilTables(*varyingStencilTables, - clContext); - - if (_numControlVertices) { - assert(_numControlVertices - == varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -CLComputeContext::~CLComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -CLComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? 
_vertexStencilTables->IsValid() : false; -} - -bool -CLComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -CLComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; -} - -int -CLComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; -} -// ---------------------------------------------------------------------------- - -cl_mem -CLComputeContext::GetVertexStencilTablesSizes() const { - return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0; -} - -cl_mem -CLComputeContext::GetVertexStencilTablesOffsets() const { - return _vertexStencilTables ? _vertexStencilTables->GetOffsets() : 0; -} - -cl_mem -CLComputeContext::GetVertexStencilTablesIndices() const { - return _vertexStencilTables ? _vertexStencilTables->GetIndices() : 0; -} - -cl_mem -CLComputeContext::GetVertexStencilTablesWeights() const { - return _vertexStencilTables ? _vertexStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -cl_mem -CLComputeContext::GetVaryingStencilTablesSizes() const { - return _varyingStencilTables ? _varyingStencilTables->GetSizes() : 0; -} - -cl_mem -CLComputeContext::GetVaryingStencilTablesOffsets() const { - return _varyingStencilTables ? _varyingStencilTables->GetOffsets() : 0; -} - -cl_mem -CLComputeContext::GetVaryingStencilTablesIndices() const { - return _varyingStencilTables ? _varyingStencilTables->GetIndices() : 0; -} - -cl_mem -CLComputeContext::GetVaryingStencilTablesWeights() const { - return _varyingStencilTables ? 
_varyingStencilTables->GetWeights() : 0; -} - - -// ----------------------------------------------------------------------------- - -CLComputeContext * -CLComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext) { - - CLComputeContext *result = - new CLComputeContext( - vertexStencilTables, varyingStencilTables, clContext); - - return result; -} - -// ----------------------------------------------------------------------------- -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/clComputeContext.h b/opensubdiv/osd/clComputeContext.h deleted file mode 100644 index 6ef2f50f..00000000 --- a/opensubdiv/osd/clComputeContext.h +++ /dev/null @@ -1,155 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#ifndef OSD_CL_COMPUTE_CONTEXT_H -#define OSD_CL_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include "../osd/nonCopyable.h" -#include "../osd/opencl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { class StencilTables; } - -namespace Osd { - -/// -/// \brief OpenCL Refine Context -/// -/// The OpenCL-Compute implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. -/// -class CLComputeContext : public NonCopyable { - -public: - /// Creates an CLComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param clContext An active OpenCL compute context - /// - static CLComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext); - - /// Creates an CLComputeContext instance (template version) - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param context A user defined class to provide cl_context. 
- /// must implement GetContext() - /// - template - static CLComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - DEVICE_CONTEXT context) { - return Create(vertexStencilTables, varyingStencilTables, - context->GetContext()); - } - - /// Destructor - virtual ~CLComputeContext(); - - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil sizes - cl_mem GetVertexStencilTablesSizes() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil offsets - cl_mem GetVertexStencilTablesOffsets() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil indices - cl_mem GetVertexStencilTablesIndices() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil weights - cl_mem GetVertexStencilTablesWeights() const; - - - /// Returns the Cuda buffer containing Varying-stencil stencil sizes - cl_mem GetVaryingStencilTablesSizes() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil offsets - cl_mem GetVaryingStencilTablesOffsets() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil indices - cl_mem GetVaryingStencilTablesIndices() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil weights - cl_mem GetVaryingStencilTablesWeights() const; - - -protected: - explicit CLComputeContext(Far::StencilTables const * 
vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext); - -private: - class CLStencilTables; - - CLStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CL_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/clComputeController.cpp b/opensubdiv/osd/clComputeController.cpp deleted file mode 100644 index d59631ef..00000000 --- a/opensubdiv/osd/clComputeController.cpp +++ /dev/null @@ -1,285 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/clComputeController.h" -#include "../far/error.h" - -#if defined(_WIN32) - #include -#endif - -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -static const char *clSource = -#include "clKernel.gen.h" -; - -// ----------------------------------------------------------------------------- - -static cl_kernel buildKernel(cl_program prog, const char * name) { - - cl_int errNum; - cl_kernel k = clCreateKernel(prog, name, &errNum); - - if (errNum != CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel '%s' (%d)\n", name, errNum); - } - return k; -} - -// ----------------------------------------------------------------------------- - -class CLComputeController::KernelBundle : - NonCopyable { - -public: - - bool Compile(cl_context clContext, - VertexBufferDescriptor const & srcDesc, - VertexBufferDescriptor const & dstDesc) { - - cl_int errNum; - - // XXX: only store srcDesc. - // this is ok since currently this kernel doesn't get called with - // different strides for src and dst. This function will be - // refactored soon. 
- _desc = VertexBufferDescriptor(0, srcDesc.length, srcDesc.stride); - - std::ostringstream defines; - defines << "#define LENGTH " << srcDesc.length << "\n" - << "#define SRC_STRIDE " << srcDesc.stride << "\n" - << "#define DST_STRIDE " << dstDesc.stride << "\n"; - std::string defineStr = defines.str(); - - const char *sources[] = { defineStr.c_str(), clSource }; - _program = clCreateProgramWithSource(clContext, 2, sources, 0, &errNum); - if (errNum!=CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "clCreateProgramWithSource (%d)", errNum); - } - - errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL); - if (errNum != CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum); - - cl_int numDevices = 0; - clGetContextInfo(clContext, - CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL); - - cl_device_id *devices = new cl_device_id[numDevices]; - clGetContextInfo(clContext, CL_CONTEXT_DEVICES, - sizeof(cl_device_id)*numDevices, devices, NULL); - - for (int i = 0; i < numDevices; ++i) { - char cBuildLog[10240]; - clGetProgramBuildInfo(_program, devices[i], - CL_PROGRAM_BUILD_LOG, sizeof(cBuildLog), cBuildLog, NULL); - Far::Error(Far::FAR_RUNTIME_ERROR, cBuildLog); - } - delete[] devices; - - return false; - } - - // compile all cl compute kernels - _stencilsKernel = buildKernel(_program, "computeStencils"); - - return true; - } - - cl_kernel GetStencilsKernel() const { - return _stencilsKernel; - } - - struct Match { - - Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -private: - - cl_program _program; - - cl_kernel _stencilsKernel; - - VertexBufferDescriptor _desc; -}; - -// ---------------------------------------------------------------------------- - -void -CLComputeController::ApplyStencilTableKernel(ComputeContext const *context) { 
- - assert(context); - - cl_int errNum; - - size_t globalWorkSize = 0; - - if (context->HasVertexStencilTables()) { - int start = 0; - int end = context->GetNumStencilsInVertexStencilTables(); - globalWorkSize = (size_t)(end - start); - - KernelBundle const * bundle = getKernel(_currentBindState.vertexDesc); - - cl_kernel kernel = bundle->GetStencilsKernel(); - - cl_mem sizes = context->GetVertexStencilTablesSizes(), - offsets = context->GetVertexStencilTablesOffsets(), - indices = context->GetVertexStencilTablesIndices(), - weights = context->GetVertexStencilTablesWeights(); - - cl_mem src = _currentBindState.vertexBuffer; - cl_mem dst = _currentBindState.vertexBuffer; - - VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride; - - clSetKernelArg(kernel, 0, sizeof(cl_mem), &src); - clSetKernelArg(kernel, 1, sizeof(int), &srcDesc.offset); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst); - clSetKernelArg(kernel, 3, sizeof(int), &dstDesc.offset); - clSetKernelArg(kernel, 4, sizeof(cl_mem), &sizes); - clSetKernelArg(kernel, 5, sizeof(cl_mem), &offsets); - clSetKernelArg(kernel, 6, sizeof(cl_mem), &indices); - clSetKernelArg(kernel, 7, sizeof(cl_mem), &weights); - clSetKernelArg(kernel, 8, sizeof(int), &start); - clSetKernelArg(kernel, 9, sizeof(int), &end); - - errNum = clEnqueueNDRangeKernel( - _clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); - if (errNum!=CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "ApplyStencilTableKernel (%d) ", errNum); - } - } - - if (context->HasVaryingStencilTables()) { - int start = 0; - int end = context->GetNumStencilsInVaryingStencilTables(); - globalWorkSize = (size_t)(end - start); - - KernelBundle const * bundle = getKernel(_currentBindState.varyingDesc); - - cl_kernel kernel = bundle->GetStencilsKernel(); - - cl_mem sizes = context->GetVaryingStencilTablesSizes(), - offsets = 
context->GetVaryingStencilTablesOffsets(), - indices = context->GetVaryingStencilTablesIndices(), - weights = context->GetVaryingStencilTablesWeights(); - - cl_mem src = _currentBindState.varyingBuffer; - cl_mem dst = _currentBindState.varyingBuffer; - - VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride; - - clSetKernelArg(kernel, 0, sizeof(cl_mem), &src); - clSetKernelArg(kernel, 1, sizeof(int), &srcDesc.offset); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst); - clSetKernelArg(kernel, 3, sizeof(int), &dstDesc.offset); - clSetKernelArg(kernel, 4, sizeof(cl_mem), &sizes); - clSetKernelArg(kernel, 5, sizeof(cl_mem), &offsets); - clSetKernelArg(kernel, 6, sizeof(cl_mem), &indices); - clSetKernelArg(kernel, 7, sizeof(cl_mem), &weights); - clSetKernelArg(kernel, 8, sizeof(int), &start); - clSetKernelArg(kernel, 9, sizeof(int), &end); - - errNum = clEnqueueNDRangeKernel( - _clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); - if (errNum!=CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "ApplyStencilTableKernel (%d)", errNum); - } - } -} - - -// ---------------------------------------------------------------------------- - -CLComputeController::KernelBundle const * -CLComputeController::getKernel(VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - if (it != _kernelRegistry.end()) { - return *it; - } else { - KernelBundle * kernelBundle = new KernelBundle(); - kernelBundle->Compile(_clContext, desc, desc); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -// ---------------------------------------------------------------------------- - -CLComputeController::CLComputeController( - cl_context clContext, cl_command_queue queue) : - _clContext(clContext), _clQueue(queue) { -} - 
-CLComputeController::~CLComputeController() { - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } -} - -// ---------------------------------------------------------------------------- - -void -CLComputeController::Synchronize() { - - clFinish(_clQueue); -} - - -// ----------------------------------------------------------------------------- - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/clComputeController.h b/opensubdiv/osd/clComputeController.h deleted file mode 100644 index 16b59d11..00000000 --- a/opensubdiv/osd/clComputeController.h +++ /dev/null @@ -1,204 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#ifndef OSD_CL_COMPUTE_CONTROLLER_H -#define OSD_CL_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/clComputeContext.h" -#include "../osd/vertexDescriptor.h" -#include "../osd/opencl.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class CLKernelBundle; - -/// \brief Compute controller for launching OpenCL Compute subdivision kernels. -/// -/// CLComputeController is a compute controller class to launch -/// OpenCL subdivision kernels. It requires CLVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class CLComputeController { -public: - typedef CLComputeContext ComputeContext; - - /// Constructor. - /// - /// @param clContext a valid instanciated OpenCL context - /// - /// @param queue a valid non-zero OpenCL command queue - /// - CLComputeController(cl_context clContext, cl_command_queue queue); - - /// Destructor. - ~CLComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CLContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. 
- /// - template - void Compute( CLComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CLContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CLComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - - /// Returns CL context - cl_context GetContext() const { return _clContext; } - - /// Returns CL command queue - cl_command_queue GetCommandQueue() const { return _clQueue; } - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context); - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? 
varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - vertexBuffer->BindCLBuffer(_clQueue) : 0; - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCLBuffer(_clQueue) : 0; - } - - void unbind() { - _currentBindState.Reset(); - } - - -private: - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = NULL; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - cl_mem vertexBuffer, - varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - typedef std::vector KernelRegistry; - - KernelRegistry _kernelRegistry; - - cl_context _clContext; - cl_command_queue _clQueue; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/clD3D11VertexBuffer.cpp b/opensubdiv/osd/clD3D11VertexBuffer.cpp old mode 100755 new mode 100644 diff --git a/opensubdiv/osd/clD3D11VertexBuffer.h b/opensubdiv/osd/clD3D11VertexBuffer.h index c28881a6..3d318934 100644 --- a/opensubdiv/osd/clD3D11VertexBuffer.h +++ b/opensubdiv/osd/clD3D11VertexBuffer.h @@ -51,7 +51,7 @@ namespace Osd { /// D3D11VertexBuffer implements CLVertexBufferInterface and /// D3D11VertexBufferInterface. /// -/// An instance of this buffer class can be passed to D3D11ComputeController. +/// An instance of this buffer class can be passed to D3D11ComputeEvaluator. 
/// class CLD3D11VertexBuffer { public: diff --git a/opensubdiv/osd/clEvaluator.cpp b/opensubdiv/osd/clEvaluator.cpp new file mode 100644 index 00000000..edc6fbee --- /dev/null +++ b/opensubdiv/osd/clEvaluator.cpp @@ -0,0 +1,206 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. 
+// + +#include "../osd/clEvaluator.h" + +#include +#include +#include + +#include "../osd/opencl.h" +#include "../far/error.h" +#include "../far/stencilTables.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +static const char *clSource = +#include "clKernel.gen.h" +; + +// ---------------------------------------------------------------------------- + +template cl_mem +createCLBuffer(std::vector const & src, cl_context clContext) { + cl_int errNum = 0; + cl_mem devicePtr = clCreateBuffer(clContext, + CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, + src.size()*sizeof(T), + (void*)(&src.at(0)), + &errNum); + + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", errNum); + } + + return devicePtr; +} + +// ---------------------------------------------------------------------------- + +CLStencilTables::CLStencilTables(Far::StencilTables const *stencilTables, + cl_context clContext) { + _numStencils = stencilTables->GetNumStencils(); + + if (_numStencils > 0) { + _sizes = createCLBuffer(stencilTables->GetSizes(), clContext); + _offsets = createCLBuffer(stencilTables->GetOffsets(), clContext); + _indices = createCLBuffer(stencilTables->GetControlIndices(), + clContext); + _weights = createCLBuffer(stencilTables->GetWeights(), clContext); + } else { + _sizes = _offsets = _indices = _weights = NULL; + } +} + +CLStencilTables::~CLStencilTables() { + if (_sizes) clReleaseMemObject(_sizes); + if (_offsets) clReleaseMemObject(_offsets); + if (_indices) clReleaseMemObject(_indices); + if (_weights) clReleaseMemObject(_weights); +} + +// --------------------------------------------------------------------------- + +CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue) + : _clContext(context), _clCommandQueue(queue), + _program(NULL), _stencilsKernel(NULL) { +} + +CLEvaluator::~CLEvaluator() { + if (_stencilsKernel) clReleaseKernel(_stencilsKernel); + if (_program) clReleaseProgram(_program); +} + +bool 
+CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc) { + if (srcDesc.length > dstDesc.length) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "srcDesc length must be less than or equal to " + "dstDesc length.\n"); + return false; + } + + cl_int errNum; + + std::ostringstream defines; + defines << "#define LENGTH " << srcDesc.length << "\n" + << "#define SRC_STRIDE " << srcDesc.stride << "\n" + << "#define DST_STRIDE " << dstDesc.stride << "\n"; + std::string defineStr = defines.str(); + + const char *sources[] = { defineStr.c_str(), clSource }; + _program = clCreateProgramWithSource(_clContext, 2, sources, 0, &errNum); + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "clCreateProgramWithSource (%d)", errNum); + } + + errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum); + + cl_int numDevices = 0; + clGetContextInfo( + _clContext, CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), &numDevices, NULL); + + cl_device_id *devices = new cl_device_id[numDevices]; + clGetContextInfo(_clContext, CL_CONTEXT_DEVICES, + sizeof(cl_device_id)*numDevices, devices, NULL); + + for (int i = 0; i < numDevices; ++i) { + char cBuildLog[10240]; + clGetProgramBuildInfo( + _program, devices[i], + CL_PROGRAM_BUILD_LOG, sizeof(cBuildLog), cBuildLog, NULL); + Far::Error(Far::FAR_RUNTIME_ERROR, cBuildLog); + } + delete[] devices; + + return false; + } + + _stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum); + + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum); + return false; + } + return true; +} + +bool +CLEvaluator::EvalStencils(cl_mem src, + VertexBufferDescriptor const &srcDesc, + cl_mem dst, + VertexBufferDescriptor const &dstDesc, + cl_mem sizes, + cl_mem offsets, + cl_mem indices, + cl_mem weights, + int start, + int end) const { + if (end <= start) return 
true; + + size_t globalWorkSize = (size_t)(end - start); + + clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src); + clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset); + clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst); + clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset); + clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes); + clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets); + clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices); + clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights); + clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start); + clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end); + + cl_int errNum = clEnqueueNDRangeKernel( + _clCommandQueue, _stencilsKernel, 1, NULL, + &globalWorkSize, NULL, 0, NULL, NULL); + + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "ApplyStencilTableKernel (%d) ", errNum); + return false; + } + + clFinish(_clCommandQueue); + return true; +} + +/* static */ +void +CLEvaluator::Synchronize(cl_command_queue clCommandQueue) { + clFinish(clCommandQueue); +} + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/clEvaluator.h b/opensubdiv/osd/clEvaluator.h new file mode 100644 index 00000000..73fff8ff --- /dev/null +++ b/opensubdiv/osd/clEvaluator.h @@ -0,0 +1,230 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_CL_EVALUATOR_H +#define OPENSUBDIV_OSD_CL_EVALUATOR_H + +#include "../version.h" + +#include "../osd/opencl.h" +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far { + class StencilTables; +} + +namespace Osd { + +/// \brief OpenCL stencil tables +/// +/// This class is an OpenCL buffer representation of Far::StencilTables. +/// +/// CLCompute consumes this table to apply stencils +/// +/// +class CLStencilTables { +public: + template + static CLStencilTables *Create(Far::StencilTables const *stencilTables, + DEVICE_CONTEXT context) { + return new CLStencilTables(stencilTables, context->GetContext()); + } + + CLStencilTables(Far::StencilTables const *stencilTables, + cl_context clContext); + ~CLStencilTables(); + + // interfaces needed for CLComputeKernel + cl_mem GetSizesBuffer() const { return _sizes; } + cl_mem GetOffsetsBuffer() const { return _offsets; } + cl_mem GetIndicesBuffer() const { return _indices; } + cl_mem GetWeightsBuffer() const { return _weights; } + int GetNumStencils() const { return _numStencils; } + +private: + cl_mem _sizes; + cl_mem _offsets; + cl_mem _indices; + cl_mem _weights; + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +/// \brief OpenCL stencil kernel +/// +/// +class CLEvaluator { +public: + typedef bool Instantiatable; + /// Constructor. 
+ CLEvaluator(cl_context context, cl_command_queue queue); + + /// Desctructor. + ~CLEvaluator(); + + /// Generic creator template. + template + static CLEvaluator *Create(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + DEVICE_CONTEXT deviceContext) { + return Create(srcDesc, dstDesc, + deviceContext->GetContext(), + deviceContext->GetCommandQueue()); + } + + static CLEvaluator * Create(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + cl_context clContext, + cl_command_queue clCommandQueue) { + CLEvaluator *kernel = new CLEvaluator(clContext, clCommandQueue); + if (kernel->Compile(srcDesc, dstDesc)) return kernel; + delete kernel; + return NULL; + } + + /// \brief Generic static compute function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCLBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCLBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTables stencil table to be applied. The table must have + /// OpenCL memory interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext client providing context class which supports + /// cL_context GetContext() + /// cl_command_queue GetCommandQueue() + /// methods. 
+ /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + CLEvaluator const *instance, + DEVICE_CONTEXT deviceContext) { + if (instance) { + return instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable); + } else { + // Create an instance on demand (slow) + instance = Create(srcDesc, dstDesc, deviceContext); + if (instance) { + bool r = instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// Generic compute function. + /// Dispatch the CL compute kernel asynchronously. + /// Returns false if the kernel hasn't been compiled yet. + template + bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const { + return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue), + srcDesc, + dstVertexBuffer->BindCLBuffer(_clCommandQueue), + dstDesc, + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + 0, + stencilTable->GetNumStencils()); + } + + /// Dispatch the CL compute kernel asynchronously. + /// returns false if the kernel hasn't been compiled yet. + bool EvalStencils(cl_mem src, + VertexBufferDescriptor const &srcDesc, + cl_mem dst, + VertexBufferDescriptor const &dstDesc, + cl_mem sizes, + cl_mem offsets, + cl_mem indices, + cl_mem weights, + int start, + int end) const; + + /// Configure OpenCL kernel. + /// Returns false if it fails to compile the kernel. + bool Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc); + + /// Wait the OpenCL kernels finish. 
+    template <typename DEVICE_CONTEXT>
+    static void Synchronize(DEVICE_CONTEXT deviceContext) {
+        Synchronize(deviceContext->GetCommandQueue());  // forward to the cl_command_queue overload
+    }
+
+    static void Synchronize(cl_command_queue queue);
+
+private:
+    cl_context _clContext;
+    cl_command_queue _clCommandQueue;  // queue handed to BindCLBuffer() by EvalStencils()
+    cl_program _program;
+    cl_kernel _stencilsKernel;
+};
+
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+using namespace OPENSUBDIV_VERSION;
+
+}  // end namespace OpenSubdiv
+
+
+#endif  // OPENSUBDIV_OSD_CL_EVALUATOR_H
diff --git a/opensubdiv/osd/clVertexBuffer.h b/opensubdiv/osd/clVertexBuffer.h index 500ce2f6..a20f70ad 100644 --- a/opensubdiv/osd/clVertexBuffer.h +++ b/opensubdiv/osd/clVertexBuffer.h @@ -37,7 +37,7 @@ namespace Osd { /// \brief Concrete vertex buffer class for OpenCL subvision. /// /// CLVertexBuffer implements CLVertexBufferInterface. An instance of this -/// buffer class can be passed to CLComputeController +/// buffer class can be passed to CLEvaluator /// class CLVertexBuffer { diff --git a/opensubdiv/osd/cpuComputeContext.cpp b/opensubdiv/osd/cpuComputeContext.cpp deleted file mode 100644 index 977764e7..00000000 --- a/opensubdiv/osd/cpuComputeContext.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file.
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../far/stencilTables.h" - -#include "../osd/cpuComputeContext.h" -#include "../osd/cpuKernel.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ---------------------------------------------------------------------------- - -CpuComputeContext::CpuComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0) { - - // XXXX manuelk we do not own the tables, so use copy-constructor for now - // smart pointers eventually - if (vertexStencilTables) { - _vertexStencilTables = new Far::StencilTables(*vertexStencilTables); - } - - if (varyingStencilTables) { - _varyingStencilTables = new Far::StencilTables(*varyingStencilTables); - } -} - -// ---------------------------------------------------------------------------- - -CpuComputeContext::~CpuComputeContext() { - - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -CpuComputeContext * -CpuComputeContext::Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - return new CpuComputeContext(vertexStencilTables, varyingStencilTables); -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuComputeContext.h 
b/opensubdiv/osd/cpuComputeContext.h deleted file mode 100644 index 4f024897..00000000 --- a/opensubdiv/osd/cpuComputeContext.h +++ /dev/null @@ -1,102 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_COMPUTE_CONTEXT_H -#define OSD_CPU_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { class StencilTables; } - -namespace Osd { - -/// -/// \brief CPU Compute Context -/// -/// The CPU implementation of the Compute module contextual functionality. -/// -/// The Osd Compute module provides functionality to interpolate primitive -/// variable data according to a subdivision scheme. -/// -/// Contexts provide an interface between the serialized topological data -/// of a geometric primitive and the computation resources of a compute device. 
-/// -class CpuComputeContext : private NonCopyable { -public: - /// Creates an CpuComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext (not used) - /// - static CpuComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~CpuComputeContext(); - - /// Returns the stencils data applied by this context for vertex - /// interpolation - Far::StencilTables const * GetVertexStencilTables() const { - return _vertexStencilTables; - } - - /// Returns the stencils data applied by this context for varying - /// interpolation - Far::StencilTables const * GetVaryingStencilTables() const { - return _varyingStencilTables; - } - -protected: - explicit CpuComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - Far::StencilTables const * _vertexStencilTables, - * _varyingStencilTables; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CPU_COMPUTE_CONTEXT_H - - diff --git a/opensubdiv/osd/cpuComputeController.cpp b/opensubdiv/osd/cpuComputeController.cpp deleted file mode 100644 index a3c4af0b..00000000 --- a/opensubdiv/osd/cpuComputeController.cpp +++ /dev/null @@ -1,106 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../far/stencilTables.h" -#include "../osd/cpuComputeContext.h" -#include "../osd/cpuComputeController.h" -#include "../osd/cpuKernel.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuComputeController::CpuComputeController() { -} - -CpuComputeController::~CpuComputeController() { -} - -void -CpuComputeController::Synchronize() { -} - -void -CpuComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - Far::StencilTables const * vertexStencils = context->GetVertexStencilTables(); - - if (vertexStencils and _currentBindState.vertexBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += vertexStencils->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = vertexStencils->GetNumStencils(); - - if (end > start) { - CpuComputeStencils(_currentBindState.vertexBuffer, - srcDesc, - _currentBindState.vertexBuffer, - dstDesc, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); - } - } - - 
Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); - - if (varyingStencils and _currentBindState.varyingBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += varyingStencils->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = varyingStencils->GetNumStencils(); - - if (end > start) { - CpuComputeStencils(_currentBindState.varyingBuffer, - srcDesc, - _currentBindState.varyingBuffer, - dstDesc, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - start, - end); - } - } -} - - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuComputeController.h b/opensubdiv/osd/cpuComputeController.h deleted file mode 100644 index 2a9edde7..00000000 --- a/opensubdiv/osd/cpuComputeController.h +++ /dev/null @@ -1,179 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_COMPUTE_CONTROLLER_H -#define OSD_CPU_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuComputeContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching CPU subdivision kernels. -/// -/// CpuComputeController is a compute controller class to launch -/// single threaded CPU subdivision kernels. It requires -/// CpuVertexBufferInterface as arguments of the Refine() function. -/// -/// The Osd Compute module provides functionality to interpolate primitive -/// variable data according to a subdivision scheme. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class CpuComputeController { -public: - typedef CpuComputeContext ComputeContext; - - /// Constructor. - CpuComputeController(); - - /// Destructor. - ~CpuComputeController(); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. 
- /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CpuComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CpuComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? 
- vertexBuffer->BindCpuBuffer() : 0; - - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCpuBuffer() : 0; - } - - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = 0; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float * vertexBuffer, - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CPU_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/cpuD3D11VertexBuffer.h b/opensubdiv/osd/cpuD3D11VertexBuffer.h index eade74dd..145ec599 100644 --- a/opensubdiv/osd/cpuD3D11VertexBuffer.h +++ b/opensubdiv/osd/cpuD3D11VertexBuffer.h @@ -43,7 +43,7 @@ namespace Osd { /// CpuD3D11VertexBuffer implements CpuVertexBufferInterface and /// D3D11VertexBufferInterface. /// -/// An instance of this buffer class can be passed to CpuComputeController. +/// An instance of this buffer class can be passed to CpuEvaluator. /// class CpuD3D11VertexBuffer { public: diff --git a/opensubdiv/osd/cpuEvalLimitContext.cpp b/opensubdiv/osd/cpuEvalLimitContext.cpp deleted file mode 100644 index f505ac94..00000000 --- a/opensubdiv/osd/cpuEvalLimitContext.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/cpuEvalLimitContext.h" -#include "../osd/vertexDescriptor.h" - -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuEvalLimitContext * -CpuEvalLimitContext::Create(Far::PatchTables const & patchTables) { - - // there is no limit with uniform subdivision - if (not patchTables.IsFeatureAdaptive()) - return NULL; - - return new CpuEvalLimitContext(patchTables); -} - -CpuEvalLimitContext::CpuEvalLimitContext(Far::PatchTables const & patchTables) : - EvalLimitContext(patchTables), - _patchTables(patchTables), - _patchMap(patchTables) { -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuEvalLimitContext.h b/opensubdiv/osd/cpuEvalLimitContext.h deleted file mode 100644 index 11a2f8b0..00000000 --- a/opensubdiv/osd/cpuEvalLimitContext.h +++ /dev/null @@ -1,80 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. 
is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_EVAL_LIMIT_CONTEXT_H -#define OSD_CPU_EVAL_LIMIT_CONTEXT_H - -#include "../version.h" - -#include "../osd/evalLimitContext.h" -#include "../far/patchTables.h" -#include "../far/patchMap.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class CpuEvalLimitContext : public EvalLimitContext { -public: - - /// \brief Factory - /// Returns an EvalLimitContext from the given far patch tables. - /// Note : the patchtables is expected to be feature-adaptive and have ptex - /// coordinates tables. 
- /// - /// @param patchTables a pointer to an initialized Far::PatchTables - /// - static CpuEvalLimitContext * Create(Far::PatchTables const &patchTables); - - Far::PatchTables const & GetPatchTables() const { - return _patchTables; - } - - Far::PatchMap const & GetPatchMap() const { - return _patchMap; - } - -protected: - - explicit CpuEvalLimitContext(Far::PatchTables const & patchTables); - -private: - - Far::PatchTables const _patchTables; // Patch topology data - Far::PatchMap const _patchMap; // Patch search accelerator -}; - - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* OSD_CPU_EVAL_LIMIT_CONTEXT_H */ diff --git a/opensubdiv/osd/cpuEvalLimitController.cpp b/opensubdiv/osd/cpuEvalLimitController.cpp deleted file mode 100644 index f0498456..00000000 --- a/opensubdiv/osd/cpuEvalLimitController.cpp +++ /dev/null @@ -1,288 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/cpuEvalLimitController.h" -#include "../osd/cpuEvalLimitContext.h" -#include "../osd/cpuEvalLimitKernel.h" -#include "../far/patchTables.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuEvalLimitController::CpuEvalLimitController() { -} - -CpuEvalLimitController::~CpuEvalLimitController() { -} - -// Vertex interpolation of a sample at the limit -int -CpuEvalLimitController::EvalLimitSample( LimitLocation const & coord, - CpuEvalLimitContext * context, - VertexBufferDescriptor const & outDesc, - float * outQ, - float * outDQU, - float * outDQV ) const { - typedef Far::PatchDescriptor Desc; - - float s=coord.s, - t=coord.t; - - Far::PatchMap::Handle const * handle = context->GetPatchMap().FindPatch( coord.ptexIndex, s, t ); - if (not handle) { - return 0; // no handle if there is a hole or 'coord' is incorrect - } - - VertexData const & vertexData = _currentBindState.vertexData; - - if (vertexData.in) { - - Far::PatchTables const & ptables = context->GetPatchTables(); - - Far::PatchParam pparam = ptables.GetPatchParam(*handle); - - Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle); - - Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle); - switch (desc.GetType()) { - case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - 
ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::GREGORY_BASIS : { - evalGregoryBasis( pparam.bitField, s, t, - cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - outQ, outDQU, outDQV ); - } break; - case Desc::QUADS : evalBilinear( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - default: - assert(0); - } - } - assert(0); - return 1; -} - -// Vertex interpolation of samples at the limit -int -CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords, - CpuEvalLimitContext * context, - unsigned int index ) const { - typedef Far::PatchDescriptor Desc; - - float s=coords.s, - t=coords.t; - - Far::PatchMap::Handle const * handle = context->GetPatchMap().FindPatch( coords.ptexIndex, s, t ); - if (not handle) { - return 0; // no handle if there is a hole or 'coord' is incorrect - } - - VertexData const & vertexData = _currentBindState.vertexData; - - Far::PatchTables const & ptables = context->GetPatchTables(); - - Far::PatchParam pparam = ptables.GetPatchParam(*handle); - - Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle); - - Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle); - - if (vertexData.in) { - - int offset = vertexData.outDesc.stride * index, - doffset = vertexData.outDesc.length * index; - - if (vertexData.out) { - - // note : don't apply outDesc.offset here, it's done inside patch - // evaluation - float * out = vertexData.out+offset, - * outDu = vertexData.outDu ? 
vertexData.outDu+doffset : 0, - * outDv = vertexData.outDv ? vertexData.outDv+doffset : 0; - - switch (desc.GetType()) { - case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::GREGORY_BASIS : { - evalGregoryBasis( pparam.bitField, s, t, - cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - } break; - case Desc::QUADS : evalBilinear( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - default: - assert(0); - } - } - } - - VaryingData const & varyingData = _currentBindState.varyingData; - - if (varyingData.in and varyingData.out) { - - static int const zeroRings[6][4] = { {5, 6,10, 9}, // regular - {1, 2, 6, 5}, // boundary / single-crease - {1, 2, 5, 4}, // corner - {0, 1, 2, 3}, // no permutation - {0, 5, 10, 15} }; // gregory basis - - int const * permute = 0; - switch (desc.GetType()) { - case Desc::REGULAR : permute = zeroRings[0]; 
break; - case Desc::BOUNDARY : permute = zeroRings[1]; break; - case Desc::CORNER : permute = zeroRings[2]; break; - case Desc::GREGORY : - case Desc::GREGORY_BOUNDARY : permute = zeroRings[3]; break; - case Desc::GREGORY_BASIS : permute = zeroRings[4]; break; - default: - assert(0); - }; - - int offset = varyingData.outDesc.stride * index; - - Far::Index zeroRing[4] = { cvs[permute[0]], - cvs[permute[1]], - cvs[permute[2]], - cvs[permute[3]] }; - - evalBilinear( pparam.bitField, s, t, zeroRing, - varyingData.inDesc, - varyingData.in, - varyingData.outDesc, - varyingData.out+offset, 0, 0); - - } - - // Note : currently we only support bilinear boundary interpolation rules - // for limit face-varying data. - - FacevaryingData const & facevaryingData = _currentBindState.facevaryingData; - - if (facevaryingData.in and facevaryingData.out) { - - int offset = facevaryingData.outDesc.stride * index; - - static int const zeroRing[4] = {0,1,2,3}; - - // XXXX manuelk this assumes FVar data is ordered with 4 CVs / patch : - // bi-cubic FVar interpolation will require proper topology - // accessors in Far::PatchTables and this code will change - evalBilinear( pparam.bitField, s, t, zeroRing, - facevaryingData.inDesc, - &facevaryingData.in[handle->patchIndex*4*facevaryingData.outDesc.stride], - facevaryingData.outDesc, - facevaryingData.out+offset, 0, 0); - - } - return 1; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuEvalLimitController.h b/opensubdiv/osd/cpuEvalLimitController.h deleted file mode 100644 index 432fe8ba..00000000 --- a/opensubdiv/osd/cpuEvalLimitController.h +++ /dev/null @@ -1,298 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. 
is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_EVAL_LIMIT_CONTROLLER_H -#define OSD_CPU_EVAL_LIMIT_CONTROLLER_H - -#include "../version.h" - -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -struct LimitLocation; -class CpuEvalLimitContext; - -/// \brief CPU controler for limit surface evaluation. -/// -/// A CPU-driven controller that can be called to evaluate samples on the limit -/// surface for a given EvalContext. -/// -/// Warning : this eval controller is re-entrant but it breaks the Osd API pattern -/// by requiring client code to bind and unbind the data buffers to the -/// Controller before calling evaluation methods. -/// -/// Ex : -/// \code -/// evalCtroller->BindVertexBuffers( ... ); -/// evalCtroller->BindVaryingBuffers( ... ); -/// evalCtroller->BindFacevaryingBuffers( ... ); -/// -/// parallel_for( int index=0; iEvalLimitSample( coord, evalCtxt, index ); -/// } -/// -/// evalCtroller->Unbind(); -/// \endcode -/// -class CpuEvalLimitController { - -public: - /// Constructor. - CpuEvalLimitController(); - - /// Destructor. 
- ~CpuEvalLimitController(); - - /// \brief Binds control vertex data buffer - /// - /// @param iDesc data descriptor shared by all input data buffers - /// - /// @param inQ input vertex data - /// - /// @param oDesc data descriptor for the outQ data buffer - /// -- derivative buffers do not have a descriptor and - /// cannot be offset or padded with a stride (yet ?) - /// - /// @param outQ output vertex data - /// - /// @param outdQu output derivative along "u" of the vertex data (optional) - /// - /// @param outdQv output derivative along "v" of the vertex data (optional) - /// - template - void BindVertexBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ, - VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ, - OUTPUT_BUFFER *outdQu=0, - OUTPUT_BUFFER *outdQv=0 ) { - _currentBindState.vertexData.inDesc = iDesc; - _currentBindState.vertexData.in = inQ ? inQ->BindCpuBuffer() : 0; - - _currentBindState.vertexData.outDesc = oDesc; - _currentBindState.vertexData.out = outQ ? outQ->BindCpuBuffer() : 0; - _currentBindState.vertexData.outDu = outdQu ? outdQu->BindCpuBuffer() : 0; - _currentBindState.vertexData.outDv = outdQv ? outdQv->BindCpuBuffer() : 0; - } - - /// \brief Binds the varying-interpolated data streams - /// - /// @param iDesc data descriptor shared by all input data buffers - /// - /// @param inQ input varying data - /// - /// @param oDesc data descriptor for the outQ data buffer - /// - /// @param outQ output varying data - /// - template - void BindVaryingBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ, - VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ ) { - _currentBindState.varyingData.inDesc = iDesc; - _currentBindState.varyingData.in = inQ ? inQ->BindCpuBuffer() : 0; - - _currentBindState.varyingData.outDesc = oDesc; - _currentBindState.varyingData.out = outQ ? 
outQ->BindCpuBuffer() : 0; - } - - /// \brief Binds the face-varying-interpolated data streams - /// - /// Note : currently we only support bilinear boundary interpolation rules - /// for face-varying data. Although Hbr supports 3 addition smooth rule sets, - /// the feature-adaptive patch interpolation code currently does not support - /// them, and neither does this EvalContext - /// - /// @param iDesc data descriptor shared by all input data buffers - /// - /// @param inQ input face-varying data - /// - /// @param oDesc data descriptor for the outQ data buffer - /// - /// @param outQ output face-varying data - /// - template - void BindFacevaryingBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ, - VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ ) { - _currentBindState.facevaryingData.inDesc = iDesc; - _currentBindState.facevaryingData.in = inQ ? inQ->BindCpuBuffer() : 0; - - _currentBindState.facevaryingData.outDesc = oDesc; - _currentBindState.facevaryingData.out = outQ ? outQ->BindCpuBuffer() : 0; - } - - /// \brief Vertex interpolation of a single sample at the limit - /// - /// Evaluates "vertex" interpolation of a single sample on the surface limit. - /// - /// This function is re-entrant but does not require binding the - /// output vertex buffers. Pointers to memory where the data is - /// output are explicitly passed to the function. - /// - /// @param coord location on the limit surface to be evaluated - /// - /// @param context the EvalLimitContext that the controller will evaluate - /// - /// @param outDesc data descriptor for the outQ data buffer - /// -- derivative buffers do not have a descriptor and - /// cannot be offset or padded with a stride (yet ?) 
- /// - /// @param outQ output vertex data - /// - /// @param outDQU output derivative along "u" of the vertex data (optional) - /// - /// @param outDQV output derivative along "v" of the vertex data (optional) - /// - /// @return 1 if the sample was found - /// - int EvalLimitSample( LimitLocation const & coord, - CpuEvalLimitContext * context, - VertexBufferDescriptor const & outDesc, - float * outQ, - float * outDQU, - float * outDQV ) const; - - /// \brief Vertex interpolation of samples at the limit - /// - /// Evaluates "vertex" interpolation of a sample on the surface limit. - /// - /// @param coords location on the limit surface to be evaluated - /// - /// @param context the EvalLimitContext that the controller will evaluate - /// - /// @param index the index of the vertex in the output buffers bound to the - /// context - /// - /// @return the number of samples found (0 if the location was tagged as a hole - /// or the coordinate was invalid) - /// - int EvalLimitSample( LimitLocation const & coords, - CpuEvalLimitContext * context, - unsigned int index ) const { - if (not context) - return 0; - - int n = _EvalLimitSample( coords, context, index ); - - return n; - } - - void Unbind() { - _currentBindState.Reset(); - } - -protected: - - - // Vertex interpolated streams - struct VertexData { - - VertexData() : in(0), out(0), outDu(0), outDv(0) { } - - - void Reset() { - in = out = outDu = outDv = NULL; - inDesc.Reset(); - outDesc.Reset(); - } - - VertexBufferDescriptor inDesc, - outDesc; - float * in, - * out, - * outDu, - * outDv; - }; - - // Varying interpolated streams - struct VaryingData { - - VaryingData() : in(0), out(0) { } - - - void Reset() { - in = out = NULL; - inDesc.Reset(); - outDesc.Reset(); - } - - VertexBufferDescriptor inDesc, - outDesc; - float * in, - * out; - }; - - // Facevarying interpolated streams - struct FacevaryingData { - - FacevaryingData() : in(0), out(0) { } - - void Reset() { - in = out = NULL; - inDesc.Reset(); - 
outDesc.Reset(); - } - - VertexBufferDescriptor inDesc, - outDesc; - float * in, - * out; - }; - - -private: - - int _EvalLimitSample( LimitLocation const & coords, - CpuEvalLimitContext * context, - unsigned int index ) const; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. - struct BindState { - - BindState() { } - - void Reset() { - vertexData.Reset(); - varyingData.Reset(); - facevaryingData.Reset(); - } - - VertexData vertexData; // vertex interpolated data descriptor - VaryingData varyingData; // varying interpolated data descriptor - FacevaryingData facevaryingData; // face-varying interpolated data descriptor - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* OSD_CPU_EVAL_LIMIT_CONTROLLER_H */ diff --git a/opensubdiv/osd/cpuEvalLimitKernel.cpp b/opensubdiv/osd/cpuEvalLimitKernel.cpp deleted file mode 100644 index 7f8488dc..00000000 --- a/opensubdiv/osd/cpuEvalLimitKernel.cpp +++ /dev/null @@ -1,1131 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/cpuEvalLimitKernel.h" -#include "../far/interpolate.h" -#include "../far/stencilTables.h" - -#include -#include -#include -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#ifdef TENSOR_PRODUCT_CUBIC_SPLINES - -// manuelk code was refactored to use the matrix formulation of cubic splines -// exposed in Far::PatchTables for consistency. I am keeping these temporarily -// for reference. - -inline void -cubicBezier(float u, float B[4], float BU[3]) { - float u2 = u*u, - w0 = 1.0f - u, - w2 = w0 * w0; - - B[0] = w0*w2; - B[1] = 3.0f * u * w2; - B[2] = 3.0f * u2 * w0; - B[3] = u*u2; - - if (BU) { - BU[0] = w2; - BU[1] = 2.0f * u * w0; - BU[2] = u2; - } -} - -inline void -cubicBSpline(float u, float B[4], float BU[4]) { - float t = u; - float s = 1.0f - u; - - float A0 = s * (0.5f * s); - float A1 = t * (s + 0.5f * t) + s * (0.5f * s + t); - float A2 = t * ( 0.5f * t); - - B[0] = 1.f/3.f * s * A0; - B[1] = (2.f/3.f * s + t) * A0 + (2.f/3.f * s + 1.f/3.f * t) * A1; - B[2] = (1.f/3.f * s + 2.f/3.f * t) * A1 + ( s + 2.f/3.f * t) * A2; - B[3] = 1.f/3.f * t * A2; - - if (BU) { - BU[0] = - A0; - BU[1] = A0 - A1; - BU[2] = A1 - A2; - BU[3] = A2; - } -} - -inline void -univar4x4(float u, float B[4], float D[4]) { - - float t = u; - float s = 1.0f - u; - - float A0 = s * s; - float A1 = 2 * s * t; - float A2 = t * t; - - B[0] = s * A0; - B[1] = t * A0 + s * A1; - B[2] = t * A1 + s * A2; - B[3] = t * A2; - - if 
(D) { - D[0] = - A0; - D[1] = A0 - A1; - D[2] = A1 - A2; - D[3] = A2; - } -} - -#endif - -void -evalBilinear(Far::PatchParam::BitField bits, - float s, float t, - Far::Index const * vertexIndices, - VertexBufferDescriptor const & inDesc, - float const * inQ, - VertexBufferDescriptor const & outDesc, - float * outQ, - float * outDQ1, - float * outDQ2) { - - assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) ); - - float Q[4], dQ1[4], dQ2[4]; - Far::GetBilinearWeights(bits, s, t, outQ ? Q:0, outDQ1 ? dQ1:0, outDQ2 ? dQ2:0); - - float const * inOffset = inQ + inDesc.offset; - - outQ += outDesc.offset; - - memset(outQ, 0, inDesc.length*sizeof(float)); - if (outDQ1) { - memset(outDQ1, 0, inDesc.length*sizeof(float)); - } - if (outDQ2) { - memset(outDQ2, 0, inDesc.length*sizeof(float)); - } - - for (int i=0; i<4; ++i) { - - float const * in = inOffset + vertexIndices[i]*inDesc.stride; - - for (int k=0; k= 0) - ? inOffset + vertexIndices[index]*inDesc.stride - : &CP[(-index-1)*inDesc.length]; - - for (int k=0; k2) { - for (int k=0; k0); - float const * diagonal = inOffset + idx_diagonal * inDesc.stride; - - for (int j=0; j0 and idx_diagonal>0 ); - - float const * neighbor = inOffset + idx_neighbor * inDesc.stride; - diagonal = inOffset + idx_diagonal * inDesc.stride; - - for (int j=0; j 2) { - float s1 = 3.0f - 2.0f*csf(n-3,2)-csf(np-3,2), - s2 = 2.0f*csf(n-3,2), - s3 = 3.0f -2.0f*cosf(2.0f*float(M_PI)/float(n)) - cosf(2.0f*float(M_PI)/float(nm)); - - for (int k=0, ofs=vofs; k { - -public: - /// \brief Creates an CpuEvalStencilsContext instance - /// - /// @param stencils a pointer to the Far::StencilTables - /// - static CpuEvalStencilsContext * Create(Far::LimitStencilTables const *stencils); - - /// \brief Returns the Far::StencilTables applied - Far::LimitStencilTables const * GetStencilTables() const { - return _stencils; - } - -protected: - - CpuEvalStencilsContext(Far::LimitStencilTables const *stencils); - -private: - - Far::LimitStencilTables const 
* _stencils; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_CPU_EVALSTENCILS_CONTEXT_H diff --git a/opensubdiv/osd/cpuEvalStencilsController.cpp b/opensubdiv/osd/cpuEvalStencilsController.cpp deleted file mode 100644 index 3ba86929..00000000 --- a/opensubdiv/osd/cpuEvalStencilsController.cpp +++ /dev/null @@ -1,149 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/cpuEvalStencilsController.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuEvalStencilsController::CpuEvalStencilsController() { -} - -CpuEvalStencilsController::~CpuEvalStencilsController() { -} - -int -CpuEvalStencilsController::_UpdateValues( CpuEvalStencilsContext * context ) { - - int result=0; - - Far::LimitStencilTables const * stencils = context->GetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - outDesc = _currentBindState.outputDataDesc; - - // make sure that we have control data to work with - if (not ctrlDesc.CanEval(outDesc)) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - float * out = _currentBindState.outputData + outDesc.offset; - - if ((not ctrl) or (not out)) - return result; - - unsigned char const * sizes = &stencils->GetSizes().at(0); - Far::Index const * index = &stencils->GetControlIndices().at(0); - - float const * weight = &stencils->GetWeights().at(0); - - for (int i=0; iGetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - duDesc = _currentBindState.outputDuDesc, - dvDesc = _currentBindState.outputDvDesc; - - // make sure that we have control data to work with - if (not (ctrlDesc.CanEval(duDesc) and ctrlDesc.CanEval(dvDesc))) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - float * du = _currentBindState.outputUDeriv + duDesc.offset, - * dv = _currentBindState.outputVDeriv + dvDesc.offset; - - if ((not ctrl) or (not du) or (not dv)) - return result; - - unsigned char const * sizes = &stencils->GetSizes().at(0); - Far::Index const * index = &stencils->GetControlIndices().at(0); - - float const * duweight = 
&stencils->GetDuWeights().at(0), - * dvweight = &stencils->GetDvWeights().at(0); - - for (int i=0; i - int UpdateValues( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputData( outputDataDesc, outputData ); - - int n = _UpdateValues( context ); - - unbind(); - - return n; - } - - /// \brief Applies derivative stencil weights to the control vertex data - /// - /// Computes the U and V derivative stencils to the control vertex data at - /// the parametric locations contained in each stencil - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDuDesc vertex buffer descriptor for the U derivative output data - /// - /// @param outputDuData output vertex buffer for the U derivative data - /// - /// @param outputDvDesc vertex buffer descriptor for the V deriv output data - /// - /// @param outputDvData output vertex buffer for the V derivative data - /// - template - int UpdateDerivs( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData, - VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData ); - - int n = _UpdateDerivs( context ); - - unbind(); - - return n; - } - - /// Waits until all running subdivision 
kernels finish. - void Synchronize(); - -protected: - - /// \brief Binds control vertex data buffer - template - void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) { - - _currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0; - _currentBindState.controlDataDesc = controlDataDesc; - - } - - /// \brief Binds output vertex data buffer - template - void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) { - - _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0; - _currentBindState.outputDataDesc = outputDataDesc; - } - - /// \brief Binds output derivative vertex data buffer - template - void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu, - VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) { - - _currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0; - _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0; - _currentBindState.outputDuDesc = outputDuDesc; - _currentBindState.outputDvDesc = outputDvDesc; - } - - /// \brief Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - } - -private: - - int _UpdateValues( CpuEvalStencilsContext * context ); - int _UpdateDerivs( CpuEvalStencilsContext * context ); - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. 
- struct BindState { - - BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { } - - void Reset() { - controlData = outputData = outputUDeriv = outputVDeriv = NULL; - controlDataDesc.Reset(); - outputDataDesc.Reset(); - outputDuDesc.Reset(); - outputDvDesc.Reset(); - } - - // transient mesh data - VertexBufferDescriptor controlDataDesc, - outputDataDesc, - outputDuDesc, - outputDvDesc; - - float * controlData, - * outputData, - * outputUDeriv, - * outputVDeriv; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_CPU_EVALSTENCILS_CONTROLLER_H diff --git a/opensubdiv/osd/cpuEvaluator.cpp b/opensubdiv/osd/cpuEvaluator.cpp new file mode 100644 index 00000000..e42e8a3e --- /dev/null +++ b/opensubdiv/osd/cpuEvaluator.cpp @@ -0,0 +1,148 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/cpuEvaluator.h" +#include "../osd/cpuKernel.h" + +#include + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +/* static */ +bool +CpuEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; + if (srcDesc.length != dstDesc.length) return false; + + // XXX: we can probably expand cpuKernel.cpp to here. + CpuEvalStencils(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights, start, end); + + return true; +} + +/* static */ +bool +CpuEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + float *dstDu, + VertexBufferDescriptor const &dstDuDesc, + float *dstDv, + VertexBufferDescriptor const &dstDvDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end) { + if (end <= start) return true; + if (srcDesc.length != dstDesc.length) return false; + if (srcDesc.length != dstDuDesc.length) return false; + if (srcDesc.length != dstDvDesc.length) return false; + + CpuEvalStencils(src, srcDesc, + dst, dstDesc, + dstDu, dstDuDesc, + dstDv, dstDvDesc, + sizes, offsets, indices, + weights, duWeights, dvWeights, + start, end); + + return true; +} + +template +struct BufferAdapter { + BufferAdapter(T *p, int length, int stride) : + _p(p), _length(length), _stride(stride) { } + void Clear() { + for (int i = 0; i < _length; ++i) _p[i] = 0; + } + void AddWithWeight(T const *src, float w, float wu, float wv) { + (void)wu; + (void)wv; + // TODO: derivatives. 
+ for (int i = 0; i < _length; ++i) { + _p[i] += src[i] * w; + } + } + const T *operator[] (int index) const { + return _p + _stride * index; + } + BufferAdapter & operator ++() { + _p += _stride; + return *this; + } + + T *_p; + int _length; + int _stride; +}; + +/* static */ +int +CpuEvaluator::EvalPatches(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + PatchCoordArray const &patchCoords, + Far::PatchTables const *patchTable) { + src += srcDesc.offset; + dst += dstDesc.offset; + int count = 0; + + // XXX: this implementation is temporary. + BufferAdapter srcT(src, srcDesc.length, srcDesc.stride); + BufferAdapter dstT(dst, dstDesc.length, dstDesc.stride); + + for (size_t i = 0; i < patchCoords.size(); ++i) { + PatchCoord const &coords = patchCoords[i]; + + patchTable->Evaluate(coords.handle, coords.s, coords.t, + srcT, dstT); + ++count; + ++dstT; + } + return count; +} + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuEvaluator.h b/opensubdiv/osd/cpuEvaluator.h new file mode 100644 index 00000000..13a7ed86 --- /dev/null +++ b/opensubdiv/osd/cpuEvaluator.h @@ -0,0 +1,242 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file.
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_CPU_EVALUATOR_H +#define OPENSUBDIV_OSD_CPU_EVALUATOR_H + +#include "../version.h" + +#include +#include +#include "../osd/vertexDescriptor.h" +#include "../far/patchTables.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +/// \brief Coordinates set on a patch table +/// XXX: this is a temporary structure, exists during Osd refactoring work. +/// +struct PatchCoord { + /// \brief Constructor + /// + /// @param handle patch handle + /// + /// @param s parametric location on the patch + /// + /// @param t parametric location on the patch + /// + PatchCoord(Far::PatchTables::PatchHandle handle, float s, float t) : + handle(handle), s(s), t(t) { } + + Far::PatchTables::PatchHandle handle; ///< patch handle + float s, t; ///< parametric location on patch +}; + +typedef std::vector PatchCoordArray; + + +class CpuEvaluator { +public: + /// \brief Generic static eval stencils function. This function has the same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer.
+ /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. + /// + /// @param instance not used in the cpu kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the cpu kernel + /// + template + static bool EvalStencils(SRC_BUFFER *srcBuffer, + VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + const CpuEvaluator *instance = NULL, + void * deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), + srcDesc, + dstBuffer->BindCpuBuffer(), + dstDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + /// stencil evaluate function. 
+ static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); + + template + static bool EvalStencils(SRC_BUFFER *srcBuffer, + VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + VertexBufferDescriptor const &dstDesc, + DST_BUFFER *dstDuBuffer, + VertexBufferDescriptor const &dstDuDesc, + DST_BUFFER *dstDvBuffer, + VertexBufferDescriptor const &dstDvDesc, + STENCIL_TABLE const *stencilTable, + const CpuEvaluator *evaluator = NULL, + void * deviceContext = NULL) { + (void)evaluator; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), + srcDesc, + dstBuffer->BindCpuBuffer(), + dstDesc, + dstDuBuffer->BindCpuBuffer(), + dstDuDesc, + dstDvBuffer->BindCpuBuffer(), + dstDvDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + &stencilTable->GetDuWeights()[0], + &stencilTable->GetDvWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + float *dstDu, + VertexBufferDescriptor const &dstDuDesc, + float *dstDv, + VertexBufferDescriptor const &dstDvDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, + int end); + + /// \brief Generic limit eval function. This function has the same + /// signature as other device kernels have so that it can be called + /// transparently. + /// + /// XXX: This interface is still a work in progress. XXX + /// + /// @param srcBuffer Input primvar buffer.
+ /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable Far::PatchTables + /// + /// @param instance not used in the cpu evaluator + /// + /// @param deviceContext not used in the cpu evaluator + /// + template + static int EvalPatches(SRC_BUFFER *srcBuffer, + VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + VertexBufferDescriptor const &dstDesc, + PatchCoordArray const &patchCoords, + Far::PatchTables const *patchTable, + CpuEvaluator const *instance, + void * deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches(srcBuffer->BindCpuBuffer(), + srcDesc, + dstBuffer->BindCpuBuffer(), + dstDesc, + patchCoords, + patchTable); + } + + /// \brief limit eval function. + static int EvalPatches(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + PatchCoordArray const &patchCoords, + Far::PatchTables const *patchTable); + + /// \brief synchronize all asynchronous computation invoked on this device. + static void Synchronize(void * /*deviceContext = NULL*/) { + // nothing.
+ } +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_CPU_EVALUATOR_H diff --git a/opensubdiv/osd/cpuKernel.cpp b/opensubdiv/osd/cpuKernel.cpp index a1579c12..1a7baddf 100644 --- a/opensubdiv/osd/cpuKernel.cpp +++ b/opensubdiv/osd/cpuKernel.cpp @@ -70,15 +70,15 @@ copy(float *dst, int dstIndex, const float *src, } void -CpuComputeStencils(float const * src, - VertexBufferDescriptor const &srcDesc, - float * dst, - VertexBufferDescriptor const &dstDesc, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end) { +CpuEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end) { assert(start>=0 and start 0) { sizes += start; indices += offsets[start]; diff --git a/opensubdiv/osd/cpuKernel.h b/opensubdiv/osd/cpuKernel.h index b3bcba18..9b152a6a 100644 --- a/opensubdiv/osd/cpuKernel.h +++ b/opensubdiv/osd/cpuKernel.h @@ -22,8 +22,8 @@ // language governing permissions and limitations under the Apache License. 
// -#ifndef OSD_CPU_KERNEL_H -#define OSD_CPU_KERNEL_H +#ifndef OPENSUBDIV_OSD_CPU_KERNEL_H +#define OPENSUBDIV_OSD_CPU_KERNEL_H #include "../version.h" #include @@ -36,32 +36,32 @@ namespace Osd { struct VertexBufferDescriptor; void -CpuComputeStencils(float const * src, - VertexBufferDescriptor const &srcDesc, - float * dst, - VertexBufferDescriptor const &dstDesc, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end); +CpuEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end); void -CpuComputeStencils(float const * src, - VertexBufferDescriptor const &srcDesc, - float * dst, - VertexBufferDescriptor const &dstDesc, - float * dstDu, - VertexBufferDescriptor const &dstDuDesc, - float * dstDv, - VertexBufferDescriptor const &dstDvDesc, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - float const * duWeights, - float const * dvWeights, - int start, int end); +CpuEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + float * dstDu, + VertexBufferDescriptor const &dstDuDesc, + float * dstDv, + VertexBufferDescriptor const &dstDvDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + float const * duWeights, + float const * dvWeights, + int start, int end); // // SIMD ICC optimization of the stencil kernel diff --git a/opensubdiv/osd/cpuVertexBuffer.h b/opensubdiv/osd/cpuVertexBuffer.h index 20b9a9ed..bc85f51e 100644 --- a/opensubdiv/osd/cpuVertexBuffer.h +++ b/opensubdiv/osd/cpuVertexBuffer.h @@ -37,7 +37,7 @@ namespace Osd { /// \brief Concrete vertex buffer class for cpu subvision. 
/// /// CpuVertexBuffer implements the VertexBufferInterface. An instance -/// of this buffer class can be passed to CpuComputeController +/// of this buffer class can be passed to CpuEvaluator /// class CpuVertexBuffer { public: diff --git a/opensubdiv/osd/cudaComputeContext.cpp b/opensubdiv/osd/cudaComputeContext.cpp deleted file mode 100644 index cdd94d4f..00000000 --- a/opensubdiv/osd/cudaComputeContext.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../far/stencilTables.h" - -#include "../osd/cudaComputeContext.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ---------------------------------------------------------------------------- - -template void * -createCudaBuffer(std::vector const & src) { - - void * devicePtr = 0; - - size_t size = src.size()*sizeof(T); - - cudaError_t err = cudaMalloc(&devicePtr, size); - if (err != cudaSuccess) { - return devicePtr; - } - - err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - cudaFree(devicePtr); - return 0; - } - return devicePtr; -} - -// ---------------------------------------------------------------------------- - -class CudaComputeContext::CudaStencilTables { - -public: - explicit CudaStencilTables(Far::StencilTables const & stencilTables) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createCudaBuffer(stencilTables.GetSizes()); - _offsets = createCudaBuffer(stencilTables.GetOffsets()); - _indices = createCudaBuffer(stencilTables.GetControlIndices()); - _weights = createCudaBuffer(stencilTables.GetWeights()); - } else { - _sizes = _offsets = _indices = _weights = NULL; - } - } - - ~CudaStencilTables() { - if (_sizes) { cudaFree(_sizes); } - if (_offsets) { cudaFree(_offsets); } - if (_indices) { cudaFree(_indices); } - if (_weights) { cudaFree(_weights); } - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - void * GetSizes() const { - return _sizes; - } - - void * GetOffsets() const { - return _offsets; - } - - void * GetIndices() const { - return _indices; - } - - void * GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - -private: - void * _sizes, - * _offsets, - * _indices, - * _weights; - int _numStencils; -}; - -// ---------------------------------------------------------------------------- - 
-CudaComputeContext::CudaComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new CudaStencilTables(*vertexStencilTables); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = new CudaStencilTables(*varyingStencilTables); - - if (_numControlVertices) { - assert(_numControlVertices == - varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -CudaComputeContext::~CudaComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -CudaComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->IsValid() : false; -} - -bool -CudaComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -CudaComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; -} - -int -CudaComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; -} - -// ---------------------------------------------------------------------------- - -void * -CudaComputeContext::GetVertexStencilTablesSizes() const { - return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0; -} - -void * -CudaComputeContext::GetVertexStencilTablesOffsets() const { - return _vertexStencilTables ? _vertexStencilTables->GetOffsets() : 0; -} - -void * -CudaComputeContext::GetVertexStencilTablesIndices() const { - return _vertexStencilTables ? 
_vertexStencilTables->GetIndices() : 0; -} - -void * -CudaComputeContext::GetVertexStencilTablesWeights() const { - return _vertexStencilTables ? _vertexStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -void * -CudaComputeContext::GetVaryingStencilTablesSizes() const { - return _varyingStencilTables ? _varyingStencilTables->GetSizes() : 0; -} - -void * -CudaComputeContext::GetVaryingStencilTablesOffsets() const { - return _varyingStencilTables ? _varyingStencilTables->GetOffsets() : 0; -} - -void * -CudaComputeContext::GetVaryingStencilTablesIndices() const { - return _varyingStencilTables ? _varyingStencilTables->GetIndices() : 0; -} - -void * -CudaComputeContext::GetVaryingStencilTablesWeights() const { - return _varyingStencilTables ? _varyingStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -CudaComputeContext * -CudaComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - CudaComputeContext *result = - new CudaComputeContext(vertexStencilTables, varyingStencilTables); - - return result; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cudaComputeContext.h b/opensubdiv/osd/cudaComputeContext.h deleted file mode 100644 index 61ca7808..00000000 --- a/opensubdiv/osd/cudaComputeContext.h +++ /dev/null @@ -1,134 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CUDA_COMPUTE_CONTEXT_H -#define OSD_CUDA_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { class StencilTables; } - -namespace Osd { - -/// -/// \brief CUDA Refine Context -/// -/// The CUDA implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. 
-/// -class CudaComputeContext : public NonCopyable { - -public: - /// Creates an CudaComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext (not used) - /// - static CudaComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~CudaComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil tables - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil tables - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil sizes - void * GetVertexStencilTablesSizes() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil offsets - void * GetVertexStencilTablesOffsets() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil indices - void * GetVertexStencilTablesIndices() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil weights - void * GetVertexStencilTablesWeights() const; - - - /// Returns the Cuda buffer containing Varying-stencil stencil sizes - void * GetVaryingStencilTablesSizes() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil offsets - void * GetVaryingStencilTablesOffsets() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil indices - void * 
GetVaryingStencilTablesIndices() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil weights - void * GetVaryingStencilTablesWeights() const; - - -protected: - explicit CudaComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - class CudaStencilTables; - - CudaStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CUDA_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/cudaComputeController.cpp b/opensubdiv/osd/cudaComputeController.cpp deleted file mode 100644 index ef8afbf1..00000000 --- a/opensubdiv/osd/cudaComputeController.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/cudaComputeController.h" - -#include -#include -#include - -extern "C" { - void CudaComputeStencils(const float *src, - float *dst, - int length, - int srcStride, - int dstStride, - const unsigned char * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, - int end); -} - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -void -CudaComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - if (context->HasVertexStencilTables() and _currentBindState.vertexBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = context->GetNumStencilsInVertexStencilTables(); - - float const * src = _currentBindState.vertexBuffer; - float * dst = _currentBindState.vertexBuffer; - - if (end > start) { - CudaComputeStencils(src + srcDesc.offset, - dst + dstDesc.offset, - srcDesc.length, - srcDesc.stride, - dstDesc.stride, - (unsigned char const *)context->GetVertexStencilTablesSizes(), - (int const *)context->GetVertexStencilTablesOffsets(), - (int const *)context->GetVertexStencilTablesIndices(), - (float const *)context->GetVertexStencilTablesWeights(), - start, - end); - } - } - - if (context->HasVaryingStencilTables() and _currentBindState.varyingBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = context->GetNumStencilsInVaryingStencilTables(); - - float const * src = _currentBindState.varyingBuffer; - float * dst = _currentBindState.varyingBuffer; - - if (end > start) { - CudaComputeStencils(src + srcDesc.offset, - dst + dstDesc.offset, - srcDesc.length, - srcDesc.stride, - dstDesc.stride, - (unsigned char const 
*)context->GetVaryingStencilTablesSizes(), - (int const *)context->GetVaryingStencilTablesOffsets(), - (int const *)context->GetVaryingStencilTablesIndices(), - (float const *)context->GetVaryingStencilTablesWeights(), - start, - end); - } - } -} - -CudaComputeController::CudaComputeController() { -} - -CudaComputeController::~CudaComputeController() { -} - -void -CudaComputeController::Synchronize() { - - cudaThreadSynchronize(); -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cudaComputeController.h b/opensubdiv/osd/cudaComputeController.h deleted file mode 100644 index f3486b8c..00000000 --- a/opensubdiv/osd/cudaComputeController.h +++ /dev/null @@ -1,180 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#ifndef OSD_CUDA_COMPUTE_CONTROLLER_H -#define OSD_CUDA_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cudaComputeContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching CUDA subdivision kernels. -/// -/// CudaComputeController is a compute controller class to launch -/// Cuda subdivision kernels. It requires CudaVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class CudaComputeController { -public: - typedef CudaComputeContext ComputeContext; - - /// Constructor. - CudaComputeController(); - - /// Destructor. - ~CudaComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CudaContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. 
- /// - template - void Compute( CudaComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CudaContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CudaComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - static_cast(vertexBuffer->BindCudaBuffer()) : 0; - _currentBindState.varyingBuffer = varyingBuffer ? 
- static_cast(varyingBuffer->BindCudaBuffer()) : 0; - } - - /// Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {} - - void Reset() { - vertexBuffer = varyingBuffer = NULL; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float * vertexBuffer, // cuda buffers - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CUDA_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/cudaEvaluator.cpp b/opensubdiv/osd/cudaEvaluator.cpp new file mode 100644 index 00000000..4ae450e2 --- /dev/null +++ b/opensubdiv/osd/cudaEvaluator.cpp @@ -0,0 +1,124 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/cudaEvaluator.h" + +#include <cuda_runtime.h> +#include <vector> + +#include "../far/stencilTables.h" + +extern "C" { + void CudaEvalStencils(const float *src, + float *dst, + int length, + int srcStride, + int dstStride, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); +} + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { +// Copies src into a newly cudaMalloc'ed device buffer; returns 0 when a CUDA call fails. +template <class T> void * +createCudaBuffer(std::vector<T> const & src) { + void * devicePtr = 0; + + size_t size = src.size()*sizeof(T); + + cudaError_t err = cudaMalloc(&devicePtr, size); + if (err != cudaSuccess) { + return 0; // never hand back a pointer from a failed cudaMalloc + } + + err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice); // at(0) throws std::out_of_range if src is empty + if (err != cudaSuccess) { + cudaFree(devicePtr); + return 0; + } + return devicePtr; +} + +// ---------------------------------------------------------------------------- +// Uploads the four stencil arrays to CUDA buffers; with no stencils all four pointers are NULL. +CudaStencilTables::CudaStencilTables(Far::StencilTables const *stencilTables) { + _numStencils = stencilTables->GetNumStencils(); + if (_numStencils > 0) { + _sizes = createCudaBuffer(stencilTables->GetSizes()); + _offsets = createCudaBuffer(stencilTables->GetOffsets()); + _indices = createCudaBuffer(stencilTables->GetControlIndices()); + _weights = createCudaBuffer(stencilTables->GetWeights()); + } else { + _sizes = _offsets = _indices = _weights = NULL; + } +} + +CudaStencilTables::~CudaStencilTables() { + if (_sizes) cudaFree(_sizes); + if (_offsets) cudaFree(_offsets); + if (_indices) 
cudaFree(_indices); + if (_weights) cudaFree(_weights); +} + +// --------------------------------------------------------------------------- +// Advances src/dst by their descriptor offsets, then forwards to the extern "C" CudaEvalStencils; always returns true. +/* static */ +bool +CudaEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end) { + CudaEvalStencils(src + srcDesc.offset, + dst + dstDesc.offset, + srcDesc.length, + srcDesc.stride, + dstDesc.stride, + sizes, offsets, indices, weights, + start, end); + return true; +} +// Blocks the host until all previously issued work on the current CUDA device has completed. +/* static */ +void +CudaEvaluator::Synchronize(void * /*deviceContext*/) { + cudaDeviceSynchronize(); // replaces the deprecated cudaThreadSynchronize() +} + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cudaEvaluator.h b/opensubdiv/osd/cudaEvaluator.h new file mode 100644 index 00000000..c0df1bba --- /dev/null +++ b/opensubdiv/osd/cudaEvaluator.h @@ -0,0 +1,148 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_CUDA_EVALUATOR_H +#define OPENSUBDIV_OSD_CUDA_EVALUATOR_H + +#include "../version.h" + +#include <vector> +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far { + class StencilTables; +} + +namespace Osd { + +/// \brief CUDA stencil tables +/// +/// This class is a cuda buffer representation of Far::StencilTables. +/// +/// CudaEvaluator consumes this table to apply stencils +/// +/// The instance owns its CUDA buffers; the destructor releases them with cudaFree. +class CudaStencilTables { +public: + static CudaStencilTables *Create(Far::StencilTables const *stencilTables, + void *deviceContext = NULL) { + (void)deviceContext; // unused + return new CudaStencilTables(stencilTables); + } + + explicit CudaStencilTables(Far::StencilTables const *stencilTables); + ~CudaStencilTables(); + // NOTE(review): no copy control is declared although the destructor frees the buffers; copying an instance would free them twice. + // interfaces needed by CudaEvaluator::EvalStencils + void *GetSizesBuffer() const { return _sizes; } + void *GetOffsetsBuffer() const { return _offsets; } + void *GetIndicesBuffer() const { return _indices; } + void *GetWeightsBuffer() const { return _weights; } + int GetNumStencils() const { return _numStencils; } + +private: + void * _sizes, + * _offsets, + * _indices, + * _weights; + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +class CudaEvaluator { +public: + /// \brief Generic static compute function. This function has the same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcVertexBuffer Input primvar buffer. 
+ /// must have BindCudaBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstVertexBuffer Output primvar buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// Cuda memory interfaces. + /// + /// @param instance not used in the CudaEvaluator + /// + /// @param deviceContext not used in the CudaEvaluator + /// + template <typename VERTEX_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + const void *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + return EvalStencils(srcVertexBuffer->BindCudaBuffer(), + srcDesc, + dstVertexBuffer->BindCudaBuffer(), + dstDesc, + (unsigned char const *)stencilTable->GetSizesBuffer(), + (int const *)stencilTable->GetOffsetsBuffer(), + (int const *)stencilTable->GetIndicesBuffer(), + (float const *)stencilTable->GetWeightsBuffer(), + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); + + static void Synchronize(void *deviceContext = NULL); +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_CUDA_EVALUATOR_H diff --git a/opensubdiv/osd/cudaKernel.cu b/opensubdiv/osd/cudaKernel.cu index fb5e2b11..58a8bc5f 100644 --- 
a/opensubdiv/osd/cudaKernel.cu +++ b/opensubdiv/osd/cudaKernel.cu @@ -257,17 +257,17 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs, extern "C" { -void CudaComputeStencils(const float *src, - float *dst, - int length, - int srcStride, - int dstStride, - const unsigned char * sizes, - const int * offsets, - const int * indices, - const float * weights, - int start, - int end) +void CudaEvalStencils(const float *src, + float *dst, + int length, + int srcStride, + int dstStride, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end) { // assert(cvs and dst and sizes and offsets and indices and weights and (end>=start)); diff --git a/opensubdiv/osd/cudaVertexBuffer.h b/opensubdiv/osd/cudaVertexBuffer.h index 4b6ea2e4..c073939d 100644 --- a/opensubdiv/osd/cudaVertexBuffer.h +++ b/opensubdiv/osd/cudaVertexBuffer.h @@ -35,7 +35,7 @@ namespace Osd { /// \brief Concrete vertex buffer class for Cuda subvision. /// /// CudaVertexBuffer implements CudaVertexBufferInterface. -/// An instance of this buffer class can be passed to CudaComputeController +/// An instance of this buffer class can be passed to CudaEvaluator /// class CudaVertexBuffer { diff --git a/opensubdiv/osd/d3d11ComputeContext.cpp b/opensubdiv/osd/d3d11ComputeContext.cpp deleted file mode 100644 index 541e5ac1..00000000 --- a/opensubdiv/osd/d3d11ComputeContext.cpp +++ /dev/null @@ -1,284 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../far/stencilTables.h" - -#include "../osd/d3d11ComputeContext.h" -#include "../far/error.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } - -// ---------------------------------------------------------------------------- - -struct D3D11Table { - - D3D11Table() : buffer(0), srv(0) { } - - ~D3D11Table() { - SAFE_RELEASE(buffer); - SAFE_RELEASE(srv); - } - - bool IsValid() const { - return (buffer and srv); - } - - template void initialize(std::vector const & src, - DXGI_FORMAT format, ID3D11DeviceContext *deviceContext) { - - size_t size = src.size()*sizeof(T); - - if (size==0) { - buffer = 0; - srv = 0; - return; - } - - ID3D11Device *device = 0; - deviceContext->GetDevice(&device); - assert(device); - - D3D11_BUFFER_DESC bd; - bd.ByteWidth = (unsigned int)size; - bd.Usage = D3D11_USAGE_IMMUTABLE; - bd.BindFlags = D3D11_BIND_SHADER_RESOURCE; - bd.CPUAccessFlags = 0; - bd.MiscFlags = 0; - bd.StructureByteStride = 0; - - D3D11_SUBRESOURCE_DATA initData; - initData.pSysMem = &src.at(0); - - HRESULT hr = device->CreateBuffer(&bd, &initData, &buffer); - if (FAILED(hr)) { - 
Far::Error(Far::FAR_RUNTIME_ERROR, - "Error creating compute table buffer\n"); - return; - } - - D3D11_SHADER_RESOURCE_VIEW_DESC srvd; - ZeroMemory(&srvd, sizeof(srvd)); - srvd.Format = format; - srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; - srvd.Buffer.FirstElement = 0; - srvd.Buffer.NumElements = (unsigned int)src.size(); - - hr = device->CreateShaderResourceView(buffer, &srvd, &srv); - if (FAILED(hr)) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "Error creating compute table shader resource view\n"); - return; - } - } - - ID3D11Buffer * buffer; - ID3D11ShaderResourceView * srv; -}; - - -// ---------------------------------------------------------------------------- - -class D3D11ComputeContext::D3D11StencilTables { - -public: - - D3D11StencilTables(Far::StencilTables const & stencilTables, - ID3D11DeviceContext *deviceContext) { - - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - // convert unsigned char sizes buffer to ints - // (HLSL does not have uint8 type) - std::vector const sizes(stencilTables.GetSizes().begin(), - stencilTables.GetSizes().end()); - - _sizes.initialize(sizes, - DXGI_FORMAT_R32_SINT, - deviceContext); - _offsets.initialize(stencilTables.GetOffsets(), - DXGI_FORMAT_R32_SINT, - deviceContext); - _indices.initialize(stencilTables.GetControlIndices(), - DXGI_FORMAT_R32_SINT, - deviceContext); - _weights.initialize(stencilTables.GetWeights(), - DXGI_FORMAT_R32_FLOAT, - deviceContext); - } - } - - bool IsValid() const { - return _sizes.IsValid() and _offsets.IsValid() and - _indices.IsValid() and _weights.IsValid(); - } - - D3D11Table const & GetSizes() const { - return _sizes; - } - - D3D11Table const & GetOffsets() const { - return _offsets; - } - - D3D11Table const & GetIndices() const { - return _indices; - } - - D3D11Table const & GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - - void Bind(ID3D11DeviceContext * deviceContext) const { - 
ID3D11ShaderResourceView *SRViews[] = { - _sizes.srv, - _offsets.srv, - _indices.srv, - _weights.srv - }; - deviceContext->CSSetShaderResources(1, 4, SRViews); // t1-t4 - } - - static void Unbind(ID3D11DeviceContext * deviceContext) { - ID3D11ShaderResourceView *SRViews[] = { 0, 0, 0, 0 }; - deviceContext->CSSetShaderResources(1, 4, SRViews); - } - - -private: - - D3D11Table _sizes, - _offsets, - _indices, - _weights; - - int _numStencils; -}; - -// ---------------------------------------------------------------------------- - -D3D11ComputeContext::D3D11ComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = - new D3D11StencilTables(*vertexStencilTables, deviceContext); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = - new D3D11StencilTables(*varyingStencilTables, deviceContext); - - if (_numControlVertices) { - assert(_numControlVertices==varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -D3D11ComputeContext::~D3D11ComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - - -// ---------------------------------------------------------------------------- - -bool -D3D11ComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->IsValid() : false; -} - -bool -D3D11ComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -D3D11ComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? 
_vertexStencilTables->GetNumStencils() : 0; -} - -int -D3D11ComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; -} - -// ---------------------------------------------------------------------------- - -void -D3D11ComputeContext::BindVertexStencilTables(ID3D11DeviceContext *deviceContext) const { - if (_vertexStencilTables) { - _vertexStencilTables->Bind(deviceContext); - } -} - -void -D3D11ComputeContext::BindVaryingStencilTables(ID3D11DeviceContext *deviceContext) const { - if (_varyingStencilTables) { - _varyingStencilTables->Bind(deviceContext); - } -} - -void -D3D11ComputeContext::UnbindStencilTables(ID3D11DeviceContext *deviceContext) const { - D3D11StencilTables::Unbind(deviceContext); -} - - -// ---------------------------------------------------------------------------- - -D3D11ComputeContext * -D3D11ComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext) { - - D3D11ComputeContext *result = - new D3D11ComputeContext(vertexStencilTables, varyingStencilTables, - deviceContext); - - return result; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/d3d11ComputeContext.h b/opensubdiv/osd/d3d11ComputeContext.h deleted file mode 100644 index 7d3dcd4f..00000000 --- a/opensubdiv/osd/d3d11ComputeContext.h +++ /dev/null @@ -1,128 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_D3D11_COMPUTE_CONTEXT_H -#define OSD_D3D11_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include "../osd/nonCopyable.h" - -struct ID3D11DeviceContext; - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far{ class StencilTables; } - -namespace Osd { - -/// -/// \brief D3D Refine Context -/// -/// The D3D implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. 
-/// -class D3D11ComputeContext : public NonCopyable { -public: - - /// Creates an D3D11ComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext The D3D device - /// - static D3D11ComputeContext * Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext); - - /// Destructor - virtual ~D3D11ComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Binds D3D11 buffers containing stencils for 'vertex' interpolation - /// - /// @param deviceContext The D3D device - /// - void BindVertexStencilTables(ID3D11DeviceContext *deviceContext) const; - - /// Binds D3D11 buffers containing stencils for 'varying' interpolation - /// - /// @param deviceContext The D3D device - /// - void BindVaryingStencilTables(ID3D11DeviceContext *deviceContext) const; - - /// Unbinds D3D11 stencil buffers - /// - /// @param deviceContext The D3D device - /// - void UnbindStencilTables(ID3D11DeviceContext *deviceContext) const; - -protected: - - explicit D3D11ComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext); - -private: - - class D3D11StencilTables; - - D3D11StencilTables 
* _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_D3D11_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/d3d11ComputeController.cpp b/opensubdiv/osd/d3d11ComputeController.cpp deleted file mode 100644 index c8ea64e4..00000000 --- a/opensubdiv/osd/d3d11ComputeController.cpp +++ /dev/null @@ -1,340 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/d3d11ComputeController.h" -#include "../far/error.h" -#include "../osd/vertexDescriptor.h" - -#define INITGUID // for IID_ID3D11ShaderReflection -#include -#include -#include - -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } - -static const char *shaderSource = -#include "../osd/hlslComputeKernel.gen.h" -; - -// ---------------------------------------------------------------------------- - -// must match constant buffer declaration in hlslComputeKernel.hlsl -__declspec(align(16)) - -struct KernelUniformArgs { - - int start; // batch - int end; - int srcOffset; - int dstOffset; -}; - -// ---------------------------------------------------------------------------- - -class D3D11ComputeController::KernelBundle : - NonCopyable { - -public: - - KernelBundle() : - _computeShader(0), - _classLinkage(0), - _singleBufferKernel(0), - _separateBufferKernel(0), - _uniformArgs(0), - _workGroupSize(64) { } - - ~KernelBundle() { - SAFE_RELEASE(_computeShader); - SAFE_RELEASE(_classLinkage); - SAFE_RELEASE(_singleBufferKernel); - SAFE_RELEASE(_separateBufferKernel); - SAFE_RELEASE(_uniformArgs); - } - - - bool Compile(VertexBufferDescriptor const &srcDesc, - VertexBufferDescriptor const &dstDesc, - ID3D11DeviceContext *deviceContext) { - - // XXX: only store srcDesc. - // this is ok since currently this kernel doesn't get called with - // different strides for src and dst. This function will be - // refactored soon. 
- _desc = VertexBufferDescriptor(0, srcDesc.length, srcDesc.stride); - - DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS; - #ifdef _DEBUG - dwShaderFlags |= D3DCOMPILE_DEBUG; - #endif - - std::ostringstream ss; - ss << srcDesc.length; std::string lengthValue(ss.str()); ss.str(""); - ss << srcDesc.stride; std::string srcStrideValue(ss.str()); ss.str(""); - ss << dstDesc.stride; std::string dstStrideValue(ss.str()); ss.str(""); - ss << _workGroupSize; std::string workgroupSizeValue(ss.str()); ss.str(""); - - D3D_SHADER_MACRO defines[] = - { "LENGTH", lengthValue.c_str(), - "SRC_STRIDE", srcStrideValue.c_str(), - "DST_STRIDE", dstStrideValue.c_str(), - "WORK_GROUP_SIZE", workgroupSizeValue.c_str(), - 0, 0 }; - - ID3DBlob * computeShaderBuffer = NULL; - ID3DBlob * errorBuffer = NULL; - - HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource), - NULL, &defines[0], NULL, - "cs_main", "cs_5_0", - dwShaderFlags, 0, - &computeShaderBuffer, &errorBuffer); - if (FAILED(hr)) { - if (errorBuffer != NULL) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "Error compiling HLSL shader: %s\n", - (CHAR*)errorBuffer->GetBufferPointer()); - errorBuffer->Release(); - return false; - } - } - - ID3D11Device *device = NULL; - deviceContext->GetDevice(&device); - assert(device); - - device->CreateClassLinkage(&_classLinkage); - assert(_classLinkage); - - device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), - computeShaderBuffer->GetBufferSize(), - _classLinkage, - &_computeShader); - assert(_computeShader); - - ID3D11ShaderReflection *reflector; - D3DReflect(computeShaderBuffer->GetBufferPointer(), - computeShaderBuffer->GetBufferSize(), - IID_ID3D11ShaderReflection, (void**) &reflector); - assert(reflector); - - assert(reflector->GetNumInterfaceSlots() == 1); - reflector->Release(); - - computeShaderBuffer->Release(); - - _classLinkage->GetClassInstance("singleBufferCompute", 0, &_singleBufferKernel); - assert(_singleBufferKernel); - 
_classLinkage->GetClassInstance("separateBufferCompute", 0, &_separateBufferKernel); - assert(_separateBufferKernel); - - return true; - } - - void ApplyStencilTableKernel(VertexBufferDescriptor const &srcDesc, - VertexBufferDescriptor const &dstDesc, - int start, - int end, - ID3D11DeviceContext *deviceContext) { - - int count = end - start; - if (count <= 0) return; - - KernelUniformArgs args; - args.start = start; - args.end = end; - args.srcOffset = srcDesc.offset; - args.dstOffset = dstDesc.offset; - - if (not _uniformArgs) { - ID3D11Device *device = NULL; - deviceContext->GetDevice(&device); - assert(device); - - D3D11_BUFFER_DESC cbDesc; - ZeroMemory(&cbDesc, sizeof(cbDesc)); - cbDesc.Usage = D3D11_USAGE_DYNAMIC; - cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - cbDesc.MiscFlags = 0; - cbDesc.ByteWidth = sizeof(KernelUniformArgs); - device->CreateBuffer(&cbDesc, NULL, &_uniformArgs); - } - assert(_uniformArgs); - - D3D11_MAPPED_SUBRESOURCE mappedResource; - deviceContext->Map(_uniformArgs, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); - CopyMemory(mappedResource.pData, &args, sizeof(KernelUniformArgs)); - - deviceContext->Unmap(_uniformArgs, 0); - deviceContext->CSSetConstantBuffers(0, 1, &_uniformArgs); // b0 - - deviceContext->CSSetShader(_computeShader, &_singleBufferKernel, 1); - deviceContext->Dispatch((count + _workGroupSize - 1) / _workGroupSize, 1, 1); - } - - struct Match { - - Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -private: - - ID3D11ComputeShader * _computeShader; - - ID3D11ClassLinkage * _classLinkage; - - ID3D11ClassInstance * _singleBufferKernel; - ID3D11ClassInstance * _separateBufferKernel; - - ID3D11Buffer * _uniformArgs; // uniform paramaeters for kernels - - VertexBufferDescriptor _desc; // 
primvar buffer descriptor - - int _workGroupSize; -}; - -// ---------------------------------------------------------------------------- -void -D3D11ComputeController::Synchronize() { - - if (not _query) { - ID3D11Device *device = NULL; - _deviceContext->GetDevice(&device); - assert(device); - - D3D11_QUERY_DESC desc; - desc.Query = D3D11_QUERY_EVENT; - desc.MiscFlags = 0; - device->CreateQuery(&desc, &_query); - } - _deviceContext->Flush(); - _deviceContext->End(_query); - while (S_OK != _deviceContext->GetData(_query, NULL, 0, 0)); -} - -// ---------------------------------------------------------------------------- - -D3D11ComputeController::KernelBundle const * -D3D11ComputeController::getKernel(VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - if (it != _kernelRegistry.end()) { - return *it; - } else { - assert(_deviceContext); - KernelBundle * kernelBundle = new KernelBundle(); - kernelBundle->Compile(desc, desc, _deviceContext); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -void -D3D11ComputeController::bindBuffer() { - - // Unbind the vertexBuffer from the input assembler - ID3D11Buffer *NULLBuffer = 0; - UINT voffset = 0, vstride = 0; - _deviceContext->IASetVertexBuffers(0, 1, &NULLBuffer, &voffset, &vstride); - - // Unbind the vertexBuffer from the vertex shader - ID3D11ShaderResourceView *NULLSRV = 0; - _deviceContext->VSSetShaderResources(0, 1, &NULLSRV); - - if (_currentBindState.buffer) - _deviceContext->CSSetUnorderedAccessViews(0, 1, &_currentBindState.buffer, 0); // u0 -} - -void -D3D11ComputeController::unbindBuffer() { - assert(_deviceContext); - ID3D11UnorderedAccessView *UAViews[] = { 0 }; - _deviceContext->CSSetUnorderedAccessViews(0, 1, UAViews, 0); // u0 -} - -// ---------------------------------------------------------------------------- - -void -D3D11ComputeController::ApplyStencilTableKernel( 
- D3D11ComputeContext const *context, int numStencils) const { - - assert(context); - - // XXXX manuelk messy const drop forced by D3D API - could use better solution - D3D11ComputeController::KernelBundle * bundle = - const_cast(_currentBindState.kernelBundle); - - VertexBufferDescriptor srcDesc = _currentBindState.desc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride; - - bundle->ApplyStencilTableKernel(srcDesc, - dstDesc, - 0, - numStencils, - _deviceContext); - -} - - -// ---------------------------------------------------------------------------- - -D3D11ComputeController::D3D11ComputeController( - ID3D11DeviceContext *deviceContext) - : _deviceContext(deviceContext), _query(0) { -} - -D3D11ComputeController::~D3D11ComputeController() { - - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } - SAFE_RELEASE(_query); -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/d3d11ComputeController.h b/opensubdiv/osd/d3d11ComputeController.h deleted file mode 100644 index c40c2e6a..00000000 --- a/opensubdiv/osd/d3d11ComputeController.h +++ /dev/null @@ -1,213 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_D3D11_COMPUTE_CONTROLLER_H -#define OSD_D3D11_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/d3d11ComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#include - -struct ID3D11DeviceContext; -struct ID3D11Query; -struct ID3D11UnorderedAccessView; - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching D3D11 Compute subdivision kernels. -/// -/// D3D11ComputeController is a compute controller class to launch -/// D3D11Compute transfrom feedback subdivision kernels. It requires -/// GLVertexBufferInterface as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class D3D11ComputeController { -public: - typedef D3D11ComputeContext ComputeContext; - - /// Constructor. - /// - /// @param deviceContext a valid instanciated D3D11 device context - /// - D3D11ComputeController(ID3D11DeviceContext *deviceContext); - - /// Destructor. - ~D3D11ComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The D3D11Context to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. 
- /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( D3D11ComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - if (vertexBuffer) { - bind(vertexBuffer, vertexDesc); - - context->BindVertexStencilTables(_deviceContext); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVertexStencilTables()); - } - - if (varyingBuffer) { - bind(varyingBuffer, varyingDesc); - - context->BindVaryingStencilTables(_deviceContext); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVaryingStencilTables()); - } - - context->UnbindStencilTables(_deviceContext); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The D3D11Context to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(D3D11ComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context, - int numStencils) const; - - template - void bind( BUFFER * buffer, - VertexBufferDescriptor const * desc ) { - - assert(buffer); - - // if the vertex buffer descriptor is specified, use it - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (desc) { - _currentBindState.desc = *desc; - } else { - int numElements = buffer ? 
buffer->GetNumElements() : 0; - _currentBindState.desc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.buffer = buffer->BindD3D11UAV(_deviceContext); - - _currentBindState.kernelBundle = getKernel(_currentBindState.desc); - - bindBuffer(); - } - - - // Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - unbindBuffer(); - } - - // binds the primvar data buffer - void bindBuffer(); - - // unbinds the primvar data buffer - void unbindBuffer(); - - -private: - - ID3D11DeviceContext *_deviceContext; - ID3D11Query *_query; - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : buffer(0), kernelBundle(0) { } - - void Reset() { - buffer = 0; - desc.Reset(); - kernelBundle = 0; - } - - ID3D11UnorderedAccessView * buffer; - - VertexBufferDescriptor desc; - - KernelBundle const * kernelBundle; - }; - - BindState _currentBindState; - - typedef std::vector KernelRegistry; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - KernelRegistry _kernelRegistry; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_D3D11_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/d3d11ComputeEvaluator.cpp b/opensubdiv/osd/d3d11ComputeEvaluator.cpp new file mode 100644 index 00000000..7f34306f --- /dev/null +++ b/opensubdiv/osd/d3d11ComputeEvaluator.cpp @@ -0,0 +1,375 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. 
This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/d3d11ComputeEvaluator.h" + +#include +#include +#include +#include + +#define INITGUID // for IID_ID3D11ShaderReflection +#include +#include +#include + +#include "../far/error.h" +#include "../far/stencilTables.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } + +static const char *shaderSource = +#include "../osd/hlslComputeKernel.gen.h" +; + +// ---------------------------------------------------------------------------- + +// must match constant buffer declaration in hlslComputeKernel.hlsl +__declspec(align(16)) + +struct KernelUniformArgs { + + int start; // batch + int end; + + int srcOffset; + int dstOffset; +}; + +// ---------------------------------------------------------------------------- + +template +static ID3D11Buffer *createBuffer(std::vector const &src, + ID3D11Device *device) { + + size_t size = src.size()*sizeof(T); + + ID3D11Buffer *buffer = NULL; + D3D11_BUFFER_DESC bd; + bd.ByteWidth = (unsigned int)size; + bd.Usage = D3D11_USAGE_IMMUTABLE; + bd.BindFlags = D3D11_BIND_SHADER_RESOURCE; + bd.CPUAccessFlags = 0; + bd.MiscFlags = 0; + bd.StructureByteStride = 0; + + D3D11_SUBRESOURCE_DATA 
initData; + initData.pSysMem = &src.at(0); + + HRESULT hr = device->CreateBuffer(&bd, &initData, &buffer); + if (FAILED(hr)) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "Error creating compute table buffer\n"); + return NULL; + } + return buffer; +} + +static ID3D11ShaderResourceView *createSRV(ID3D11Buffer *buffer, + DXGI_FORMAT format, + ID3D11Device *device, + size_t size) { + ID3D11ShaderResourceView *srv = NULL; + D3D11_SHADER_RESOURCE_VIEW_DESC srvd; + ZeroMemory(&srvd, sizeof(srvd)); + srvd.Format = format; + srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvd.Buffer.FirstElement = 0; + srvd.Buffer.NumElements = (unsigned int)size; + + HRESULT hr = device->CreateShaderResourceView(buffer, &srvd, &srv); + if (FAILED(hr)) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "Error creating compute table shader resource view\n"); + return NULL; + } + return srv; +} + +D3D11StencilTables::D3D11StencilTables(Far::StencilTables const *stencilTables, + ID3D11DeviceContext *deviceContext) + { + ID3D11Device *device = NULL; + deviceContext->GetDevice(&device); + assert(device); + + _numStencils = stencilTables->GetNumStencils(); + if (_numStencils > 0) { + // convert unsigned char sizes buffer to ints + // (HLSL does not have uint8 type) + std::vector const sizes(stencilTables->GetSizes().begin(), + stencilTables->GetSizes().end()); + + _sizesBuffer = createBuffer(sizes, device); + _offsetsBuffer = createBuffer(stencilTables->GetOffsets(), device); + _indicesBuffer = createBuffer(stencilTables->GetControlIndices(), device); + _weightsBuffer = createBuffer(stencilTables->GetWeights(), device); + + _sizes = createSRV(_sizesBuffer, DXGI_FORMAT_R32_SINT, device, + stencilTables->GetSizes().size()); + _offsets = createSRV(_offsetsBuffer, DXGI_FORMAT_R32_SINT, device, + stencilTables->GetOffsets().size()); + _indices = createSRV(_indicesBuffer, DXGI_FORMAT_R32_SINT, device, + stencilTables->GetControlIndices().size()); + _weights= createSRV(_weightsBuffer, DXGI_FORMAT_R32_FLOAT, 
device,
+                                 stencilTables->GetWeights().size());
+    } else {
+        _sizes = _offsets = _indices = _weights = NULL;
+        _sizesBuffer = _offsetsBuffer = _indicesBuffer = _weightsBuffer = NULL;
+    }
+}
+
+/// Releases the four shader resource views and their backing buffers.
+D3D11StencilTables::~D3D11StencilTables() {
+    SAFE_RELEASE(_sizes);
+    SAFE_RELEASE(_sizesBuffer);
+    SAFE_RELEASE(_offsets);
+    SAFE_RELEASE(_offsetsBuffer);
+    SAFE_RELEASE(_indices);
+    SAFE_RELEASE(_indicesBuffer);
+    SAFE_RELEASE(_weights);
+    SAFE_RELEASE(_weightsBuffer);
+}
+
+// ---------------------------------------------------------------------------
+
+
+/// Constructs an evaluator that owns no D3D11 objects yet. Compile() has to
+/// succeed before the instance can dispatch.
+D3D11ComputeEvaluator::D3D11ComputeEvaluator() :
+    _computeShader(NULL),
+    _classLinkage(NULL),
+    _singleBufferKernel(NULL),
+    _separateBufferKernel(NULL),
+    _uniformArgs(NULL),
+    _workGroupSize(64) {
+
+}
+
+/// Allocates an evaluator and compiles its kernel for the given descriptors.
+///
+/// @return the compiled instance (owned by the caller), or NULL when
+///         Compile() fails.
+D3D11ComputeEvaluator *
+D3D11ComputeEvaluator::Create(VertexBufferDescriptor const &srcDesc,
+                              VertexBufferDescriptor const &dstDesc,
+                              ID3D11DeviceContext *deviceContext) {
+    D3D11ComputeEvaluator *instance = new D3D11ComputeEvaluator();
+    if (instance->Compile(srcDesc, dstDesc, deviceContext)) return instance;
+    delete instance;
+    return NULL;
+}
+
+/// Releases every D3D11 object created by Compile().
+D3D11ComputeEvaluator::~D3D11ComputeEvaluator() {
+    SAFE_RELEASE(_computeShader);
+    SAFE_RELEASE(_classLinkage);
+    SAFE_RELEASE(_singleBufferKernel);
+    SAFE_RELEASE(_separateBufferKernel);
+    SAFE_RELEASE(_uniformArgs);
+}
+
+/// Compiles the HLSL stencil kernel with LENGTH / SRC_STRIDE / DST_STRIDE /
+/// WORK_GROUP_SIZE baked in as macros, then creates the compute shader, its
+/// two class instances and the constant buffer holding KernelUniformArgs.
+///
+/// @param srcDesc        descriptor of the source buffer (length and stride
+///                       are baked into the shader)
+/// @param dstDesc        descriptor of the destination buffer; its length
+///                       must not be smaller than srcDesc.length
+/// @param deviceContext  context whose device creates the D3D11 objects
+///
+/// @return false if the descriptors are inconsistent, the shader does not
+///         compile, or any D3D11 object cannot be created.
+bool
+D3D11ComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
+                               VertexBufferDescriptor const &dstDesc,
+                               ID3D11DeviceContext *deviceContext) {
+
+    if (srcDesc.length > dstDesc.length) {
+        Far::Error(Far::FAR_RUNTIME_ERROR,
+                   "srcDesc length must be less than or equal to "
+                   "dstDesc length.\n");
+        return false;
+    }
+    if (deviceContext == NULL) {
+        Far::Error(Far::FAR_RUNTIME_ERROR,
+                   "Compile requires a valid ID3D11DeviceContext.\n");
+        return false;
+    }
+
+    // Drop anything left over from a previous Compile() on this instance.
+    SAFE_RELEASE(_computeShader);
+    SAFE_RELEASE(_classLinkage);
+    SAFE_RELEASE(_singleBufferKernel);
+    SAFE_RELEASE(_separateBufferKernel);
+    SAFE_RELEASE(_uniformArgs);
+
+    DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS
+                        | D3D10_SHADER_RESOURCES_MAY_ALIAS;
+#ifdef _DEBUG
+    dwShaderFlags |= D3DCOMPILE_DEBUG;
+#endif
+
+    // The macro table stores raw char pointers, so the strings must outlive
+    // the D3DCompile call below.
+    std::ostringstream ss;
+    ss << srcDesc.length;  std::string lengthValue(ss.str()); ss.str("");
+    ss << srcDesc.stride;  std::string srcStrideValue(ss.str()); ss.str("");
+    ss << dstDesc.stride;  std::string dstStrideValue(ss.str()); ss.str("");
+    ss << _workGroupSize;  std::string workgroupSizeValue(ss.str()); ss.str("");
+
+    D3D_SHADER_MACRO defines[] =
+        { "LENGTH", lengthValue.c_str(),
+          "SRC_STRIDE", srcStrideValue.c_str(),
+          "DST_STRIDE", dstStrideValue.c_str(),
+          "WORK_GROUP_SIZE", workgroupSizeValue.c_str(),
+          0, 0 };
+
+    ID3DBlob * computeShaderBuffer = NULL;
+    ID3DBlob * errorBuffer = NULL;
+
+    HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource),
+                            NULL, &defines[0], NULL,
+                            "cs_main", "cs_5_0",
+                            dwShaderFlags, 0,
+                            &computeShaderBuffer, &errorBuffer);
+    if (FAILED(hr) || computeShaderBuffer == NULL) {
+        // Fail even when the compiler produced no message blob; continuing
+        // would dereference a NULL computeShaderBuffer.
+        if (errorBuffer != NULL) {
+            Far::Error(Far::FAR_RUNTIME_ERROR,
+                       "Error compiling HLSL shader: %s\n",
+                       (CHAR*)errorBuffer->GetBufferPointer());
+        } else {
+            Far::Error(Far::FAR_RUNTIME_ERROR,
+                       "Error compiling HLSL shader.\n");
+        }
+        SAFE_RELEASE(errorBuffer);
+        SAFE_RELEASE(computeShaderBuffer);
+        return false;
+    }
+    // A successful compile may still hand back a blob holding warnings.
+    SAFE_RELEASE(errorBuffer);
+
+    // GetDevice() adds a reference that is released before returning.
+    ID3D11Device *device = NULL;
+    deviceContext->GetDevice(&device);
+    assert(device);
+
+    // Each step runs only if all previous ones succeeded. Objects created
+    // before a failure stay in the members and are released by the
+    // destructor (or by the next Compile()).
+    bool ok = (device != NULL);
+
+    if (ok) {
+        ok = SUCCEEDED(device->CreateClassLinkage(&_classLinkage))
+            && _classLinkage != NULL;
+    }
+    if (ok) {
+        ok = SUCCEEDED(device->CreateComputeShader(
+                           computeShaderBuffer->GetBufferPointer(),
+                           computeShaderBuffer->GetBufferSize(),
+                           _classLinkage,
+                           &_computeShader))
+            && _computeShader != NULL;
+    }
+    if (ok) {
+        ID3D11ShaderReflection *reflector = NULL;
+        hr = D3DReflect(computeShaderBuffer->GetBufferPointer(),
+                        computeShaderBuffer->GetBufferSize(),
+                        IID_ID3D11ShaderReflection, (void**) &reflector);
+        ok = SUCCEEDED(hr) && reflector != NULL;
+        if (ok) {
+            assert(reflector->GetNumInterfaceSlots() == 1);
+        }
+        SAFE_RELEASE(reflector);
+    }
+
+    computeShaderBuffer->Release();
+
+    if (ok) {
+        ok = SUCCEEDED(_classLinkage->GetClassInstance(
+                           "singleBufferCompute", 0, &_singleBufferKernel))
+            && _singleBufferKernel != NULL;
+    }
+    if (ok) {
+        ok = SUCCEEDED(_classLinkage->GetClassInstance(
+                           "separateBufferCompute", 0, &_separateBufferKernel))
+            && _separateBufferKernel != NULL;
+    }
+    if (ok) {
+        D3D11_BUFFER_DESC cbDesc;
+        ZeroMemory(&cbDesc, sizeof(cbDesc));
+        cbDesc.Usage = D3D11_USAGE_DYNAMIC;
+        cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+        cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+        cbDesc.MiscFlags = 0;
+        cbDesc.ByteWidth = sizeof(KernelUniformArgs);
+        ok = SUCCEEDED(device->CreateBuffer(&cbDesc, NULL, &_uniformArgs))
+            && _uniformArgs != NULL;
+    }
+
+    SAFE_RELEASE(device);
+
+    if (!ok) {
+        Far::Error(Far::FAR_RUNTIME_ERROR,
+                   "Error creating D3D11 compute kernel objects.\n");
+    }
+    return ok;
+}
+
+/// Blocks until the GPU has consumed every command issued so far on
+/// deviceContext, by polling an event query.
+/* static */
+void
+D3D11ComputeEvaluator::Synchronize(ID3D11DeviceContext *deviceContext) {
+    // XXX: this is currently just for the performance measuring purpose.
+
+    // XXXFIXME!
+    ID3D11Query *query = NULL;
+
+    ID3D11Device *device = NULL;
+    deviceContext->GetDevice(&device);
+    assert(device);
+    if (device == NULL) return;
+
+    D3D11_QUERY_DESC desc;
+    desc.Query = D3D11_QUERY_EVENT;
+    desc.MiscFlags = 0;
+    HRESULT hr = device->CreateQuery(&desc, &query);
+    SAFE_RELEASE(device);   // balance the reference taken by GetDevice()
+    if (FAILED(hr) || query == NULL) return;
+
+    deviceContext->Flush();
+    deviceContext->End(query);
+    // S_FALSE means "not finished yet". Any other result (S_OK or an error
+    // such as a removed device) ends the wait instead of spinning forever.
+    while (S_FALSE == deviceContext->GetData(query, NULL, 0, 0)) {
+    }
+
+    SAFE_RELEASE(query);
+}
+
+/// Uploads the batch range and buffer offsets to the constant buffer, binds
+/// the stencil table SRVs (t1-t4) and the vertex UAVs (u0, u1), dispatches
+/// one thread per stencil in [start, end) and unbinds everything again.
+///
+/// @return true when the dispatch was issued or the range is empty; false
+///         when the kernel has not been compiled or the constant buffer
+///         cannot be mapped.
+bool
+D3D11ComputeEvaluator::EvalStencils(ID3D11UnorderedAccessView *srcUAV,
+                                    VertexBufferDescriptor const &srcDesc,
+                                    ID3D11UnorderedAccessView *dstUAV,
+                                    VertexBufferDescriptor const &dstDesc,
+                                    ID3D11ShaderResourceView *sizesSRV,
+                                    ID3D11ShaderResourceView *offsetsSRV,
+                                    ID3D11ShaderResourceView *indicesSRV,
+                                    ID3D11ShaderResourceView *weightsSRV,
+                                    int start,
+                                    int end,
+                                    ID3D11DeviceContext *deviceContext) const {
+    assert(deviceContext);
+
+    // Same contract as GLComputeEvaluator: no compiled kernel, no dispatch.
+    if (_computeShader == NULL || _uniformArgs == NULL) return false;
+
+    int count = end - start;
+    if (count <= 0) return true;
+
+    KernelUniformArgs args;
+    args.start = start;
+    args.end = end;
+    args.srcOffset = srcDesc.offset;
+    args.dstOffset = dstDesc.offset;
+
+    D3D11_MAPPED_SUBRESOURCE mappedResource;
+    if (FAILED(deviceContext->Map(_uniformArgs, 0, D3D11_MAP_WRITE_DISCARD,
+                                  0, &mappedResource))) {
+        return false;
+    }
+    CopyMemory(mappedResource.pData, &args, sizeof(KernelUniformArgs));
+
+    deviceContext->Unmap(_uniformArgs, 0);
+    deviceContext->CSSetConstantBuffers(0, 1, &_uniformArgs); // b0
+
+    // Unbind the vertexBuffer from the input assembler
+    ID3D11Buffer *NULLBuffer = 0;
+    UINT voffset = 0, vstride = 0;
+    deviceContext->IASetVertexBuffers(0, 1, &NULLBuffer, &voffset, &vstride);
+    ID3D11ShaderResourceView *NULLSRV = 0;
+    deviceContext->VSSetShaderResources(0, 1, &NULLSRV);
+
+    // bind UAV
+    ID3D11UnorderedAccessView *UAViews[] = { srcUAV, dstUAV };
+    ID3D11ShaderResourceView *SRViews[] = {
+        sizesSRV, offsetsSRV, indicesSRV, weightsSRV };
+
+    // bind stencil tables
+    deviceContext->CSSetShaderResources(1, 4, SRViews); // t1-t4
+
+    UINT numGroups = (count + _workGroupSize - 1) / _workGroupSize;
+    if (srcUAV == dstUAV) {
+        deviceContext->CSSetUnorderedAccessViews(0, 1, UAViews, 0); // u0
+        // Dispatch src == dst buffer
+        deviceContext->CSSetShader(_computeShader, &_singleBufferKernel, 1);
+        deviceContext->Dispatch(numGroups, 1, 1);
+    } else {
+        deviceContext->CSSetUnorderedAccessViews(0, 2, UAViews, 0); // u0, u1
+        // Dispatch src != dst buffer
+        deviceContext->CSSetShader(_computeShader, &_separateBufferKernel, 1);
+        deviceContext->Dispatch(numGroups, 1, 1);
+    }
+
+    // unbind stencil tables and vertexbuffers
+    SRViews[0] = SRViews[1] = SRViews[2] = SRViews[3] = NULL;
+    deviceContext->CSSetShaderResources(1, 4, SRViews);
+
+    UAViews[0] = UAViews[1] = NULL;
+    deviceContext->CSSetUnorderedAccessViews(0, 2, UAViews, 0);
+
+    return true;
+}
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+}  // end namespace OpenSubdiv
diff --git a/opensubdiv/osd/d3d11ComputeEvaluator.h b/opensubdiv/osd/d3d11ComputeEvaluator.h
new file mode 100644
index 00000000..91eb7596
--- /dev/null
+++ b/opensubdiv/osd/d3d11ComputeEvaluator.h
@@ -0,0 +1,227 @@
+//
+//   Copyright 2015 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+//
+
+#ifndef OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H
+#define OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H
+
+#include "../version.h"
+
+struct ID3D11DeviceContext;
+struct ID3D11Buffer;
+struct ID3D11ComputeShader;
+struct ID3D11ClassLinkage;
+struct ID3D11ClassInstance;
+struct ID3D11ShaderResourceView;
+struct ID3D11UnorderedAccessView;
+
+#include "../osd/vertexDescriptor.h"
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+
+namespace Far {
+    class StencilTables;
+}
+
+namespace Osd {
+
+/// \brief D3D11 stencil tables
+///
+/// This class is a D3D11 Shader Resource View representation of
+/// Far::StencilTables.
+///
+/// D3D11ComputeEvaluator consumes this table to apply stencils
+///
+/// NOTE(review): the class owns raw COM pointers that its destructor
+/// releases, yet it is copyable; copying an instance looks unsafe -- confirm
+/// whether copies should be disabled.
+///
+class D3D11StencilTables {
+public:
+    /// Creates a table from any context wrapper that exposes
+    /// GetDeviceContext().
+    template <typename DEVICE_CONTEXT>
+    static D3D11StencilTables *Create(Far::StencilTables const *stencilTables,
+                                      DEVICE_CONTEXT context) {
+        return new D3D11StencilTables(stencilTables, context->GetDeviceContext());
+    }
+
+    /// Creates a table directly from an ID3D11DeviceContext.
+    static D3D11StencilTables *Create(Far::StencilTables const *stencilTables,
+                                      ID3D11DeviceContext *deviceContext) {
+        return new D3D11StencilTables(stencilTables, deviceContext);
+    }
+
+    D3D11StencilTables(Far::StencilTables const *stencilTables,
+                       ID3D11DeviceContext *deviceContext);
+
+    ~D3D11StencilTables();
+
+    // interfaces needed for D3D11ComputeEvaluator
+    ID3D11ShaderResourceView *GetSizesSRV() const { return _sizes; }
+    ID3D11ShaderResourceView *GetOffsetsSRV() const { return _offsets; }
+    ID3D11ShaderResourceView *GetIndicesSRV() const { return _indices; }
+    ID3D11ShaderResourceView *GetWeightsSRV() const { return _weights; }
+    int GetNumStencils() const { return _numStencils; }
+
+private:
+    ID3D11ShaderResourceView *_sizes;
+    ID3D11ShaderResourceView *_offsets;
+    ID3D11ShaderResourceView *_indices;
+    ID3D11ShaderResourceView *_weights;
+    ID3D11Buffer *_sizesBuffer;
+    ID3D11Buffer *_offsetsBuffer;
+    ID3D11Buffer *_indicesBuffer;
+    ID3D11Buffer *_weightsBuffer;
+
+    int _numStencils;
+};
+
+// ---------------------------------------------------------------------------
+
+class D3D11ComputeEvaluator {
+public:
+    typedef bool Instantiatable;
+
+    /// Allocates an evaluator and compiles its kernel. Returns NULL when
+    /// compilation fails; the caller owns the returned instance.
+    static D3D11ComputeEvaluator * Create(VertexBufferDescriptor const &srcDesc,
+                                          VertexBufferDescriptor const &dstDesc,
+                                          ID3D11DeviceContext *deviceContext);
+
+    /// Constructor.
+    D3D11ComputeEvaluator();
+
+    /// Destructor.
+    ~D3D11ComputeEvaluator();
+
+    /// \brief Generic static compute function. This function has a same
+    ///        signature as other device kernels have so that it can be called
+    ///        transparently from OsdMesh template interface.
+    ///
+    /// @param srcVertexBuffer  Input primvar buffer.
+    ///                         must have BindD3D11UAV() method returning an
+    ///                         ID3D11UnorderedAccessView pointer
+    ///
+    /// @param srcDesc          vertex buffer descriptor for the input buffer
+    ///
+    /// @param dstVertexBuffer  Output primvar buffer
+    ///                         must have BindD3D11UAV() method returning an
+    ///                         ID3D11UnorderedAccessView pointer
+    ///
+    /// @param dstDesc          vertex buffer descriptor for the output buffer
+    ///
+    /// @param stencilTable     stencil table to be applied. The table must
+    ///                         have the SRV interfaces (GetSizesSRV() etc.)
+    ///                         and GetNumStencils().
+    ///
+    /// @param instance         cached compiled instance. Clients are supposed
+    ///                         to pre-compile an instance of this class and
+    ///                         provide to this function. If it's null the
+    ///                         kernel still compute by instantiating on-demand
+    ///                         kernel although it may cause a performance
+    ///                         problem.
+    ///
+    /// @param deviceContext    ID3D11DeviceContext.
+    ///
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                             VertexBufferDescriptor const &srcDesc,
+                             VERTEX_BUFFER *dstVertexBuffer,
+                             VertexBufferDescriptor const &dstDesc,
+                             STENCIL_TABLE const *stencilTable,
+                             D3D11ComputeEvaluator const *instance,
+                             ID3D11DeviceContext * deviceContext) {
+        if (instance) {
+            return instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                          dstVertexBuffer, dstDesc,
+                                          stencilTable,
+                                          deviceContext);
+        } else {
+            // Create an instance on demand (slow); it is destroyed again
+            // right after the dispatch.
+            instance = Create(srcDesc, dstDesc, deviceContext);
+            if (instance) {
+                bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                                dstVertexBuffer, dstDesc,
+                                                stencilTable,
+                                                deviceContext);
+                delete instance;
+                return r;
+            }
+            return false;
+        }
+    }
+
+    /// Dispatch the DX compute kernel on GPU asynchronously, applying every
+    /// stencil of the table. Compile() must have succeeded on this instance.
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                      VertexBufferDescriptor const &srcDesc,
+                      VERTEX_BUFFER *dstVertexBuffer,
+                      VertexBufferDescriptor const &dstDesc,
+                      STENCIL_TABLE const *stencilTable,
+                      ID3D11DeviceContext *deviceContext) const {
+        return EvalStencils(srcVertexBuffer->BindD3D11UAV(deviceContext),
+                            srcDesc,
+                            dstVertexBuffer->BindD3D11UAV(deviceContext),
+                            dstDesc,
+                            stencilTable->GetSizesSRV(),
+                            stencilTable->GetOffsetsSRV(),
+                            stencilTable->GetIndicesSRV(),
+                            stencilTable->GetWeightsSRV(),
+                            /* start = */ 0,
+                            /* end   = */ stencilTable->GetNumStencils(),
+                            deviceContext);
+    }
+
+    /// Dispatch the DX compute kernel on GPU asynchronously for the stencils
+    /// in [start, end). Compile() must have succeeded on this instance.
+    bool EvalStencils(ID3D11UnorderedAccessView *srcUAV,
+                      VertexBufferDescriptor const &srcDesc,
+                      ID3D11UnorderedAccessView *dstUAV,
+                      VertexBufferDescriptor const &dstDesc,
+                      ID3D11ShaderResourceView *sizesSRV,
+                      ID3D11ShaderResourceView *offsetsSRV,
+                      ID3D11ShaderResourceView *indicesSRV,
+                      ID3D11ShaderResourceView *weightsSRV,
+                      int start,
+                      int end,
+                      ID3D11DeviceContext *deviceContext) const;
+
+    /// Configure DX kernel. Returns false if it fails to compile the kernel.
+    bool Compile(VertexBufferDescriptor const &srcDesc,
+                 VertexBufferDescriptor const &dstDesc,
+                 ID3D11DeviceContext *deviceContext);
+
+    /// Wait until the dispatched kernel finishes.
+    static void Synchronize(ID3D11DeviceContext *deviceContext);
+
+private:
+    ID3D11ComputeShader * _computeShader;
+    ID3D11ClassLinkage  * _classLinkage;
+    ID3D11ClassInstance * _singleBufferKernel;    // used when src == dst
+    ID3D11ClassInstance * _separateBufferKernel;  // used when src != dst
+    ID3D11Buffer * _uniformArgs;  // uniform parameters for kernels
+
+    int _workGroupSize;
+};
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+using namespace OPENSUBDIV_VERSION;
+
+}  // end namespace OpenSubdiv
+
+
+#endif  // OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H
diff --git a/opensubdiv/osd/d3d11VertexBuffer.h b/opensubdiv/osd/d3d11VertexBuffer.h
index c9066c64..0f727801 100644
--- a/opensubdiv/osd/d3d11VertexBuffer.h
+++ b/opensubdiv/osd/d3d11VertexBuffer.h
@@ -41,7 +41,7 @@ namespace Osd {
 /// \brief Concrete vertex buffer class for DirectX subvision and DirectX drawing.
 ///
 /// D3D11VertexBuffer implements D3D11VertexBufferInterface. An instance
-/// of this buffer class can be passed to D3D11ComputeController.
+/// of this buffer class can be passed to D3D11ComputeEvaluator.
 ///
 class D3D11VertexBuffer {
 public:
diff --git a/opensubdiv/osd/evalLimitContext.cpp b/opensubdiv/osd/evalLimitContext.cpp
deleted file mode 100644
index 5e133171..00000000
--- a/opensubdiv/osd/evalLimitContext.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//
-//   Copyright 2013 Pixar
-//
-//   Licensed under the Apache License, Version 2.0 (the "Apache License")
-//   with the following modification; you may not use this file except in
-//   compliance with the Apache License and the following modification to it:
-//   Section 6. Trademarks. is deleted and replaced with:
-//
-//   6. Trademarks. This License does not grant permission to use the trade
-//      names, trademarks, service marks, or product names of the Licensor
-//      and its affiliates, except as required to comply with Section 4(c) of
-//      the License and to reproduce the content of the NOTICE file.
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/evalLimitContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -EvalLimitContext::EvalLimitContext(Far::PatchTables const & patchTables) { - - _adaptive = patchTables.IsFeatureAdaptive(); -} - -EvalLimitContext::~EvalLimitContext() { -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/evalLimitContext.h b/opensubdiv/osd/evalLimitContext.h deleted file mode 100644 index 6a28c8da..00000000 --- a/opensubdiv/osd/evalLimitContext.h +++ /dev/null @@ -1,100 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_EVAL_LIMIT_CONTEXT_H -#define OSD_EVAL_LIMIT_CONTEXT_H - -#include "../version.h" - -#include "../far/patchTables.h" - -#include "../osd/nonCopyable.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - - -/// \brief Coordinates set on a limit surface -/// -struct LimitLocation { - - LimitLocation() { } - - /// \brief Constructor - /// - /// @param f Ptex face id - /// - /// @param x parametric location on face - /// - /// @param y parametric location on face - /// - LimitLocation(int f, float x, float y) : ptexIndex(f), s(x), t(y) { } - - int ptexIndex; ///< ptex face index - - float s, t; ///< parametric location on face -}; - -class LimitLocationsArray { - -public: - - /// \brief Constructor - LimitLocationsArray() : ptexIndex(-1), numLocations(0), s(0), t(0) { } - - int ptexIndex, ///< ptex face index - numLocations; ///< number of (u,v) coordinates in the array - - float const * s, ///< array of u coordinates - * t; ///< array of v coordinates -}; - - -/// \brief LimitEval Context -/// -/// A stub class to derive LimitEval context classes. -/// -class EvalLimitContext : private NonCopyable { - -public: - /// \brief Destructor. 
- virtual ~EvalLimitContext(); - -protected: - explicit EvalLimitContext(Far::PatchTables const & patchTables); - -private: - bool _adaptive; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* OSD_EVAL_LIMIT_CONTEXT_H */ diff --git a/opensubdiv/osd/glComputeEvaluator.cpp b/opensubdiv/osd/glComputeEvaluator.cpp new file mode 100644 index 00000000..387c84fb --- /dev/null +++ b/opensubdiv/osd/glComputeEvaluator.cpp @@ -0,0 +1,224 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. 
+//
+
+#include "../osd/glComputeEvaluator.h"
+
+#include
+#include
+#include
+#include
+
+#include "../far/error.h"
+#include "../far/stencilTables.h"
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+
+namespace Osd {
+
+static const char *shaderSource =
+#include "../osd/glslComputeKernel.gen.h"
+;
+
+/// Creates a GL buffer object and uploads the contents of src into it with
+/// GL_STATIC_DRAW usage. Uses the EXT_direct_state_access entry point when
+/// it is available; otherwise binds GL_SHADER_STORAGE_BUFFER temporarily and
+/// restores the previous binding.
+///
+/// src must not be empty: src.at(0) throws std::out_of_range otherwise.
+template <class T> GLuint
+createSSBO(std::vector<T> const & src) {
+    GLuint devicePtr = 0;
+    glGenBuffers(1, &devicePtr);
+
+#if defined(GL_EXT_direct_state_access)
+    if (glNamedBufferDataEXT) {
+        glNamedBufferDataEXT(devicePtr, src.size()*sizeof(T),
+                             &src.at(0), GL_STATIC_DRAW);
+    } else {
+#else
+    {
+#endif
+        GLint prev = 0;
+        glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev);
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr);
+        glBufferData(GL_SHADER_STORAGE_BUFFER, src.size()*sizeof(T),
+                     &src.at(0), GL_STATIC_DRAW);
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev);
+    }
+
+    return devicePtr;
+}
+
+/// Uploads the four stencil arrays into GL buffers. An empty table creates
+/// no GL objects and leaves every buffer name at 0.
+GLStencilTablesSSBO::GLStencilTablesSSBO(
+    Far::StencilTables const *stencilTables) {
+    _numStencils = stencilTables->GetNumStencils();
+    if (_numStencils > 0) {
+        _sizes   = createSSBO(stencilTables->GetSizes());
+        _offsets = createSSBO(stencilTables->GetOffsets());
+        _indices = createSSBO(stencilTables->GetControlIndices());
+        _weights = createSSBO(stencilTables->GetWeights());
+    } else {
+        _sizes = _offsets = _indices = _weights = 0;
+    }
+}
+
+/// Deletes the buffers that were created by the constructor.
+GLStencilTablesSSBO::~GLStencilTablesSSBO() {
+    if (_sizes)   glDeleteBuffers(1, &_sizes);
+    if (_offsets) glDeleteBuffers(1, &_offsets);
+    if (_weights) glDeleteBuffers(1, &_weights);
+    if (_indices) glDeleteBuffers(1, &_indices);
+}
+
+// ---------------------------------------------------------------------------
+
+
+/// Constructs an evaluator without a program. The uniform locations are
+/// zero-initialized (as GLXFBEvaluator does) so they never hold garbage
+/// before Compile() runs.
+GLComputeEvaluator::GLComputeEvaluator() :
+    _program(0),
+    _uniformSizes(0), _uniformOffsets(0),
+    _uniformIndices(0), _uniformWeights(0),
+    _uniformStart(0), _uniformEnd(0),
+    _uniformSrcOffset(0), _uniformDstOffset(0),
+    _workGroupSize(64) {
+}
+
+GLComputeEvaluator::~GLComputeEvaluator() {
+    if (_program) {
+        glDeleteProgram(_program);
+    }
+}
+
+/// Builds the GLSL compute program with LENGTH / SRC_STRIDE / DST_STRIDE /
+/// WORK_GROUP_SIZE prepended as #defines and caches its uniform locations.
+/// A program left over from a previous call is deleted first.
+///
+/// @return false if srcDesc.length exceeds dstDesc.length or the program
+///         fails to link (the shader and program logs go to Far::Error).
+bool
+GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
+                            VertexBufferDescriptor const &dstDesc) {
+    if (srcDesc.length > dstDesc.length) {
+        Far::Error(Far::FAR_RUNTIME_ERROR,
+                   "srcDesc length must be less than or equal to "
+                   "dstDesc length.\n");
+        return false;
+    }
+
+    if (_program) {
+        glDeleteProgram(_program);
+        _program = 0;
+    }
+    _program = glCreateProgram();
+
+    GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
+
+    std::ostringstream defines;
+    defines << "#define LENGTH "     << srcDesc.length << "\n"
+            << "#define SRC_STRIDE " << srcDesc.stride << "\n"
+            << "#define DST_STRIDE " << dstDesc.stride << "\n"
+            << "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
+    std::string defineStr = defines.str();
+
+    const char *shaderSources[3] = {"#version 430\n", 0, 0};
+    shaderSources[1] = defineStr.c_str();
+    shaderSources[2] = shaderSource;
+    glShaderSource(shader, 3, shaderSources, NULL);
+    glCompileShader(shader);
+    glAttachShader(_program, shader);
+
+    GLint linked = 0;
+    glLinkProgram(_program);
+    glGetProgramiv(_program, GL_LINK_STATUS, &linked);
+
+    if (linked == GL_FALSE) {
+        // The logs are driver-provided text: print them through "%s" so a
+        // '%' in a message is never interpreted as a format directive.
+        char buffer[1024];
+        buffer[0] = '\0';
+        glGetShaderInfoLog(shader, 1024, NULL, buffer);
+        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);
+
+        buffer[0] = '\0';
+        glGetProgramInfoLog(_program, 1024, NULL, buffer);
+        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);
+
+        // Release the shader on the failure path as well.
+        glDeleteShader(shader);
+        glDeleteProgram(_program);
+        _program = 0;
+        return false;
+    }
+
+    glDeleteShader(shader);
+
+    // store uniform locations for the compute kernel program.
+    _uniformSizes   = glGetUniformLocation(_program, "stencilSizes");
+    _uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
+    _uniformIndices = glGetUniformLocation(_program, "stencilIndices");
+    // NOTE(review): "stencilIWeights" looks like a typo for "stencilWeights";
+    // confirm against the kernel source. EvalStencils() does not read any of
+    // these four locations.
+    _uniformWeights = glGetUniformLocation(_program, "stencilIWeights");
+
+    _uniformStart   = glGetUniformLocation(_program, "batchStart");
+    _uniformEnd     = glGetUniformLocation(_program, "batchEnd");
+
+    _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
+    _uniformDstOffset = glGetUniformLocation(_program, "dstOffset");
+
+    return true;
+}
+
+/* static */
+void
+GLComputeEvaluator::Synchronize(void * /*kernel*/) {
+    // XXX: this is currently just for the performance measuring purpose.
+    // need to be reimplemented by fence and sync.
+    glFinish();
+}
+
+/// Binds the source/destination buffers and the stencil table buffers to
+/// shader storage binding points 0-5, dispatches one invocation per stencil
+/// in [start, end) and resets the six binding points to 0.
+///
+/// @return false if no program has been compiled; true otherwise (including
+///         the empty-range case, which dispatches nothing).
+bool
+GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
+                                 VertexBufferDescriptor const &srcDesc,
+                                 GLuint dstBuffer,
+                                 VertexBufferDescriptor const &dstDesc,
+                                 GLuint sizesBuffer,
+                                 GLuint offsetsBuffer,
+                                 GLuint indicesBuffer,
+                                 GLuint weightsBuffer,
+                                 int start,
+                                 int end) const {
+    if (!_program) return false;
+    int count = end - start;
+    if (count <= 0) {
+        return true;
+    }
+
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, sizesBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, offsetsBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer);
+
+    glUseProgram(_program);
+
+    glUniform1i(_uniformStart,     start);
+    glUniform1i(_uniformEnd,       end);
+    glUniform1i(_uniformSrcOffset, srcDesc.offset);
+    glUniform1i(_uniformDstOffset, dstDesc.offset);
+
+    // round up so that a partially filled last work group is dispatched too
+    glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
+
+    glUseProgram(0);
+
+    glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
+    for (int i = 0; i < 6; ++i) {
+        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0);
+    }
+
+    return true;
+}
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+}  // end namespace OpenSubdiv
diff --git a/opensubdiv/osd/glComputeEvaluator.h b/opensubdiv/osd/glComputeEvaluator.h
new file mode 100644
index 00000000..bea48d4c
--- /dev/null
+++ b/opensubdiv/osd/glComputeEvaluator.h
@@ -0,0 +1,215 @@
+//
+//   Copyright 2015 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+//
+
+#ifndef OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
+#define OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
+
+#include "../version.h"
+
+#include "../osd/opengl.h"
+#include "../osd/vertexDescriptor.h"
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+
+namespace Far {
+    class StencilTables;
+}
+
+namespace Osd {
+
+/// \brief GL stencil tables (Shader Storage buffer)
+///
+/// This class is a GLSL SSBO representation of Far::StencilTables.
+///
+/// GLComputeEvaluator consumes this table to apply stencils
+///
+class GLStencilTablesSSBO {
+public:
+    /// Creates a table; deviceContext is accepted only for signature
+    /// compatibility and is ignored.
+    static GLStencilTablesSSBO *Create(Far::StencilTables const *stencilTables,
+                                       void *deviceContext = NULL) {
+        (void)deviceContext;  // unused
+        return new GLStencilTablesSSBO(stencilTables);
+    }
+
+    explicit GLStencilTablesSSBO(Far::StencilTables const *stencilTables);
+    ~GLStencilTablesSSBO();
+
+    // interfaces needed for GLComputeEvaluator
+    GLuint GetSizesBuffer() const { return _sizes; }
+    GLuint GetOffsetsBuffer() const { return _offsets; }
+    GLuint GetIndicesBuffer() const { return _indices; }
+    GLuint GetWeightsBuffer() const { return _weights; }
+    int GetNumStencils() const { return _numStencils; }
+
+private:
+    GLuint _sizes;
+    GLuint _offsets;
+    GLuint _indices;
+    GLuint _weights;
+    int _numStencils;
+};
+
+// ---------------------------------------------------------------------------
+
+class GLComputeEvaluator {
+public:
+    typedef bool Instantiatable;
+
+    /// Allocates an evaluator and compiles its kernel. Returns NULL when
+    /// compilation fails; the caller owns the returned instance.
+    static GLComputeEvaluator * Create(VertexBufferDescriptor const &srcDesc,
+                                       VertexBufferDescriptor const &dstDesc,
+                                       void * deviceContext = NULL) {
+        (void)deviceContext;  // not used
+        GLComputeEvaluator *instance = new GLComputeEvaluator();
+        if (instance->Compile(srcDesc, dstDesc)) return instance;
+        delete instance;
+        return NULL;
+    }
+
+    /// Constructor.
+    GLComputeEvaluator();
+
+    /// Destructor. note that the GL context must be made current.
+    ~GLComputeEvaluator();
+
+    /// \brief Generic static compute function. This function has a same
+    ///        signature as other device kernels have so that it can be called
+    ///        transparently from OsdMesh template interface.
+    ///
+    /// @param srcVertexBuffer  Input primvar buffer.
+    ///                         must have BindVBO() method returning a GL
+    ///                         buffer object name
+    ///
+    /// @param srcDesc          vertex buffer descriptor for the input buffer
+    ///
+    /// @param dstVertexBuffer  Output primvar buffer
+    ///                         must have BindVBO() method returning a GL
+    ///                         buffer object name
+    ///
+    /// @param dstDesc          vertex buffer descriptor for the output buffer
+    ///
+    /// @param stencilTable     stencil table to be applied. The table must
+    ///                         have SSBO interfaces.
+    ///
+    /// @param instance         cached compiled instance. Clients are supposed
+    ///                         to pre-compile an instance of this class and
+    ///                         provide to this function. If it's null the
+    ///                         kernel still compute by instantiating on-demand
+    ///                         kernel although it may cause a performance
+    ///                         problem.
+    ///
+    /// @param deviceContext    not used in the GLSL kernel
+    ///
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                             VertexBufferDescriptor const &srcDesc,
+                             VERTEX_BUFFER *dstVertexBuffer,
+                             VertexBufferDescriptor const &dstDesc,
+                             STENCIL_TABLE const *stencilTable,
+                             GLComputeEvaluator const *instance,
+                             void * deviceContext = NULL) {
+        (void)deviceContext;  // unused
+        if (instance) {
+            return instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                          dstVertexBuffer, dstDesc,
+                                          stencilTable);
+        } else {
+            // Create a kernel on demand (slow); it is destroyed again right
+            // after the dispatch.
+            instance = Create(srcDesc, dstDesc);
+            if (instance) {
+                bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                                dstVertexBuffer, dstDesc,
+                                                stencilTable);
+                delete instance;
+                return r;
+            }
+            return false;
+        }
+    }
+
+    /// Dispatch the GLSL compute kernel on GPU asynchronously, applying
+    /// every stencil of the table.
+    /// returns false if the kernel hasn't been compiled yet.
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                      VertexBufferDescriptor const &srcDesc,
+                      VERTEX_BUFFER *dstVertexBuffer,
+                      VertexBufferDescriptor const &dstDesc,
+                      STENCIL_TABLE const *stencilTable) const {
+        return EvalStencils(srcVertexBuffer->BindVBO(),
+                            srcDesc,
+                            dstVertexBuffer->BindVBO(),
+                            dstDesc,
+                            stencilTable->GetSizesBuffer(),
+                            stencilTable->GetOffsetsBuffer(),
+                            stencilTable->GetIndicesBuffer(),
+                            stencilTable->GetWeightsBuffer(),
+                            /* start = */ 0,
+                            /* end   = */ stencilTable->GetNumStencils());
+    }
+
+    /// Dispatch the GLSL compute kernel on GPU asynchronously for the
+    /// stencils in [start, end).
+    /// returns false if the kernel hasn't been compiled yet.
+    bool EvalStencils(GLuint srcBuffer,
+                      VertexBufferDescriptor const &srcDesc,
+                      GLuint dstBuffer,
+                      VertexBufferDescriptor const &dstDesc,
+                      GLuint sizesBuffer,
+                      GLuint offsetsBuffer,
+                      GLuint indicesBuffer,
+                      GLuint weightsBuffer,
+                      int start,
+                      int end) const;
+
+    /// Configure GLSL kernel. A valid GL context must be made current before
+    /// calling this function. Returns false if it fails to compile the kernel.
+    bool Compile(VertexBufferDescriptor const &srcDesc,
+                 VertexBufferDescriptor const &dstDesc);
+
+    /// Wait until the dispatched kernel finishes.
+    static void Synchronize(void *deviceContext);
+
+private:
+    GLuint _program;
+
+    // Uniform locations are signed: glGetUniformLocation() reports a missing
+    // uniform as -1 and glUniform*() takes a GLint location.
+    GLint _uniformSizes,      // stencil tables
+          _uniformOffsets,
+          _uniformIndices,
+          _uniformWeights,
+
+          _uniformStart,      // range
+          _uniformEnd,
+
+          _uniformSrcOffset,  // src buffer offset (in elements)
+          _uniformDstOffset;  // dst buffer offset (in elements)
+
+    int _workGroupSize;
+};
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+using namespace OPENSUBDIV_VERSION;
+
+}  // end namespace OpenSubdiv
+
+
+#endif  // OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
diff --git a/opensubdiv/osd/glVertexBuffer.h b/opensubdiv/osd/glVertexBuffer.h
index 67709a60..37ee8917 100644
--- a/opensubdiv/osd/glVertexBuffer.h
+++ b/opensubdiv/osd/glVertexBuffer.h
@@ -39,8 +39,7 @@ namespace Osd {
 /// \brief Concrete vertex buffer class for GLSL subvision and OpenGL drawing.
 ///
 /// GLVertexBuffer implements GLVertexBufferInterface. An instance
-/// of this buffer class can be passed to OsdGLComputeController
-/// and OsdGLDrawController
+/// of this buffer class can be passed to GLComputeEvaluator.
 ///
 class GLVertexBuffer {
 public:
diff --git a/opensubdiv/osd/glXFBEvaluator.cpp b/opensubdiv/osd/glXFBEvaluator.cpp
new file mode 100644
index 00000000..14ef745b
--- /dev/null
+++ b/opensubdiv/osd/glXFBEvaluator.cpp
@@ -0,0 +1,353 @@
+//
+//   Copyright 2015 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+//
+
+#include "../osd/glXFBEvaluator.h"
+
+#include <cstdio>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "../far/error.h"
+#include "../far/stencilTables.h"
+
+#if defined(_MSC_VER)
+    #define snprintf _snprintf
+#endif
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+
+namespace Osd {
+
+// GLSL source of the transform feedback kernel (generated header).
+static const char *shaderSource =
+#include "../osd/glslXFBKernel.gen.h"
+;
+
+/// Uploads the contents of \p src into a new GL buffer object and exposes it
+/// through a new buffer texture with internal format \p type.
+///
+/// Returns the texture name. The buffer name is deleted before returning;
+/// only the texture name is handed back to the caller.
+/// Buffer and texture bindings touched by the fallback path are restored.
+template <class T> GLuint
+createGLTextureBuffer(std::vector<T> const & src, GLenum type) {
+    GLint size = static_cast<GLint>(src.size()*sizeof(T));
+    // src.at(0) throws on an empty vector; an empty table is uploaded as a
+    // zero-sized data store instead.
+    void const * ptr = src.empty() ? NULL : &src[0];
+
+    GLuint buffer;
+    glGenBuffers(1, &buffer);
+
+    GLuint devicePtr;
+    glGenTextures(1, &devicePtr);
+
+#if defined(GL_EXT_direct_state_access)
+    if (glNamedBufferDataEXT && glTextureBufferEXT) {
+        glNamedBufferDataEXT(buffer, size, ptr, GL_STATIC_DRAW);
+        glTextureBufferEXT(devicePtr, GL_TEXTURE_BUFFER, type, buffer);
+    } else {
+#else
+    {
+#endif
+        GLint prev = 0;
+
+        glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &prev);
+        glBindBuffer(GL_ARRAY_BUFFER, buffer);
+        glBufferData(GL_ARRAY_BUFFER, size, ptr, GL_STATIC_DRAW);
+        glBindBuffer(GL_ARRAY_BUFFER, prev);
+
+        glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, &prev);
+        glBindTexture(GL_TEXTURE_BUFFER, devicePtr);
+        glTexBuffer(GL_TEXTURE_BUFFER, type, buffer);
+        glBindTexture(GL_TEXTURE_BUFFER, prev);
+    }
+
+    glDeleteBuffers(1, &buffer);
+
+    return devicePtr;
+}
+
+/// Creates one buffer texture per stencil table array (sizes, offsets,
+/// control indices, weights). All texture names are 0 when the table holds
+/// no stencils.
+GLStencilTablesTBO::GLStencilTablesTBO(
+    Far::StencilTables const *stencilTables) {
+
+    _numStencils = stencilTables->GetNumStencils();
+    if (_numStencils > 0) {
+        _sizes   = createGLTextureBuffer(stencilTables->GetSizes(), GL_R8UI);
+        _offsets = createGLTextureBuffer(
+            stencilTables->GetOffsets(), GL_R32I);
+        _indices = createGLTextureBuffer(
+            stencilTables->GetControlIndices(), GL_R32I);
+        _weights = createGLTextureBuffer(stencilTables->GetWeights(), GL_R32F);
+    } else {
+        _sizes = _offsets = _indices = _weights = 0;
+    }
+}
+
+/// Releases the buffer textures created by the constructor.
+GLStencilTablesTBO::~GLStencilTablesTBO() {
+    if (_sizes)   glDeleteTextures(1, &_sizes);
+    if (_offsets) glDeleteTextures(1, &_offsets);
+    if (_weights) glDeleteTextures(1, &_weights);
+    if (_indices) glDeleteTextures(1, &_indices);
+}
+
+// ---------------------------------------------------------------------------
+
+
+GLXFBEvaluator::GLXFBEvaluator() :
+    _program(0), _srcBufferTexture(0),
+    _uniformSrcBufferTexture(0), _uniformSizesTexture(0),
+    _uniformOffsetsTexture(0), _uniformIndicesTexture(0),
+    _uniformWeightsTexture(0), _uniformStart(0), _uniformEnd(0),
+    _uniformSrcOffset(0) {
+}
+
+GLXFBEvaluator::~GLXFBEvaluator() {
+    if (_program) {
+        glDeleteProgram(_program);
+    }
+    if (_srcBufferTexture) {
+        glDeleteTextures(1, &_srcBufferTexture);
+    }
+}
+
+/// Builds the transform feedback program for the given source/destination
+/// layouts. Any previously compiled program is discarded first.
+///
+/// Returns false (leaving the evaluator without a program) when
+/// dstDesc.stride is not positive or when the program fails to link; the
+/// shader and program info logs are reported through Far::Error in the
+/// latter case.
+bool
+GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
+                        VertexBufferDescriptor const &dstDesc) {
+    if (_program) {
+        glDeleteProgram(_program);
+        _program = 0;
+    }
+
+    // The output layout below is computed modulo the destination stride, so
+    // a zero stride would divide by zero and produce no outputs at all.
+    if (dstDesc.stride <= 0) {
+        Far::Error(Far::FAR_RUNTIME_ERROR,
+                   "GLXFBEvaluator::Compile : dstDesc.stride must be positive");
+        return false;
+    }
+
+    _program = glCreateProgram();
+
+    GLuint shader = glCreateShader(GL_VERTEX_SHADER);
+
+    std::ostringstream defines;
+    defines << "#define LENGTH " << srcDesc.length << "\n"
+            << "#define SRC_STRIDE " << srcDesc.stride << "\n";
+    std::string defineStr = defines.str();
+
+    const char *shaderSources[3] = {"#version 410\n", NULL, NULL};
+
+    shaderSources[1] = defineStr.c_str();
+    shaderSources[2] = shaderSource;
+    glShaderSource(shader, 3, shaderSources, NULL);
+    glCompileShader(shader);
+    glAttachShader(_program, shader);
+
+    std::vector<std::string> outputs;
+    std::vector<const char *> pOutputs;
+    {
+        // vertex data (may include custom vertex data) and varying data
+        // are stored into the same buffer, interleaved.
+        //
+        // (gl_SkipComponents1)
+        // outVertexData[0]
+        // outVertexData[1]
+        // outVertexData[2]
+        // (gl_SkipComponents1)
+        //
+        // note that "primvarOffset" in shader is still needed to read
+        // interleaved components even if gl_SkipComponents is used.
+        //
+        char attrName[32];
+        int primvarOffset = (dstDesc.offset % dstDesc.stride);
+        for (int i = 0; i < primvarOffset; ++i) {
+            outputs.push_back("gl_SkipComponents1");
+        }
+        for (int i = 0; i < dstDesc.length; ++i) {
+            snprintf(attrName, sizeof(attrName), "outVertexBuffer[%d]", i);
+            outputs.push_back(attrName);
+        }
+        for (int i = primvarOffset + dstDesc.length; i < dstDesc.stride; ++i) {
+            outputs.push_back("gl_SkipComponents1");
+        }
+
+        // convert to char* array (done only after 'outputs' stopped growing,
+        // so the pointers stay valid)
+        for (size_t i = 0; i < outputs.size(); ++i) {
+            pOutputs.push_back(outputs[i].c_str());
+        }
+    }
+
+    glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(),
+                                &pOutputs[0], GL_INTERLEAVED_ATTRIBS);
+
+    GLint linked = 0;
+    glLinkProgram(_program);
+    glGetProgramiv(_program, GL_LINK_STATUS, &linked);
+
+    if (linked == GL_FALSE) {
+        // The logs are driver-provided text: pass them as an argument, never
+        // as the format string.
+        char buffer[1024];
+        glGetShaderInfoLog(shader, 1024, NULL, buffer);
+        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);
+
+        glGetProgramInfoLog(_program, 1024, NULL, buffer);
+        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);
+
+        // release the shader too, otherwise it leaks on every failed link
+        glDeleteShader(shader);
+        glDeleteProgram(_program);
+        _program = 0;
+        return false;
+    }
+
+    glDeleteShader(shader);
+
+    // set uniform locations for compute kernels
+    _uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer");
+
+    _uniformSizesTexture   = glGetUniformLocation(_program, "sizes");
+    _uniformOffsetsTexture = glGetUniformLocation(_program, "offsets");
+    _uniformIndicesTexture = glGetUniformLocation(_program, "indices");
+    _uniformWeightsTexture = glGetUniformLocation(_program, "weights");
+
+    _uniformStart = glGetUniformLocation(_program, "batchStart");
+    _uniformEnd   = glGetUniformLocation(_program, "batchEnd");
+
+    _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
+
+    // create a texture for input buffer
+    if (!_srcBufferTexture) {
+        glGenTextures(1, &_srcBufferTexture);
+    }
+    return true;
+}
+
+/* static */
+void
+GLXFBEvaluator::Synchronize(void * /*kernel*/) {
+    // XXX: this is currently just for the test purpose.
+    // need to be reimplemented by fence and sync.
+    glFinish();
+}
+
+/// Binds \p texture as a buffer texture on texture unit \p unit and points
+/// the sampler uniform \p sampler at that unit. Does nothing when the
+/// uniform location is -1. GL_TEXTURE0 is the active unit on return.
+static void
+bindTexture(GLint sampler, GLuint texture, int unit) {
+    if (sampler == -1) {
+        return;
+    }
+    glUniform1i(sampler, unit);
+    glActiveTexture(GL_TEXTURE0 + unit);
+    glBindTexture(GL_TEXTURE_BUFFER, texture);
+    glActiveTexture(GL_TEXTURE0);
+}
+
+/// Runs the transform feedback kernel over stencils [start, end), reading
+/// from \p srcBuffer and capturing into \p dstBuffer.
+///
+/// Returns false if no program has been compiled, true otherwise (including
+/// the empty range case, which issues no GL calls).
+/// On return the program, vertex array, transform feedback buffer binding and
+/// texture units 0-4 are reset to 0 and GL_RASTERIZER_DISCARD is disabled.
+bool
+GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
+                             VertexBufferDescriptor const &srcDesc,
+                             GLuint dstBuffer,
+                             VertexBufferDescriptor const &dstDesc,
+                             GLuint sizesTexture,
+                             GLuint offsetsTexture,
+                             GLuint indicesTexture,
+                             GLuint weightsTexture,
+                             int start,
+                             int end) const {
+    if (!_program) return false;
+    int count = end - start;
+    if (count <= 0) {
+        return true;
+    }
+
+    // bind vertex array
+    // always create new one, to be safe with multiple contexts (slow though)
+    GLuint vao = 0;
+    glGenVertexArrays(1, &vao);
+    glBindVertexArray(vao);
+
+    glEnable(GL_RASTERIZER_DISCARD);
+    glUseProgram(_program);
+
+    // Set input VBO as a texture buffer.
+    glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
+    glBindTexture(GL_TEXTURE_BUFFER, 0);
+
+    bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0);
+
+    // bind stencil tables textures.
+    bindTexture(_uniformSizesTexture,   sizesTexture,   1);
+    bindTexture(_uniformOffsetsTexture, offsetsTexture, 2);
+    bindTexture(_uniformIndicesTexture, indicesTexture, 3);
+    bindTexture(_uniformWeightsTexture, weightsTexture, 4);
+
+    // set batch range
+    glUniform1i(_uniformStart,     start);
+    glUniform1i(_uniformEnd,       end);
+    glUniform1i(_uniformSrcOffset, srcDesc.offset);
+
+    // The destination buffer is bound at vertex boundary.
+    //
+    // Example: When we have a batched and interleaved vertex buffer
+    //
+    //  Obj  X    |    Obj Y                                  |
+    // -----------+-------------------------------------------+-------
+    //            |    vtx 0      |    vtx 1      |           |
+    // -----------+---------------+---------------+-----------+-------
+    //            | x y z r g b a | x y z r g b a | ....      |
+    // -----------+---------------+---------------+-----------+-------
+    //                    ^
+    //                    srcDesc.offset for Obj Y color
+    //
+    //            ^-------------------------------------------^
+    //                    XFB destination buffer range
+    //              S S S * * * *
+    //              k k k
+    //              i i i
+    //              p p p
+    //
+    //  We use gl_SkipComponents to skip the first 3 XYZ so the
+    //  buffer itself needs to be bound for entire section of ObjY.
+    //
+    //  Note that for the source buffer (texture) we bind the whole
+    //  buffer (all VBO range) and use srcOffset=srcDesc.offset for
+    //  indexing.
+    //
+    int dstBufferBindOffset =
+        dstDesc.offset - (dstDesc.offset % dstDesc.stride);
+
+    // bind destination buffer
+    glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
+                      0, dstBuffer,
+                      dstBufferBindOffset * sizeof(float),
+                      count * dstDesc.stride * sizeof(float));
+
+    glBeginTransformFeedback(GL_POINTS);
+    glDrawArrays(GL_POINTS, 0, count);
+    glEndTransformFeedback();
+
+    glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
+
+    for (int i = 0; i < 5; ++i) {
+        glActiveTexture(GL_TEXTURE0 + i);
+        glBindTexture(GL_TEXTURE_BUFFER, 0);
+    }
+
+    glDisable(GL_RASTERIZER_DISCARD);
+    glUseProgram(0);
+    glActiveTexture(GL_TEXTURE0);
+
+    // revert vao
+    glBindVertexArray(0);
+    glDeleteVertexArrays(1, &vao);
+
+
+    return true;
+}
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+}  // end namespace OpenSubdiv
diff --git a/opensubdiv/osd/glXFBEvaluator.h b/opensubdiv/osd/glXFBEvaluator.h
new file mode 100644
index 00000000..c987eba1
--- /dev/null
+++ b/opensubdiv/osd/glXFBEvaluator.h
@@ -0,0 +1,215 @@
+//
+//   Copyright 2015 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following
modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+//
+
+#ifndef OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H
+#define OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H
+
+#include "../version.h"
+
+#include "../osd/opengl.h"
+#include "../osd/vertexDescriptor.h"
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+
+namespace Far {
+    class StencilTables;
+}
+
+namespace Osd {
+
+/// \brief GL TextureBuffer stencil tables
+///
+/// This class is a GL Texture Buffer representation of Far::StencilTables.
+///
+/// GLXFBEvaluator consumes this table to apply stencils.
+///
+/// Instances own the GL textures they create and release them in the
+/// destructor; no copy operations are defined, so they should be handled by
+/// pointer (see Create()) rather than copied.
+///
+class GLStencilTablesTBO {
+public:
+    /// Allocates a GLStencilTablesTBO for \p stencilTables.
+    /// \p deviceContext is unused. The caller owns the returned object.
+    static GLStencilTablesTBO *Create(
+        Far::StencilTables const *stencilTables, void *deviceContext = NULL) {
+        (void)deviceContext;  // unused
+        return new GLStencilTablesTBO(stencilTables);
+    }
+
+    explicit GLStencilTablesTBO(Far::StencilTables const *stencilTables);
+    ~GLStencilTablesTBO();
+
+    // interfaces needed for GLXFBEvaluator
+    GLuint GetSizesTexture() const { return _sizes; }
+    GLuint GetOffsetsTexture() const { return _offsets; }
+    GLuint GetIndicesTexture() const { return _indices; }
+    GLuint GetWeightsTexture() const { return _weights; }
+    int GetNumStencils() const { return _numStencils; }
+
+private:
+    GLuint _sizes;
+    GLuint _offsets;
+    GLuint _indices;
+    GLuint _weights;
+    int _numStencils;
+};
+
+// ---------------------------------------------------------------------------
+
+/// \brief Stencil evaluator implemented with a GLSL transform feedback
+/// program.
+///
+/// Instances own a GL program and a texture and release them in the
+/// destructor; no copy operations are defined, so they should be handled by
+/// pointer (see Create()) rather than copied.
+///
+class GLXFBEvaluator {
+public:
+    typedef bool Instantiatable;
+
+    /// Creates an evaluator and compiles its kernel for the given
+    /// source/destination layouts. Returns NULL if compilation fails.
+    /// \p deviceContext is unused. The caller owns the returned object.
+    static GLXFBEvaluator * Create(VertexBufferDescriptor const &srcDesc,
+                                   VertexBufferDescriptor const &dstDesc,
+                                   void * deviceContext = NULL) {
+        (void)deviceContext;  // not used
+        GLXFBEvaluator *instance = new GLXFBEvaluator();
+        if (instance->Compile(srcDesc, dstDesc)) return instance;
+        delete instance;
+        return NULL;
+    }
+
+    /// Constructor.
+    GLXFBEvaluator();
+
+    /// Destructor. note that the GL context must be made current.
+    ~GLXFBEvaluator();
+
+    /// \brief Generic static stencil function. This function has a same
+    ///        signature as other device kernels have so that it can be called
+    ///        transparently from OsdMesh template interface.
+    ///
+    /// @param srcVertexBuffer  Input primvar buffer.
+    ///                         must have BindVBO() method returning the
+    ///                         GL buffer object to read from
+    ///
+    /// @param srcDesc          vertex buffer descriptor for the input buffer
+    ///
+    /// @param dstVertexBuffer  Output primvar buffer
+    ///                         must have BindVBO() method returning the
+    ///                         GL buffer object to write to
+    ///
+    /// @param dstDesc          vertex buffer descriptor for the output buffer
+    ///
+    /// @param stencilTable     stencil table to be applied. The table must
+    ///                         have Texture Buffer Object interfaces.
+    ///
+    /// @param instance         cached compiled instance. Clients are supposed
+    ///                         to pre-compile an instance of this class and
+    ///                         provide it to this function. If it's null the
+    ///                         kernel still computes by instantiating an
+    ///                         on-demand kernel, although it may cause a
+    ///                         performance problem.
+    ///
+    /// @param deviceContext    not used in the transform feedback kernel
+    ///
+    /// @return false if no kernel could be compiled or dispatched.
+    ///
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                             VertexBufferDescriptor const &srcDesc,
+                             VERTEX_BUFFER *dstVertexBuffer,
+                             VertexBufferDescriptor const &dstDesc,
+                             STENCIL_TABLE const *stencilTable,
+                             GLXFBEvaluator const *instance,
+                             void * deviceContext = NULL) {
+        if (instance) {
+            return instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                          dstVertexBuffer, dstDesc,
+                                          stencilTable);
+        } else {
+            // Create an instance on demand (slow)
+            (void)deviceContext;  // unused
+            instance = Create(srcDesc, dstDesc);
+            if (instance) {
+                bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                                dstVertexBuffer, dstDesc,
+                                                stencilTable);
+                delete instance;
+                return r;
+            }
+            return false;
+        }
+    }
+
+    /// Dispatch the transform feedback kernel over every stencil of
+    /// \p stencilTable.
+    /// returns false if the kernel hasn't been compiled yet.
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                      VertexBufferDescriptor const &srcDesc,
+                      VERTEX_BUFFER *dstVertexBuffer,
+                      VertexBufferDescriptor const &dstDesc,
+                      STENCIL_TABLE const *stencilTable) const {
+        return EvalStencils(srcVertexBuffer->BindVBO(),
+                            srcDesc,
+                            dstVertexBuffer->BindVBO(),
+                            dstDesc,
+                            stencilTable->GetSizesTexture(),
+                            stencilTable->GetOffsetsTexture(),
+                            stencilTable->GetIndicesTexture(),
+                            stencilTable->GetWeightsTexture(),
+                            /* start = */ 0,
+                            /* end   = */ stencilTable->GetNumStencils());
+    }
+
+    /// Dispatch the transform feedback kernel for stencils [start, end).
+    /// The sizes/offsets/indices/weights arguments are the GL names of the
+    /// stencil table textures.
+    /// returns false if the kernel hasn't been compiled yet.
+    bool EvalStencils(GLuint srcBuffer,
+                      VertexBufferDescriptor const &srcDesc,
+                      GLuint dstBuffer,
+                      VertexBufferDescriptor const &dstDesc,
+                      GLuint sizesBuffer,
+                      GLuint offsetsBuffer,
+                      GLuint indicesBuffer,
+                      GLuint weightsBuffer,
+                      int start,
+                      int end) const;
+
+    /// Configure GLSL kernel. A valid GL context must be made current before
+    /// calling this function. Returns false if it fails to compile the kernel.
+    bool Compile(VertexBufferDescriptor const &srcDesc,
+                 VertexBufferDescriptor const &dstDesc);
+
+    /// Wait until the dispatched kernel finishes. The argument is unused.
+    static void Synchronize(void *kernel);
+
+private:
+    GLuint _program;
+
+    GLuint _srcBufferTexture;
+
+    // Uniform locations. glGetUniformLocation returns a signed GLint and
+    // reports a missing uniform as -1, so these must not be unsigned.
+    GLint _uniformSrcBufferTexture;
+    GLint _uniformSizesTexture;
+    GLint _uniformOffsetsTexture;
+    GLint _uniformIndicesTexture;
+    GLint _uniformWeightsTexture;
+
+    GLint _uniformStart;     // range
+    GLint _uniformEnd;
+    GLint _uniformSrcOffset; // src buffer offset (in elements)
+};
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+using namespace OPENSUBDIV_VERSION;
+
+}  // end namespace OpenSubdiv
+
+
+#endif  // OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H
diff --git a/opensubdiv/osd/glslComputeContext.cpp b/opensubdiv/osd/glslComputeContext.cpp
deleted file mode 100644
index 7a4f5ba3..00000000
--- a/opensubdiv/osd/glslComputeContext.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-//
-//   Copyright 2013 Pixar
-//
-//   Licensed under the Apache License, Version 2.0 (the "Apache License")
-//   with the following modification; you may not use this file except in
-//   compliance with the Apache License and the following modification to it:
-//   Section 6. Trademarks. is deleted and replaced with:
-//
-//   6. Trademarks. This License does not grant permission to use the trade
-//      names, trademarks, service marks, or product names of the Licensor
-//      and its affiliates, except as required to comply with Section 4(c) of
-//      the License and to reproduce the content of the NOTICE file.
-//
-//   You may obtain a copy of the Apache License at
-//
-//       http://www.apache.org/licenses/LICENSE-2.0
-//
-//   Unless required by applicable law or agreed to in writing, software
-//   distributed under the Apache License with the above modification is
-//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-//   KIND, either express or implied. See the Apache License for the specific
-//   language governing permissions and limitations under the Apache License.
-// - -#include "../far/stencilTables.h" - -//#include "../osd/debug.h" -#include "../osd/glslComputeContext.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ----------------------------------------------------------------------------- - -template GLuint -createGLSLBuffer(std::vector const & src) { - - GLuint devicePtr=0; - - glGenBuffers(1, &devicePtr); - -#if defined(GL_EXT_direct_state_access) - if (glNamedBufferDataEXT) { - glNamedBufferDataEXT(devicePtr, src.size()*sizeof(T), &src.at(0), GL_STATIC_DRAW); - } else { -#else - { -#endif - GLint prev = 0; - glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr); - glBufferData(GL_SHADER_STORAGE_BUFFER, src.size()*sizeof(T), &src.at(0), GL_STATIC_DRAW); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev); - } - - //OSD_DEBUG_CHECK_GL_ERROR("createGLSLBuffer size %ld", src.size()); - return devicePtr; -} - -// ----------------------------------------------------------------------------- - -class GLSLComputeContext::GLSLStencilTables { - -public: - - GLSLStencilTables(Far::StencilTables const & stencilTables) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createGLSLBuffer(stencilTables.GetSizes()); - _offsets = createGLSLBuffer(stencilTables.GetOffsets()); - _indices = createGLSLBuffer(stencilTables.GetControlIndices()); - _weights = createGLSLBuffer(stencilTables.GetWeights()); - } else { - _sizes = _offsets = _indices = _weights = 0; - } - } - - ~GLSLStencilTables() { - if (_sizes) glDeleteBuffers(1, &_sizes); - if (_offsets) glDeleteBuffers(1, &_offsets); - if (_weights) glDeleteBuffers(1, &_weights); - if (_indices) glDeleteBuffers(1, &_indices); - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - GLuint GetSizes() const { - return _sizes; - } - - GLuint GetOffsets() const { - return _offsets; - } - - GLuint 
GetIndices() const { - return _indices; - } - - GLuint GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - - void Bind() const { - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, _sizes); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, _offsets); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, _indices); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, _weights); - } - - static void Unbind() { - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0); - - glUseProgram(0); - } - -private: - - GLuint _sizes, - _offsets, - _indices, - _weights; - int _numStencils; -}; - -// ----------------------------------------------------------------------------- - -GLSLComputeContext::GLSLComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0), - _numStencils(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new GLSLStencilTables(*vertexStencilTables); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = new GLSLStencilTables(*varyingStencilTables); - - if (_numControlVertices) { - assert(_numControlVertices==varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -GLSLComputeContext::~GLSLComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -GLSLComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? 
_vertexStencilTables->IsValid() : false; -} - -bool -GLSLComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -GLSLComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : false; -} - -int -GLSLComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : false; -} - -// ---------------------------------------------------------------------------- - -void -GLSLComputeContext::BindVertexStencilTables() const { - if (_vertexStencilTables) { - _vertexStencilTables->Bind(); - } -} - -void -GLSLComputeContext::BindVaryingStencilTables() const { - if (_varyingStencilTables) { - _varyingStencilTables->Bind(); - } -} - -void -GLSLComputeContext::UnbindStencilTables() const { - GLSLStencilTables::Unbind(); -} - - -// ----------------------------------------------------------------------------- - -GLSLComputeContext * -GLSLComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - GLSLComputeContext *result = - new GLSLComputeContext(vertexStencilTables, varyingStencilTables); - - return result; -} - -// ----------------------------------------------------------------------------- - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslComputeContext.h b/opensubdiv/osd/glslComputeContext.h deleted file mode 100644 index 7f21869d..00000000 --- a/opensubdiv/osd/glslComputeContext.h +++ /dev/null @@ -1,126 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 
6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_COMPUTE_CONTEXT_H -#define OSD_GLSL_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far{ class StencilTables; } - -namespace Osd { - -/// -/// \brief GLSL-Compute Refine Context -/// -/// The GLSL-Compute implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. 
-/// -class GLSLComputeContext { - -public: - /// Creates an GLSLComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext (not used) - /// - static GLSLComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~GLSLComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the GL buffer containing vertex-stencil stencil sizes - GLuint GetVertexStencilTablesSizes() const; - - /// Returns the GL buffer containing vertex-stencil stencil offsets - GLuint GetVertexStencilTablesOffsets() const; - - /// Binds GL buffers containing stencils for 'vertex' interpolation - void BindVertexStencilTables() const; - - /// Binds GL buffers containing stencils for 'varying' interpolation - void BindVaryingStencilTables() const; - - /// Unbinds GL stencil buffers - void UnbindStencilTables() const; - -protected: - explicit GLSLComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - class GLSLStencilTables; - - GLSLStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; - int _numStencils; -}; - -} // 
end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/glslComputeController.cpp b/opensubdiv/osd/glslComputeController.cpp deleted file mode 100644 index 5609372d..00000000 --- a/opensubdiv/osd/glslComputeController.cpp +++ /dev/null @@ -1,292 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/glslComputeController.h" -#include "../osd/vertexDescriptor.h" -#include "../osd/opengl.h" -#include "../far/error.h" - -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -static const char *shaderSource = -#include "../osd/glslComputeKernel.gen.h" -; - -// ---------------------------------------------------------------------------- - -class GLSLComputeController::KernelBundle : - NonCopyable { - -public: - - KernelBundle() : - _program(0), - _uniformSizes(0), - _uniformOffsets(0), - _uniformIndices(0), - _uniformWeights(0), - _uniformStart(0), - _uniformEnd(0), - _uniformSrcOffset(0), - _uniformDstOffset(0), - _workGroupSize(64) { } - - ~KernelBundle() { - if (_program) { - glDeleteProgram(_program); - } - } - - void UseProgram() const { - glUseProgram(_program); - } - - bool Compile(VertexBufferDescriptor const &srcDesc, - VertexBufferDescriptor const &dstDesc) { - - // XXX: only store srcDesc. - // this is ok since currently this kernel doesn't get called with - // different strides for src and dst. This function will be - // refactored soon. 
- _desc = VertexBufferDescriptor(0, srcDesc.length, srcDesc.stride); - - if (_program) { - glDeleteProgram(_program); - _program=0; - } - _program = glCreateProgram(); - - GLuint shader = glCreateShader(GL_COMPUTE_SHADER); - - std::ostringstream defines; - defines << "#define LENGTH " << srcDesc.length << "\n" - << "#define SRC_STRIDE " << srcDesc.stride << "\n" - << "#define DST_STRIDE " << dstDesc.stride << "\n" - << "#define WORK_GROUP_SIZE " << _workGroupSize << "\n"; - std::string defineStr = defines.str(); - - const char *shaderSources[3] = {"#version 430\n", 0, 0}; - shaderSources[1] = defineStr.c_str(); - shaderSources[2] = shaderSource; - glShaderSource(shader, 3, shaderSources, NULL); - glCompileShader(shader); - glAttachShader(_program, shader); - - GLint linked = 0; - glLinkProgram(_program); - glGetProgramiv(_program, GL_LINK_STATUS, &linked); - - if (linked == GL_FALSE) { - char buffer[1024]; - glGetShaderInfoLog(shader, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glGetProgramInfoLog(_program, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glDeleteProgram(_program); - _program = 0; - return false; - } - - glDeleteShader(shader); - - // set uniform locations for compute kernels - _uniformSizes = glGetUniformLocation(_program, "stencilSizes"); - _uniformOffsets = glGetUniformLocation(_program, "stencilOffsets"); - _uniformIndices = glGetUniformLocation(_program, "stencilIndices"); - _uniformWeights = glGetUniformLocation(_program, "stencilIWeights"); - - _uniformStart = glGetUniformLocation(_program, "batchStart"); - _uniformEnd = glGetUniformLocation(_program, "batchEnd"); - - _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset"); - _uniformDstOffset = glGetUniformLocation(_program, "dstOffset"); - - return true; - } - - void ApplyStencilTableKernel(int srcOffset, int dstOffset, - int start, int end) const { - - dispatchCompute(srcOffset, dstOffset, start, end); - } - - struct Match { - - 
Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -protected: - - void dispatchCompute(int srcOffset, int dstOffset, int start, int end) const { - - int count = end - start; - if (count<=0) { - return; - } - - - glUniform1i(_uniformStart, start); - glUniform1i(_uniformEnd, end); - - glUniform1i(_uniformSrcOffset, srcOffset); - glUniform1i(_uniformDstOffset, dstOffset); - - glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1); - - // sync for later reading. - // XXX: in theory, just SHADER_STORAGE_BARRIER is needed here. However - // we found a problem (issue #295) with nvidia driver 331.49 / Quadro4000 - // resulting in invalid vertices. - // Apparently adding TEXTURE_FETCH_BARRIER after a kernel fixes it. - // The workaroud is commented out, since it looks fixed as of driver 334.xx. - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); - - //OSD_DEBUG_CHECK_GL_ERROR("dispatchCompute"); - } - -private: - - GLuint _program; - - GLuint _uniformSizes, // uniform paramaeters for kernels - _uniformOffsets, - _uniformIndices, - _uniformWeights, - - _uniformStart, // batch - _uniformEnd, - - _uniformSrcOffset, // src buffer offset (in elements) - _uniformDstOffset; // dst buffer offset (in elements) - - VertexBufferDescriptor _desc; // primvar buffer descriptor - - int _workGroupSize; -}; - -// ---------------------------------------------------------------------------- - -void -GLSLComputeController::ApplyStencilTableKernel( - ComputeContext const *context, int numStencils) const { - - assert(context); - - // Note: GLSLComputeContext has a state, knowing whether vertex or - // varying stencil tables are being bound. GetNumStencils() reflects it. - // This structure will likely be revisited. 
- - int start = 0; - int end = numStencils; - - _currentBindState.kernelBundle->ApplyStencilTableKernel( - _currentBindState.desc.offset, - _currentBindState.desc.offset + context->GetNumControlVertices() * _currentBindState.desc.stride, - start, end); -} - -// ---------------------------------------------------------------------------- - -GLSLComputeController::GLSLComputeController() { } - -GLSLComputeController::~GLSLComputeController() { - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLComputeController::Synchronize() { - - glFinish(); -} - -// ---------------------------------------------------------------------------- -GLSLComputeController::KernelBundle const * -GLSLComputeController::getKernel(VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - if (it != _kernelRegistry.end()) { - return *it; - } else { - KernelBundle * kernelBundle = new KernelBundle(); - kernelBundle->Compile(desc, desc); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -void -GLSLComputeController::bindBufferAndProgram() { - - if (_currentBindState.buffer) { - // src - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _currentBindState.buffer); - // dst - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _currentBindState.buffer); - } - - _currentBindState.kernelBundle->UseProgram(); - - glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); -} - -void -GLSLComputeController::unbindBufferAndProgram() { - - glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); - glUseProgram(0); -} - -// ---------------------------------------------------------------------------- - -} // end namespace Osd - -} // end 
namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslComputeController.h b/opensubdiv/osd/glslComputeController.h deleted file mode 100644 index 8c87b37d..00000000 --- a/opensubdiv/osd/glslComputeController.h +++ /dev/null @@ -1,210 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_COMPUTE_CONTROLLER_H -#define OSD_GLSL_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/glslComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching GLSL Compute subdivision kernels. -/// -/// GLSLComputeController is a compute controller class to launch -/// GLSLCompute transfrom feedback subdivision kernels. It requires -/// GLVertexBufferInterface as arguments of Refine function. 
-/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class GLSLComputeController { -public: - typedef GLSLComputeContext ComputeContext; - - /// Constructor. - GLSLComputeController(); - - /// Destructor. - ~GLSLComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The OsdGLSLContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( GLSLComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - if (vertexBuffer) { - bind(vertexBuffer, vertexDesc); - - context->BindVertexStencilTables(); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVertexStencilTables()); - } - - if (varyingBuffer) { - bind(varyingBuffer, varyingDesc); - - context->BindVaryingStencilTables(); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVaryingStencilTables()); - } - - context->UnbindStencilTables(); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. 
- /// - /// @param context The OsdGLSLContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(GLSLComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context, - int numStencils) const; - - template - void bind( BUFFER * buffer, - VertexBufferDescriptor const * desc ) { - - assert(buffer); - - // if the vertex buffer descriptor is specified, use it - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (desc) { - _currentBindState.desc = *desc; - } else { - int numElements = buffer ? buffer->GetNumElements() : 0; - _currentBindState.desc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.buffer = buffer->BindVBO(); - - _currentBindState.kernelBundle = getKernel(_currentBindState.desc); - - bindBufferAndProgram(); - } - - - // Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - unbindBufferAndProgram(); - } - - // binds the primvar data buffer and compute program - void bindBufferAndProgram(); - - // unbinds the primvar data buffer and compute program - void unbindBufferAndProgram(); - - -private: - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. 
- struct BindState { - - BindState() : buffer(0), kernelBundle(0) { } - - void Reset() { - buffer = 0; - desc.Reset(); - kernelBundle = 0; - } - - GLuint buffer; - - VertexBufferDescriptor desc; - - KernelBundle const * kernelBundle; - }; - - BindState _currentBindState; - - typedef std::vector KernelRegistry; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - KernelRegistry _kernelRegistry; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp b/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp deleted file mode 100644 index 2d763e7a..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp +++ /dev/null @@ -1,253 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../far/stencilTables.h" - -//#define OSD_DEBUG_BUILD -//#include "../osd/debug.h" -#include "../osd/glslTransformFeedbackComputeContext.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ----------------------------------------------------------------------------- - -template GLuint -createGLTextureBuffer(std::vector const & src, GLenum type) { - - int size = (int)src.size()*sizeof(T); - void const * ptr = &src.at(0); - - GLuint buffer; - glGenBuffers(1, &buffer); - - GLuint devicePtr; - glGenTextures(1, &devicePtr); - -#if defined(GL_EXT_direct_state_access) - if (glNamedBufferDataEXT and glTextureBufferEXT) { - glNamedBufferDataEXT(buffer, size, ptr, GL_STATIC_DRAW); - glTextureBufferEXT(devicePtr, GL_TEXTURE_BUFFER, type, buffer); - } else { -#else - { -#endif - GLint prev = 0; - - glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &prev); - glBindBuffer(GL_ARRAY_BUFFER, buffer); - glBufferData(GL_ARRAY_BUFFER, size, ptr, GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, prev); - - glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, &prev); - glBindTexture(GL_TEXTURE_BUFFER, devicePtr); - glTexBuffer(GL_TEXTURE_BUFFER, type, buffer); - glBindTexture(GL_TEXTURE_BUFFER, prev); - } - - glDeleteBuffers(1, &buffer); - - //OSD_DEBUG_CHECK_GL_ERROR("createGLTextureBuffer end\n"); - return devicePtr; -} - -// ----------------------------------------------------------------------------- - -class GLSLTransformFeedbackComputeContext::GLStencilTables { - -public: - - GLStencilTables(Far::StencilTables const & stencilTables) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createGLTextureBuffer(stencilTables.GetSizes(), GL_R8UI); - _offsets = createGLTextureBuffer(stencilTables.GetOffsets(), GL_R32I); - _indices = createGLTextureBuffer(stencilTables.GetControlIndices(), GL_R32I); - _weights = createGLTextureBuffer(stencilTables.GetWeights(), GL_R32F); - } else { - _sizes = _offsets 
= _indices = _weights = 0; - } - } - - ~GLStencilTables() { - if (_sizes) glDeleteTextures(1, &_sizes); - if (_offsets) glDeleteTextures(1, &_offsets); - if (_weights) glDeleteTextures(1, &_weights); - if (_indices) glDeleteTextures(1, &_indices); - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - GLuint GetSizes() const { - return _sizes; - } - - GLuint GetOffsets() const { - return _offsets; - } - - GLuint GetIndices() const { - return _indices; - } - - GLuint GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - -private: - - GLuint _sizes, - _offsets, - _indices, - _weights; - - int _numStencils; -}; - -// ----------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeContext::GLSLTransformFeedbackComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new GLStencilTables(*vertexStencilTables); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = new GLStencilTables(*varyingStencilTables); - - if (_numControlVertices) { - assert(_numControlVertices==varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -GLSLTransformFeedbackComputeContext::~GLSLTransformFeedbackComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -GLSLTransformFeedbackComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? 
_vertexStencilTables->IsValid() : false; -} - -bool -GLSLTransformFeedbackComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -GLSLTransformFeedbackComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; -} - -int -GLSLTransformFeedbackComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; -} - -// ---------------------------------------------------------------------------- -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesSizes() const { - return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesOffsets() const { - return _vertexStencilTables ? _vertexStencilTables->GetOffsets() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesIndices() const { - return _vertexStencilTables ? _vertexStencilTables->GetIndices() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesWeights() const { - return _vertexStencilTables ? _vertexStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesSizes() const { - return _varyingStencilTables ? _varyingStencilTables->GetSizes() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesOffsets() const { - return _varyingStencilTables ? _varyingStencilTables->GetOffsets() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesIndices() const { - return _varyingStencilTables ? _varyingStencilTables->GetIndices() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesWeights() const { - return _varyingStencilTables ? 
_varyingStencilTables->GetWeights() : 0; -} - - -// ----------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeContext * -GLSLTransformFeedbackComputeContext::Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - GLSLTransformFeedbackComputeContext *result = - new GLSLTransformFeedbackComputeContext( - vertexStencilTables, varyingStencilTables); - - return result; -} - - -// ----------------------------------------------------------------------------- - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslTransformFeedbackComputeContext.h b/opensubdiv/osd/glslTransformFeedbackComputeContext.h deleted file mode 100644 index 5a51e55f..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeContext.h +++ /dev/null @@ -1,134 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H -#define OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far{ class StencilTables; } - -namespace Osd { - -/// -/// \brief GLSL-Compute(transform-feedback) Refine Context -/// -/// The GLSL (transform-feedback) implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. -/// -class GLSLTransformFeedbackComputeContext { -public: - /// Creates an GLSLTransformFeedbackComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - static GLSLTransformFeedbackComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~GLSLTransformFeedbackComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the GL texture 
buffer containing vertex-stencil stencil sizes - GLuint GetVertexStencilTablesSizes() const; - - /// Returns the GL texture buffer containing vertex-stencil stencil offsets - GLuint GetVertexStencilTablesOffsets() const; - - /// Returns the GL texture buffer containing vertex-stencil stencil indices - GLuint GetVertexStencilTablesIndices() const; - - /// Returns the GL texture buffer containing vertex-stencil stencil weights - GLuint GetVertexStencilTablesWeights() const; - - - /// Returns the GL texture buffer containing Varying-stencil stencil sizes - GLuint GetVaryingStencilTablesSizes() const; - - /// Returns the GL texture buffer containing Varying-stencil stencil offsets - GLuint GetVaryingStencilTablesOffsets() const; - - /// Returns the GL texture buffer containing Varying-stencil stencil indices - GLuint GetVaryingStencilTablesIndices() const; - - /// Returns the GL texture buffer containing Varying-stencil stencil weights - GLuint GetVaryingStencilTablesWeights() const; - - -protected: - - explicit GLSLTransformFeedbackComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - - class GLStencilTables; - - GLStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/glslTransformFeedbackComputeController.cpp b/opensubdiv/osd/glslTransformFeedbackComputeController.cpp deleted file mode 100644 index d13b70dc..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeController.cpp +++ /dev/null @@ -1,456 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and 
the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -//#define OSD_DEBUG_BUILD -#include "../osd/debug.h" -#include "../osd/glslTransformFeedbackComputeController.h" -#include "../osd/glslTransformFeedbackComputeContext.h" -#include "../osd/opengl.h" -#include "../far/error.h" - -#include -#include -#include -#include -#include - -#if _MSC_VER - #define snprintf _snprintf -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -static const char *shaderSource = -#include "../osd/glslTransformFeedbackKernel.gen.h" -; - -// ---------------------------------------------------------------------------- -static void -bindTexture(GLint sampler, GLuint texture, int unit) { - if (sampler==-1) { - return; - } - glUniform1i(sampler, unit); - glActiveTexture(GL_TEXTURE0 + unit); - glBindTexture(GL_TEXTURE_BUFFER, texture); - glActiveTexture(GL_TEXTURE0); -} - -// ---------------------------------------------------------------------------- - -class GLSLTransformFeedbackComputeController::KernelBundle : - NonCopyable { - -public: - - KernelBundle() : - _program(0), - _uniformSizes(0), - _uniformOffsets(0), - _uniformIndices(0), - 
_uniformWeights(0), - _uniformStart(0), - _uniformEnd(0), - _uniformSrcOffset(0) { } - - ~KernelBundle() { - if (_program) { - glDeleteProgram(_program); - } - } - - void UseProgram() const { - glUseProgram(_program); - } - - bool Compile(VertexBufferDescriptor const & srcDesc, - VertexBufferDescriptor const & dstDesc) { - - // XXX: only store srcDesc. - // this is ok since currently this kernel doesn't get called with - // different strides for src and dst. This function will be - // refactored soon. - _desc = VertexBufferDescriptor(0, srcDesc.length, dstDesc.stride); - - if (_program) { - glDeleteProgram(_program); - _program=0; - } - _program = glCreateProgram(); - - GLuint shader = glCreateShader(GL_VERTEX_SHADER); - - std::ostringstream defines; - defines << "#define LENGTH " << srcDesc.length << "\n" - << "#define SRC_STRIDE " << srcDesc.stride << "\n"; - std::string defineStr = defines.str(); - - const char *shaderSources[3] = {"#version 410\n", 0, 0}; - - shaderSources[1] = defineStr.c_str(); - shaderSources[2] = shaderSource; - glShaderSource(shader, 3, shaderSources, NULL); - glCompileShader(shader); - glAttachShader(_program, shader); - - std::vector outputs; - std::vector pOutputs; - { - // vertex data (may include custom vertex data) and varying data - // are stored into the same buffer, interleaved. - // - // (gl_SkipComponents1) - // outVertexData[0] - // outVertexData[1] - // outVertexData[2] - // (gl_SkipComponents1) - // - // note that "primvarOffset" in shader is still needed to read - // interleaved components even if gl_SkipComponents is used. 
- // - char attrName[32]; - int primvarOffset = (dstDesc.offset % dstDesc.stride); - for (int i = 0; i < primvarOffset; ++i) { - outputs.push_back("gl_SkipComponents1"); - } - for (int i = 0; i < dstDesc.length; ++i) { - snprintf(attrName, 32, "outVertexBuffer[%d]", i); - outputs.push_back(attrName); - } - for (int i = primvarOffset + dstDesc.length; i < dstDesc.stride; ++i) { - outputs.push_back("gl_SkipComponents1"); - } - - // convert to char* array - for (size_t i = 0; i < outputs.size(); ++i) { - pOutputs.push_back(&outputs[i][0]); - } - } - - glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(), - &pOutputs[0], GL_INTERLEAVED_ATTRIBS); - - GLint linked = 0; - glLinkProgram(_program); - glGetProgramiv(_program, GL_LINK_STATUS, &linked); - - if (linked == GL_FALSE) { - char buffer[1024]; - glGetShaderInfoLog(shader, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glGetProgramInfoLog(_program, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glDeleteProgram(_program); - _program = 0; - return false; - } - - glDeleteShader(shader); - - // set uniform locations for compute kernels - _primvarBuffer = glGetUniformLocation(_program, "vertexBuffer"); - - _uniformSizes = glGetUniformLocation(_program, "sizes"); - _uniformOffsets = glGetUniformLocation(_program, "offsets"); - _uniformIndices = glGetUniformLocation(_program, "indices"); - _uniformWeights = glGetUniformLocation(_program, "weights"); - - _uniformStart = glGetUniformLocation(_program, "batchStart"); - _uniformEnd = glGetUniformLocation(_program, "batchEnd"); - - _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset"); - - OSD_DEBUG_CHECK_GL_ERROR("KernelBundle::Compile"); - - return true; - } - - GLint GetPrimvarBufferLocation() const { - return _primvarBuffer; - } - - GLint GetSizesLocation() const { - return _uniformSizes; - } - - GLint GetOffsetsLocation() const { - return _uniformOffsets; - } - GLint GetIndicesLocation() const { - return 
_uniformIndices; - } - GLint GetWeightsLocation() const { - return _uniformWeights; - } - - void ApplyStencilTableKernel(GLuint srcBuffer, - VertexBufferDescriptor const &srcDesc, - GLuint dstBuffer, - VertexBufferDescriptor const &dstDesc, - int start, int end) const { - - assert(end >= start); - - (void)srcBuffer; // already bound in bindBufferAndProgram(). - - // set batch range - glUniform1i(_uniformStart, start); - glUniform1i(_uniformEnd, end); - glUniform1i(_uniformSrcOffset, srcDesc.offset); - - int count = end - start; - - // The destination buffer is bound at vertex boundary. - // - // Example: When we have a batched and interleaved vertex buffer - // - // Obj X | Obj Y | - // -----------+-------------------------------------------+------- - // | vtx 0 | vtx 1 | | - // -----------+---------------+---------------+-----------+------- - // | x y z r g b a | x y z r g b a | .... | - // -----------+---------------+---------------+-----------+------- - // ^ - // srcDesc.offset for Obj Y color - // - // ^-------------------------------------------^ - // XFB destination buffer range - // S S S * * * * - // k k k - // i i i - // p p p - // - // We use gl_SkipComponents to skip the first 3 XYZ so the - // buffer itself needs to be bound for entire section of ObjY. - // - // Note that for the source buffer (texture) we bind the whole - // buffer (all VBO range) and use srcOffset=srcDesc.offset for - // indexing. 
- // - int dstBufferBindOffset = - dstDesc.offset - (dstDesc.offset % dstDesc.stride); - - // bind destination buffer - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, - 0, dstBuffer, - dstBufferBindOffset * sizeof(float), - count * dstDesc.stride * sizeof(float)); - - glBeginTransformFeedback(GL_POINTS); - glDrawArrays(GL_POINTS, 0, count); - glEndTransformFeedback(); - - glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); - - //OSD_DEBUG_CHECK_GL_ERROR("TransformPrimvarBuffer\n"); - } - - - struct Match { - - Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -private: - - GLuint _program; - - GLint _primvarBuffer; - - GLint _uniformSizes, // uniform paramaeters for kernels - _uniformOffsets, - _uniformIndices, - _uniformWeights, - - _uniformStart, // batch - _uniformEnd, - - _uniformSrcOffset; - - VertexBufferDescriptor _desc; // primvar buffer descriptor -}; - -// ---------------------------------------------------------------------------- -void -GLSLTransformFeedbackComputeController::bindBufferAndProgram( - GLuint & feedbackTexture) { - - glEnable(GL_RASTERIZER_DISCARD); - _currentBindState.kernelBundle->UseProgram(); - - if (not feedbackTexture) { - glGenTextures(1, &feedbackTexture); -#if defined(GL_EXT_direct_state_access) - if (glTextureBufferEXT) { - glTextureBufferEXT(feedbackTexture, GL_TEXTURE_BUFFER, GL_R32F, - _currentBindState.buffer); - } else { -#else - { -#endif - glBindTexture(GL_TEXTURE_BUFFER, feedbackTexture); - glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.buffer); - glBindTexture(GL_TEXTURE_BUFFER, 0); - } - } - - bindTexture( - _currentBindState.kernelBundle->GetPrimvarBufferLocation(), feedbackTexture, 0); - - // bind vertex array - // always create new one, to be safe with multiple contexts. 
- glGenVertexArrays(1, &_vao); - glBindVertexArray(_vao); -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::bindContextStencilTables( - ComputeContext const *context, bool varying) { - - GLint sizesLocation = _currentBindState.kernelBundle->GetSizesLocation(), - offsetsLocation = _currentBindState.kernelBundle->GetOffsetsLocation(), - indicesLocation = _currentBindState.kernelBundle->GetIndicesLocation(), - weightsLocation = _currentBindState.kernelBundle->GetWeightsLocation(); - - if (not varying) { - bindTexture(sizesLocation, context->GetVertexStencilTablesSizes(), 1); - bindTexture(offsetsLocation, context->GetVertexStencilTablesOffsets(), 2); - bindTexture(indicesLocation, context->GetVertexStencilTablesIndices(), 3); - bindTexture(weightsLocation, context->GetVertexStencilTablesWeights(), 4); - } else { - bindTexture(sizesLocation, context->GetVaryingStencilTablesSizes(), 1); - bindTexture(offsetsLocation, context->GetVaryingStencilTablesOffsets(), 2); - bindTexture(indicesLocation, context->GetVaryingStencilTablesIndices(), 3); - bindTexture(weightsLocation, context->GetVaryingStencilTablesWeights(), 4); - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::unbindResources() { - - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_BUFFER, 0); - - glDisable(GL_RASTERIZER_DISCARD); - glUseProgram(0); - glActiveTexture(GL_TEXTURE0); - - glBindVertexArray(0); - glDeleteVertexArrays(1, &_vao); -} - -// ---------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeController::KernelBundle const * -GLSLTransformFeedbackComputeController::getKernel( - VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - if (it != 
_kernelRegistry.end()) { - return *it; - } else { - KernelBundle * kernelBundle = new KernelBundle(); - kernelBundle->Compile(desc, desc); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::ApplyStencilTableKernel( - GLSLTransformFeedbackComputeContext const *context, int numStencils) const { - - assert(context); - - int start = 0; - int end = numStencils; - - VertexBufferDescriptor srcDesc = _currentBindState.desc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride; - - _currentBindState.kernelBundle->ApplyStencilTableKernel( - _currentBindState.buffer, - srcDesc, - _currentBindState.buffer, - dstDesc, - start, - end); -} - - -// ---------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeController::GLSLTransformFeedbackComputeController() : - _vertexTexture(0), _varyingTexture(0), _vao(0) { -} - -GLSLTransformFeedbackComputeController::~GLSLTransformFeedbackComputeController() { - - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } - if (_vertexTexture) { - glDeleteTextures(1, &_vertexTexture); - } - if (_varyingTexture) { - glDeleteTextures(1, &_varyingTexture); - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::Synchronize() { - glFinish(); -} - - - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslTransformFeedbackComputeController.h b/opensubdiv/osd/glslTransformFeedbackComputeController.h deleted file mode 100644 index 3a9c92ce..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeController.h +++ /dev/null @@ -1,214 +0,0 @@ -// -// Copyright 2013 Pixar -// -// 
Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H -#define OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/glslTransformFeedbackComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class GLSLTransformFeedbackKernelBundle; - -/// \brief Compute controller for launching GLSLTransformFeedback transform feedback -/// subdivision kernels. -/// -/// GLSLTransformFeedbackComputeController is a compute controller class to launch -/// GLSLTransformFeedback transfrom feedback subdivision kernels. It requires -/// GLVertexBufferInterface as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. 
Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class GLSLTransformFeedbackComputeController { -public: - typedef GLSLTransformFeedbackComputeContext ComputeContext; - - /// Constructor. - GLSLTransformFeedbackComputeController(); - - /// Destructor. - ~GLSLTransformFeedbackComputeController(); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The GLSLTransformFeedbackComputeContext to apply - /// refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( GLSLTransformFeedbackComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - if (vertexBuffer) { - - bind(vertexBuffer, vertexDesc, _vertexTexture); - - bindContextStencilTables(context, false); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVertexStencilTables()); - } - - if (varyingBuffer) { - - bind(varyingBuffer, varyingDesc, _varyingTexture); - - bindContextStencilTables(context, true); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVaryingStencilTables()); - } - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. 
- /// - /// @param context The GLSLTransformFeedbackComputeContext to apply - /// refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(GLSLTransformFeedbackComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context, - int numStencils) const; - - template - void bind( BUFFER * buffer, VertexBufferDescriptor const * desc, - GLuint feedbackTexture ) { - - assert(buffer); - - // if the vertex buffer descriptor is specified, use it - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (desc) { - _currentBindState.desc = *desc; - } else { - int numElements = buffer ? buffer->GetNumElements() : 0; - _currentBindState.desc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.buffer = buffer->BindVBO(); - - _currentBindState.kernelBundle = getKernel(_currentBindState.desc); - - bindBufferAndProgram(feedbackTexture); - } - - // Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - unbindResources(); - } - - // binds the primvar data buffer and compute program - void bindBufferAndProgram(GLuint & texture); - - // binds the stencil tables for 'vertex' interpolation - void bindContextStencilTables(ComputeContext const *context, bool varying=false); - - // unbinds the primvar data buffer and compute program - void unbindResources(); - -private: - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. 
- struct BindState { - - BindState() : buffer(0), kernelBundle(0) { } - - void Reset() { - buffer = 0; - desc.Reset(); - kernelBundle = 0; - } - - GLuint buffer; - - VertexBufferDescriptor desc; - - KernelBundle const * kernelBundle; - }; - - BindState _currentBindState; - - typedef std::vector KernelRegistry; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - KernelRegistry _kernelRegistry; - - GLuint _vertexTexture, - _varyingTexture, - _vao; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/glslTransformFeedbackKernel.glsl b/opensubdiv/osd/glslXFBKernel.glsl similarity index 100% rename from opensubdiv/osd/glslTransformFeedbackKernel.glsl rename to opensubdiv/osd/glslXFBKernel.glsl diff --git a/opensubdiv/osd/mesh.h b/opensubdiv/osd/mesh.h index e867cae5..d2c77549 100644 --- a/opensubdiv/osd/mesh.h +++ b/opensubdiv/osd/mesh.h @@ -39,6 +39,8 @@ #include "../osd/vertexDescriptor.h" +struct ID3D11DeviceContext; + namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { @@ -80,13 +82,6 @@ public: virtual void Refine() = 0; - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc) = 0; - - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc, - bool interleaved) = 0; - virtual void Synchronize() = 0; virtual DrawContext * GetDrawContext() = 0; @@ -119,25 +114,143 @@ protected: // --------------------------------------------------------------------------- -template +template +STENCIL_TABLES const * +convertToCompatibleStencilTables( + Far::StencilTables const *table, DEVICE_CONTEXT *context) { + if (not table) return NULL; + return STENCIL_TABLES::Create(table, context); +} + +template <> +Far::StencilTables const * +convertToCompatibleStencilTables( + 
Far::StencilTables const *table, void * /*context*/) { + // no need for conversion + // XXX: We don't want to even copy. + if (not table) return NULL; + return new Far::StencilTables(*table); +} + +template <> +Far::StencilTables const * +convertToCompatibleStencilTables( + Far::StencilTables const *table, ID3D11DeviceContext * /*context*/) { + // no need for conversion + // XXX: We don't want to even copy. + if (not table) return NULL; + return new Far::StencilTables(*table); +} + +// --------------------------------------------------------------------------- + +template +class EvaluatorCacheT { +public: + ~EvaluatorCacheT() { + for(typename Evaluators::iterator it = _evaluators.begin(); + it != _evaluators.end(); ++it) { + delete it->evaluator; + } + } + + // XXX: FIXME, linear search + struct Entry { + Entry(VertexBufferDescriptor const &sd, + VertexBufferDescriptor const &dd, + EVALUATOR *e) : srcDesc(sd), dstDesc(dd), evaluator(e) {} + VertexBufferDescriptor srcDesc, dstDesc; + EVALUATOR *evaluator; + }; + typedef std::vector Evaluators; + + template + EVALUATOR *GetEvaluator(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + DEVICE_CONTEXT *deviceContext) { + + for(typename Evaluators::iterator it = _evaluators.begin(); + it != _evaluators.end(); ++it) { + if (it->srcDesc.length == srcDesc.length and + it->srcDesc.stride == srcDesc.stride and + it->dstDesc.length == dstDesc.length and + it->dstDesc.stride == dstDesc.stride) { + return it->evaluator; + } + } + EVALUATOR *e = EVALUATOR::Create(srcDesc, dstDesc, deviceContext); + _evaluators.push_back(Entry(srcDesc, dstDesc, e)); + return e; + } + +private: + Evaluators _evaluators; +}; + + +// template helpers to see if the evaluator is instantiatable or not. 
+template +struct instantiatable +{ + typedef char yes[1]; + typedef char no[2]; + template static yes &chk(typename C::Instantiatable *t=0); + template static no &chk(...); + static bool const value = sizeof(chk(0)) == sizeof(yes); +}; +template +struct enable_if { typedef T type; }; +template +struct enable_if { }; + +// extract a kernel from cache if available +template +static EVALUATOR *GetEvaluator( + EvaluatorCacheT *cache, + VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + DEVICE_CONTEXT deviceContext, + typename enable_if::value, void>::type*t=0) { + (void)t; + if (cache == NULL) return NULL; + return cache->GetEvaluator(srcDesc, dstDesc, deviceContext); +} + +// fallback +template +static EVALUATOR *GetEvaluator( + EvaluatorCacheT *, + VertexBufferDescriptor const &, + VertexBufferDescriptor const &, + DEVICE_CONTEXT, + typename enable_if::value, void>::type*t=0) { + (void)t; + return NULL; +} + +// --------------------------------------------------------------------------- + +template class Mesh : public MeshInterface { public: typedef VERTEX_BUFFER VertexBuffer; - typedef COMPUTE_CONTROLLER ComputeController; + typedef EVALUATOR Evaluator; + typedef STENCIL_TABLES StencilTables; typedef DRAW_CONTEXT DrawContext; typedef DEVICE_CONTEXT DeviceContext; - typedef typename ComputeController::ComputeContext ComputeContext; + typedef EvaluatorCacheT EvaluatorCache; typedef typename DrawContext::VertexBufferBinding VertexBufferBinding; - Mesh(ComputeController * computeController, - Far::TopologyRefiner * refiner, + Mesh(Far::TopologyRefiner * refiner, int numVertexElements, int numVaryingElements, int level, MeshBitset bits = MeshBitset(), + EvaluatorCache * evaluatorCache = NULL, DeviceContext * deviceContext = NULL) : _refiner(refiner), @@ -145,8 +258,9 @@ public: _numVertices(0), _vertexBuffer(NULL), _varyingBuffer(NULL), - _computeContext(NULL), - _computeController(computeController), + _vertexStencilTables(NULL), + 
_varyingStencilTables(NULL), + _evaluatorCache(evaluatorCache), _drawContext(NULL), _deviceContext(deviceContext) { @@ -157,18 +271,34 @@ public: bits.test(MeshAdaptive), bits.test(MeshUseSingleCreasePatch)); - int numVertexElementsInterleaved = numVertexElements + + int vertexBufferStride = numVertexElements + (bits.test(MeshInterleaveVarying) ? numVaryingElements : 0); - int numVaryingElementsNonInterleaved = + int varyingBufferStride = (bits.test(MeshInterleaveVarying) ? 0 : numVaryingElements); initializeContext(numVertexElements, numVaryingElements, - numVertexElementsInterleaved, level, bits); + vertexBufferStride, level, bits); initializeVertexBuffers(_numVertices, - numVertexElementsInterleaved, - numVaryingElementsNonInterleaved); + vertexBufferStride, + varyingBufferStride); + + // configure vertex buffer descriptor + _vertexDesc = VertexBufferDescriptor(0, + numVertexElements, + vertexBufferStride); + if (bits.test(MeshInterleaveVarying)) { + _varyingDesc = VertexBufferDescriptor(numVertexElements, + numVaryingElements, + vertexBufferStride); + } else { + _varyingDesc = VertexBufferDescriptor(0, + numVaryingElements, + varyingBufferStride); + } + + // will retire _drawContext->UpdateVertexTexture(_vertexBuffer, _deviceContext); @@ -179,9 +309,10 @@ public: delete _patchTables; delete _vertexBuffer; delete _varyingBuffer; - delete _computeContext; + delete _vertexStencilTables; + delete _varyingStencilTables; delete _drawContext; - // devicecontext and computecontroller are not owned by this class. + // deviceContext and evaluatorCache are not owned by this class. 
} virtual void UpdateVertexBuffer(float const *vertexData, @@ -197,29 +328,50 @@ public: } virtual void Refine() { - _computeController->Compute(_computeContext, - _vertexBuffer, _varyingBuffer); - } - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc) { - _computeController->Compute(_computeContext, - _vertexBuffer, _varyingBuffer, - vertexDesc, varyingDesc); - } + int numControlVertices = _refiner->GetNumVertices(0); - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc, - bool interleaved) { - _computeController->Compute(_computeContext, - _vertexBuffer, - (interleaved ? - _vertexBuffer : _varyingBuffer), - vertexDesc, varyingDesc); + VertexBufferDescriptor srcDesc = _vertexDesc; + VertexBufferDescriptor dstDesc(srcDesc); + dstDesc.offset += numControlVertices * dstDesc.stride; + + // note that the _evaluatorCache can be NULL and thus + // the evaluatorInstance can be NULL + // (for uninstantiatable kernels CPU,TBB etc) + Evaluator const *instance = GetEvaluator( + _evaluatorCache, srcDesc, dstDesc, _deviceContext); + + Evaluator::EvalStencils(_vertexBuffer, srcDesc, + _vertexBuffer, dstDesc, + _vertexStencilTables, + instance, _deviceContext); + + if (_varyingDesc.length > 0) { + VertexBufferDescriptor srcDesc = _varyingDesc; + VertexBufferDescriptor dstDesc(srcDesc); + dstDesc.offset += numControlVertices * dstDesc.stride; + + instance = GetEvaluator( + _evaluatorCache, srcDesc, dstDesc, _deviceContext); + + if (_varyingBuffer) { + // non-interleaved + Evaluator::EvalStencils(_varyingBuffer, srcDesc, + _varyingBuffer, dstDesc, + _varyingStencilTables, + instance, _deviceContext); + } else { + // interleaved + Evaluator::EvalStencils(_vertexBuffer, srcDesc, + _vertexBuffer, dstDesc, + _varyingStencilTables, + instance, _deviceContext); + } + } } virtual void Synchronize() { - _computeController->Synchronize(); + Evaluator::Synchronize(_deviceContext); } 
virtual DrawContext * GetDrawContext() { @@ -333,14 +485,20 @@ private: _drawContext = DrawContext::Create(_patchTables, numElements, _deviceContext); - _computeContext = ComputeContext::Create(vertexStencils, - varyingStencils, - _deviceContext); // numvertices = coarse verts + refined verts + gregory basis verts _numVertices = vertexStencils->GetNumControlVertices() + vertexStencils->GetNumStencils(); + // convert to device stenciltables if necessary. + _vertexStencilTables = + convertToCompatibleStencilTables( + vertexStencils, _deviceContext); + _varyingStencilTables = + convertToCompatibleStencilTables( + varyingStencils, _deviceContext); + + // FIXME: we do extra copyings for Far::Stencils. delete vertexStencils; delete varyingStencils; } @@ -365,14 +523,17 @@ private: int _numVertices; - VertexBuffer * _vertexBuffer, - * _varyingBuffer; + VertexBuffer * _vertexBuffer; + VertexBuffer * _varyingBuffer; - ComputeContext * _computeContext; - ComputeController * _computeController; + VertexBufferDescriptor _vertexDesc; + VertexBufferDescriptor _varyingDesc; + + StencilTables const * _vertexStencilTables; + StencilTables const * _varyingStencilTables; + EvaluatorCache * _evaluatorCache; DrawContext *_drawContext; - DeviceContext *_deviceContext; }; diff --git a/opensubdiv/osd/ompComputeController.cpp b/opensubdiv/osd/ompComputeController.cpp deleted file mode 100644 index fd074689..00000000 --- a/opensubdiv/osd/ompComputeController.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../far/stencilTables.h" -#include "../osd/ompComputeController.h" -#include "../osd/ompKernel.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -OmpComputeController::OmpComputeController(int numThreads) { - - _numThreads = (numThreads == -1) ? 
omp_get_max_threads() : numThreads; -} - -void -OmpComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - Far::StencilTables const * vertexStencils = context->GetVertexStencilTables(); - - if (vertexStencils and _currentBindState.vertexBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += vertexStencils->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = vertexStencils->GetNumStencils(); - - if (end > start) { - OmpComputeStencils(_currentBindState.vertexBuffer, - srcDesc, - _currentBindState.vertexBuffer, - dstDesc, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); - } - } - - Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); - - if (varyingStencils and _currentBindState.varyingBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += varyingStencils->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = varyingStencils->GetNumStencils(); - - if (end > start) { - OmpComputeStencils(_currentBindState.varyingBuffer, - srcDesc, - _currentBindState.varyingBuffer, - dstDesc, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - start, - end); - } - } -} - -void -OmpComputeController::Synchronize() { - // XXX: -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/ompComputeController.h b/opensubdiv/osd/ompComputeController.h deleted file mode 100644 index 29831d4c..00000000 --- a/opensubdiv/osd/ompComputeController.h +++ /dev/null @@ -1,184 +0,0 @@ -// -// Copyright 2013 
Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_OMP_COMPUTE_CONTROLLER_H -#define OSD_OMP_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#ifdef OPENSUBDIV_HAS_OPENMP - #include -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching OpenMP subdivision kernels. -/// -/// OmpComputeController is a compute controller class to launch OpenMP -/// threaded subdivision kernels. It requires CpuVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class OmpComputeController { -public: - typedef CpuComputeContext ComputeContext; - - /// Constructor. 
- /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - explicit OmpComputeController(int numThreads=-1); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CpuComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - omp_set_num_threads(_numThreads); - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CpuComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. 
- // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - vertexBuffer->BindCpuBuffer() : 0; - - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCpuBuffer() : 0; - } - - - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = 0; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float * vertexBuffer, - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; - int _numThreads; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_OMP_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/ompEvalStencilsController.cpp b/opensubdiv/osd/ompEvalStencilsController.cpp deleted file mode 100644 index c2f12a2b..00000000 --- a/opensubdiv/osd/ompEvalStencilsController.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. 
is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/ompEvalStencilsController.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -OmpEvalStencilsController::OmpEvalStencilsController(int numThreads) { - - _numThreads = (numThreads == -1) ? 
omp_get_num_procs() : numThreads; -} - -OmpEvalStencilsController::~OmpEvalStencilsController() { -} - -int -OmpEvalStencilsController::_UpdateValues( CpuEvalStencilsContext * context ) { - - int result=0; - - Far::LimitStencilTables const * stencils = context->GetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - outDesc = _currentBindState.outputDataDesc; - - // make sure that we have control data to work with - if (not ctrlDesc.CanEval(outDesc)) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - if (not ctrl) - return result; - -#pragma omp parallel for - for (int i=0; iGetSizes()[i]; - Far::Index offset = stencils->GetOffsets()[i]; - - Far::Index const * index = &stencils->GetControlIndices().at(offset); - - float const * weight = &stencils->GetWeights().at(offset); - - float * out = _currentBindState.outputData + i * outDesc.stride + outDesc.offset; - - memset(out, 0, outDesc.length*sizeof(float)); - - for (int j=0; jGetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - duDesc = _currentBindState.outputDuDesc, - dvDesc = _currentBindState.outputDvDesc; - - // make sure that we have control data to work with - if (not (ctrlDesc.CanEval(duDesc) and ctrlDesc.CanEval(dvDesc))) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - if (not ctrl) - return result; - -#pragma omp parallel for - for (int i=0; iGetSizes()[i]; - Far::Index offset = stencils->GetOffsets()[i]; - - Far::Index const * index = &stencils->GetControlIndices().at(offset); - - float const * duweight = &stencils->GetDuWeights().at(offset), - * dvweight = &stencils->GetDvWeights().at(offset); - - float * du = _currentBindState.outputUDeriv + i * duDesc.stride + duDesc.offset, 
- * dv = _currentBindState.outputVDeriv + i * dvDesc.stride + dvDesc.offset; - - memset(du, 0, duDesc.length*sizeof(float)); - memset(dv, 0, dvDesc.length*sizeof(float)); - - for (int j=0; j -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// -/// \brief CPU stencils evaluation controller -/// -/// CpuStencilsController is a compute controller class to launch -/// single threaded CPU stencil evalution kernels. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class OmpEvalStencilsController { -public: - - /// \brief Constructor. - /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - OmpEvalStencilsController(int numThreads=-1); - - /// \brief Destructor. - ~OmpEvalStencilsController(); - - - /// \brief Applies stencil weights to the control vertex data - /// - /// Applies the stencil weights to the control vertex data to evaluate the - /// interpolated limit positions at the parametric locations of the stencils - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDataDesc vertex buffer descriptor for the output vertex data - /// - /// @param outputData output vertex buffer for the interpolated data - /// - template - int UpdateValues( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - omp_set_num_threads(_numThreads); - - 
bindControlData( controlDataDesc, controlVertices ); - - bindOutputData( outputDataDesc, outputData ); - - int n = _UpdateValues( context ); - - unbind(); - - return n; - } - - /// \brief Applies derivative stencil weights to the control vertex data - /// - /// Computes the U and V derivative stencils to the control vertex data at - /// the parametric locations contained in each stencil - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDuDesc vertex buffer descriptor for the U derivative output data - /// - /// @param outputDuData output vertex buffer for the U derivative data - /// - /// @param outputDvDesc vertex buffer descriptor for the V deriv output data - /// - /// @param outputDvData output vertex buffer for the V derivative data - /// - template - int UpdateDerivs( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData, - VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData ); - - int n = _UpdateDerivs( context ); - - unbind(); - - return n; - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - /// \brief Binds control vertex data buffer - template - void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) { - - _currentBindState.controlData = controlData ? 
controlData->BindCpuBuffer() : 0; - _currentBindState.controlDataDesc = controlDataDesc; - - } - - /// \brief Binds output vertex data buffer - template - void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) { - - _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0; - _currentBindState.outputDataDesc = outputDataDesc; - } - - /// \brief Binds output derivative vertex data buffer - template - void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu, - VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) { - - _currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0; - _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0; - _currentBindState.outputDuDesc = outputDuDesc; - _currentBindState.outputDvDesc = outputDvDesc; - } - - /// \brief Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - } - -private: - - int _UpdateValues( CpuEvalStencilsContext * context ); - int _UpdateDerivs( CpuEvalStencilsContext * context ); - - int _numThreads; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. 
- struct BindState { - - BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { } - - void Reset() { - controlData = outputData = outputUDeriv = outputVDeriv = NULL; - controlDataDesc.Reset(); - outputDataDesc.Reset(); - outputDuDesc.Reset(); - outputDvDesc.Reset(); - } - - // transient mesh data - VertexBufferDescriptor controlDataDesc, - outputDataDesc, - outputDuDesc, - outputDvDesc; - - float * controlData, - * outputData, - * outputUDeriv, - * outputVDeriv; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_OMP_EVALSTENCILS_CONTROLLER_H diff --git a/opensubdiv/osd/cpuEvalStencilsContext.cpp b/opensubdiv/osd/ompEvaluator.cpp similarity index 53% rename from opensubdiv/osd/cpuEvalStencilsContext.cpp rename to opensubdiv/osd/ompEvaluator.cpp index 40f2af54..36db0c1b 100644 --- a/opensubdiv/osd/cpuEvalStencilsContext.cpp +++ b/opensubdiv/osd/ompEvaluator.cpp @@ -1,5 +1,5 @@ // -// Copyright 2013 Pixar +// Copyright 2015 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in @@ -22,23 +22,48 @@ // language governing permissions and limitations under the Apache License. 
// -#include "../osd/cpuEvalStencilsContext.h" +#include "../osd/ompEvaluator.h" +#include "../osd/ompKernel.h" +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { -CpuEvalStencilsContext::CpuEvalStencilsContext(Far::LimitStencilTables const *stencils) : - _stencils(stencils) { +/* static */ +bool +OmpEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; + + // we can probably expand cpuKernel.cpp to here. + OmpEvalStencils(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights, start, end); + + return true; } -CpuEvalStencilsContext * -CpuEvalStencilsContext::Create(Far::LimitStencilTables const *stencils) { - return new CpuEvalStencilsContext(stencils); +/* static */ +void +OmpEvaluator::Synchronize(void * /*deviceContext*/) { + // we use "omp parallel for" and it synchronizes by itself } -} // end namespace Osd +/* static */ +void +OmpEvaluator::SetNumThreads(int numThreads) { + omp_set_num_threads(numThreads); +} + +} // end namespace Osd } // end namespace OPENSUBDIV_VERSION } // end namespace OpenSubdiv diff --git a/opensubdiv/osd/ompEvaluator.h b/opensubdiv/osd/ompEvaluator.h new file mode 100644 index 00000000..06c492d8 --- /dev/null +++ b/opensubdiv/osd/ompEvaluator.h @@ -0,0 +1,114 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. 
This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_OMP_EVALUATOR_H +#define OPENSUBDIV_OSD_OMP_EVALUATOR_H + +#include "../version.h" + +#include + +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +class OmpEvaluator { +public: + /// \brief Generic static compute function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. 
+ /// + /// @param instance not used in the omp kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the omp kernel + /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + OmpEvaluator const * instance = NULL, + void * deviceContext = NULL) { + (void)instance; // unused; + (void)deviceContext; // unused; + + return EvalStencils(srcVertexBuffer->BindCpuBuffer(), + srcDesc, + dstVertexBuffer->BindCpuBuffer(), + dstDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + /// stencil compute function. + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); + + static void Synchronize(void *deviceContext = NULL); + + static void SetNumThreads(int numThreads); +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_OMP_EVALUATOR_H diff --git a/opensubdiv/osd/ompKernel.cpp b/opensubdiv/osd/ompKernel.cpp index 100dbf01..606578c6 100644 --- a/opensubdiv/osd/ompKernel.cpp +++ b/opensubdiv/osd/ompKernel.cpp @@ -73,16 +73,15 @@ copy(float *dst, int dstIndex, const float *src, // XXXX manuelk this should be optimized further by using SIMD - considering // OMP is somewhat obsolete - this is probably not worth it. 
void -OmpComputeStencils(float const * src, - VertexBufferDescriptor const &srcDesc, - float * dst, - VertexBufferDescriptor const &dstDesc, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end) { - +OmpEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end) { if (start > 0) { sizes += start; indices += offsets[start]; diff --git a/opensubdiv/osd/ompKernel.h b/opensubdiv/osd/ompKernel.h index 905217f8..efaf81d0 100644 --- a/opensubdiv/osd/ompKernel.h +++ b/opensubdiv/osd/ompKernel.h @@ -22,8 +22,8 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_OMP_KERNEL_H -#define OSD_OMP_KERNEL_H +#ifndef OPENSUBDIV_OSD_OMP_KERNEL_H +#define OPENSUBDIV_OSD_OMP_KERNEL_H #include "../version.h" @@ -35,15 +35,15 @@ namespace Osd { struct VertexBufferDescriptor; void -OmpComputeStencils(float const * src, - VertexBufferDescriptor const &srcDesc, - float * dst, - VertexBufferDescriptor const &dstDesc, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end); +OmpEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end); } // end namespace Osd @@ -52,4 +52,4 @@ using namespace OPENSUBDIV_VERSION; } // end namespace OpenSubdiv -#endif // OSD_OMP_KERNEL_H +#endif // OPENSUBDIV_OSD_OMP_KERNEL_H diff --git a/opensubdiv/osd/ompSmoothNormalController.cpp b/opensubdiv/osd/ompSmoothNormalController.cpp deleted file mode 100644 index 8de17494..00000000 --- a/opensubdiv/osd/ompSmoothNormalController.cpp +++ /dev/null @@ -1,185 
+0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/ompSmoothNormalController.h" - -#ifdef OPENSUBDIV_HAS_OPENMP - #include -#endif - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -inline void -cross(float *n, const float *p0, const float *p1, const float *p2) { - - float a[3] = { p1[0]-p0[0], p1[1]-p0[1], p1[2]-p0[2] }; - float b[3] = { p2[0]-p0[0], p2[1]-p0[1], p2[2]-p0[2] }; - n[0] = a[1]*b[2]-a[2]*b[1]; - n[1] = a[2]*b[0]-a[0]*b[2]; - n[2] = a[0]*b[1]-a[1]*b[0]; - - float rn = 1.0f/sqrtf(n[0]*n[0] + n[1]*n[1] + n[2]*n[2]); - n[0] *= rn; - n[1] *= rn; - n[2] *= rn; -} - -void OmpSmoothNormalController::_smootheNormals( - CpuSmoothNormalContext * context) { - - VertexBufferDescriptor const & iDesc = context->GetInputVertexDescriptor(), - & oDesc = context->GetOutputVertexDescriptor(); - - assert(iDesc.length==3 and oDesc.length==3); - - float * oBuffer = context->GetCurrentOutputVertexBuffer() + oDesc.offset; - if (context->GetResetMemory()) { - -#pragma omp parallel for - for (int j=0; jGetNumVertices(); ++j) { - float * ptr = oBuffer + j * oDesc.stride; - memset(ptr, 0, oDesc.length*sizeof(float)); - } - } - - { // note: quads only ! 
- float const * iBuffer = context->GetCurrentInputVertexBuffer() + iDesc.offset; - - Far::Index const * fverts = context->GetFaceVertices(); - - int nfaces = context->GetNumFaces(); - -#pragma omp parallel for - for (int i=0; iGetInputVertexDescriptor(), - & oDesc = context->GetOutputVertexDescriptor(); - - assert(iDesc.length==3 and oDesc.length==3); - - float const * iBuffer = context->GetCurrentInputVertexBuffer() + iDesc.offset; - float * oBuffer = context->GetCurrentOutputVertexBuffer() + oDesc.offset; - - Far::PatchTables::PTable const & verts = context->GetControlVertices(); - - Far::PatchTables::PatchArrayVector const & parrays = context->GetPatchArrayVector(); - - if (verts.empty() or parrays.empty() or (not iBuffer) or (not oBuffer)) { - return; - } - - for (int i=0; i<(int)parrays.size(); ++i) { - - Far::PatchTables::PatchArray const & pa = parrays[i]; - - Far::PatchTables::Type type = pa.GetDescriptor().GetType(); - - - if (type==Far::PatchTables::QUADS or type==Far::PatchTables::TRIANGLES) { - - int nv = Far::PatchTables::Descriptor::GetNumControlVertices(type); - - // if necessary, reset all normal values to 0 - if (context->GetResetMemory()) { -#pragma omp parallel for - for (int j=0; jGetNumVertices(); ++j) { - float * ptr = oBuffer + j * oDesc.stride; - memset(ptr, 0, oDesc.length*sizeof(float)); - } - } - - -#pragma omp parallel for - for (int j=0; j<(int)pa.GetNumPatches(); ++j) { - - int idx = pa.GetVertIndex() + j*nv; - - float const * p0 = iBuffer + verts[idx+0]*iDesc.stride, - * p1 = iBuffer + verts[idx+1]*iDesc.stride, - * p2 = iBuffer + verts[idx+2]*iDesc.stride; - - // compute face normal - float n[3]; - cross( n, p0, p1, p2 ); - - // add normal to all vertices of the face - for (int k=0; k - void SmootheNormals( CpuSmoothNormalContext * context, - VERTEX_BUFFER * iBuffer, int iOfs, - VERTEX_BUFFER * oBuffer, int oOfs ) { - - if (not context) return; - - context->Bind(iBuffer, iOfs, oBuffer, oOfs); - - _smootheNormals(context); - - 
context->Unbind(); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -private: - - void _smootheNormals(CpuSmoothNormalContext * context); -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_OMP_SMOOTHNORMAL_CONTROLLER_H diff --git a/opensubdiv/osd/tbbComputeController.cpp b/opensubdiv/osd/tbbComputeController.cpp deleted file mode 100644 index 10b6dd41..00000000 --- a/opensubdiv/osd/tbbComputeController.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include - -#include "../far/stencilTables.h" -#include "../osd/cpuComputeContext.h" -#include "../osd/tbbComputeController.h" -#include "../osd/tbbKernel.h" - -#ifdef OPENSUBDIV_HAS_TBB - #include -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -TbbComputeController::TbbComputeController(int numThreads) - : _numThreads(numThreads) { - - if(_numThreads == -1) - tbb::task_scheduler_init init; - else - tbb::task_scheduler_init init(numThreads); -} - -void -TbbComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - Far::StencilTables const * vertexStencils = context->GetVertexStencilTables(); - - if (vertexStencils and _currentBindState.vertexBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += vertexStencils->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = vertexStencils->GetNumStencils(); - - if (end > start) { - TbbComputeStencils(_currentBindState.vertexBuffer, - srcDesc, - _currentBindState.vertexBuffer, - dstDesc, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); - } - } - - Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); - - if (varyingStencils and _currentBindState.varyingBuffer) { - VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc; - VertexBufferDescriptor dstDesc(srcDesc); - dstDesc.offset += varyingStencils->GetNumControlVertices() * dstDesc.stride; - - int start = 0; - int end = varyingStencils->GetNumStencils(); - - if (end > start) { - TbbComputeStencils(_currentBindState.varyingBuffer, - srcDesc, - _currentBindState.varyingBuffer, - dstDesc, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - 
&varyingStencils->GetWeights().at(0), - start, - end); - } - } -} - -void -TbbComputeController::Synchronize() { - // XXX: -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv - diff --git a/opensubdiv/osd/tbbComputeController.h b/opensubdiv/osd/tbbComputeController.h deleted file mode 100644 index 6bb13d87..00000000 --- a/opensubdiv/osd/tbbComputeController.h +++ /dev/null @@ -1,177 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_TBB_COMPUTE_CONTROLLER_H -#define OSD_TBB_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuComputeContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching TBB subdivision kernels. -/// -/// TbbComputeController is a compute controller class to launch TBB -/// threaded subdivision kernels. 
It requires CpuVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class TbbComputeController { -public: - typedef CpuComputeContext ComputeContext; - - /// Constructor. - /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - explicit TbbComputeController(int numThreads=-1); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CpuComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CpuComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. 
- void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - vertexBuffer->BindCpuBuffer(): 0; - - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCpuBuffer() : 0; - } - - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. 
- struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = 0; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float * vertexBuffer, - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; - int _numThreads; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_TBB_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/tbbEvalStencilsController.cpp b/opensubdiv/osd/tbbEvalStencilsController.cpp deleted file mode 100644 index ecaffaf1..00000000 --- a/opensubdiv/osd/tbbEvalStencilsController.cpp +++ /dev/null @@ -1,200 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/tbbEvalStencilsController.h" - -#include -#include - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#define grain_size 200 - -TbbEvalStencilsController::TbbEvalStencilsController(int numThreads) { - - _numThreads = numThreads > 0 ? numThreads : tbb::task_scheduler_init::automatic; - - tbb::task_scheduler_init init(numThreads); -} - -TbbEvalStencilsController::~TbbEvalStencilsController() { -} - - -class StencilKernel { - -public: - enum Mode { UNDEFINED, POINT, U_DERIV, V_DERIV }; - - StencilKernel( Far::LimitStencilTables const * stencils, - VertexBufferDescriptor ctrlDesc, - float const * ctrlData ) : - _stencils(stencils), - _mode(UNDEFINED), - _ctrlDesc(ctrlDesc), - _length(0), - _outStride(0), - _outData(0) { - - _ctrlData = ctrlData + ctrlDesc.offset; - } - - bool SetOutput(Mode mode, VertexBufferDescriptor outDesc, float * outData) { - - if (_ctrlDesc.CanEval(outDesc)) { - _mode = mode; - _length = outDesc.length; - _outStride = outDesc.stride; - _outData = outData + outDesc.offset; - return true; - } - return false; - } - - void operator() (tbb::blocked_range const &r) const { - - assert(_stencils and _ctrlData and _length and _outStride and _outData); - - Far::Index offset = _stencils->GetOffsets()[r.begin()]; - - unsigned char const * sizes = &_stencils->GetSizes()[r.begin()]; - Far::Index const * index = &_stencils->GetControlIndices()[offset]; - - float const * weight; - - switch (_mode) { - case POINT : weight = &_stencils->GetWeights()[offset]; break; - case U_DERIV : weight = &_stencils->GetDuWeights()[offset]; break; - case V_DERIV : weight = &_stencils->GetDvWeights()[offset]; break; - default: - return; - } - assert( weight); - - float * out = _outData + r.begin() * _outStride; - - for (int i=r.begin(); iGetStencilTables(); - if (not stencils) - return 0; - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return 0; - - StencilKernel kernel( stencils, 
_currentBindState.controlDataDesc, - _currentBindState.controlData ); - - - if (not kernel.SetOutput( StencilKernel::POINT, - _currentBindState.outputDataDesc, - _currentBindState.outputData )) - return 0; - - tbb::blocked_range range(0, nstencils, grain_size); - - tbb::parallel_for(range, kernel); - - return nstencils; -} - -int -TbbEvalStencilsController::_UpdateDerivs( CpuEvalStencilsContext * context ) { - - Far::LimitStencilTables const * stencils = context->GetStencilTables(); - if (not stencils) - return 0; - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return 0; - - tbb::blocked_range range(0, nstencils, grain_size); - - StencilKernel kernel( stencils, _currentBindState.controlDataDesc, - _currentBindState.controlData ); - - if (not kernel.SetOutput( StencilKernel::U_DERIV, - _currentBindState.outputDuDesc, - _currentBindState.outputUDeriv ) ) - return 0; - - tbb::parallel_for(range, kernel); - - if (not kernel.SetOutput( StencilKernel::V_DERIV, - _currentBindState.outputDvDesc, - _currentBindState.outputVDeriv ) ) - return 0; - - tbb::parallel_for(range, kernel); - - return nstencils; -} - -void -TbbEvalStencilsController::Synchronize() { -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/tbbEvalStencilsController.h b/opensubdiv/osd/tbbEvalStencilsController.h deleted file mode 100644 index ce2cac0c..00000000 --- a/opensubdiv/osd/tbbEvalStencilsController.h +++ /dev/null @@ -1,216 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef FAR_TBB_EVALSTENCILS_CONTROLLER_H -#define FAR_TBB_EVALSTENCILS_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuEvalStencilsContext.h" - - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// -/// \brief CPU stencils evaluation controller -/// -/// CpuStencilsController is a compute controller class to launch -/// single threaded CPU stencil evalution kernels. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class TbbEvalStencilsController { -public: - - /// \brief Constructor. - /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - TbbEvalStencilsController(int numThreads=-1); - - /// \brief Destructor. 
- ~TbbEvalStencilsController(); - - - /// \brief Applies stencil weights to the control vertex data - /// - /// Applies the stencil weights to the control vertex data to evaluate the - /// interpolated limit positions at the parametric locations of the stencils - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDataDesc vertex buffer descriptor for the output vertex data - /// - /// @param outputData output vertex buffer for the interpolated data - /// - template - int UpdateValues( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputData( outputDataDesc, outputData ); - - int n = _UpdateValues( context ); - - unbind(); - - return n; - } - - /// \brief Applies derivative stencil weights to the control vertex data - /// - /// Computes the U and V derivative stencils to the control vertex data at - /// the parametric locations contained in each stencil - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDuDesc vertex buffer descriptor for the U derivative output data - /// - /// @param outputDuData output vertex buffer for the U derivative data - /// - /// @param outputDvDesc vertex buffer descriptor for the V deriv output data - /// - /// @param outputDvData output vertex buffer for the V derivative data - /// - template - int UpdateDerivs( 
CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData, - VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData ); - - int n = _UpdateDerivs( context ); - - unbind(); - - return n; - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - /// \brief Binds control vertex data buffer - template - void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) { - - _currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0; - _currentBindState.controlDataDesc = controlDataDesc; - - } - - /// \brief Binds output vertex data buffer - template - void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) { - - _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0; - _currentBindState.outputDataDesc = outputDataDesc; - } - - /// \brief Binds output derivative vertex data buffer - template - void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu, - VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) { - - _currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0; - _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0; - _currentBindState.outputDuDesc = outputDuDesc; - _currentBindState.outputDvDesc = outputDvDesc; - } - - /// \brief Unbinds any previously bound vertex and varying data buffers. 
- void unbind() { - _currentBindState.Reset(); - } - -private: - - int _UpdateValues( CpuEvalStencilsContext * context ); - int _UpdateDerivs( CpuEvalStencilsContext * context ); - - int _numThreads; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. - struct BindState { - - BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { } - - void Reset() { - controlData = outputData = outputUDeriv = outputVDeriv = NULL; - controlDataDesc.Reset(); - outputDataDesc.Reset(); - outputDuDesc.Reset(); - outputDvDesc.Reset(); - } - - // transient mesh data - VertexBufferDescriptor controlDataDesc, - outputDataDesc, - outputDuDesc, - outputDvDesc; - - float * controlData, - * outputData, - * outputUDeriv, - * outputVDeriv; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_TBB_EVALSTENCILS_CONTROLLER_H diff --git a/opensubdiv/osd/computeController.h b/opensubdiv/osd/tbbEvaluator.cpp similarity index 54% rename from opensubdiv/osd/computeController.h rename to opensubdiv/osd/tbbEvaluator.cpp index b81ccaf5..71fc1528 100644 --- a/opensubdiv/osd/computeController.h +++ b/opensubdiv/osd/tbbEvaluator.cpp @@ -1,5 +1,5 @@ // -// Copyright 2013 Pixar +// Copyright 2015 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in @@ -22,46 +22,51 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_COMPUTE_CONTROLLER_H -#define OSD_COMPUTE_CONTROLLER_H +#include "../osd/tbbEvaluator.h" +#include "../osd/tbbKernel.h" -#include "../version.h" +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { -/*! 
- \page sequence_page API sequence diagrams +/* static */ +bool +TbbEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; - This section describes the typical sequence of initialization and drawing - animated prims using OpenSubdiv. + TbbEvalStencils(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights, start, end); - \section init_sec Initialize + return true; +} - \image html OsdCreateSequence.png +/* static */ +void +TbbEvaluator::Synchronize(void *) { +} - \section draw_sec Refine and Draw - - \image html OsdRefineDrawSequence.png - - */ - -// XXX: do we really need this base class? -class ComputeController { -public: - virtual ~ComputeController() {} - -protected: - ComputeController() {} -}; +/* static */ +void +TbbEvaluator::SetNumThreads(int numThreads) { + if (numThreads == -1) { + tbb::task_scheduler_init init; + } else { + tbb::task_scheduler_init init(numThreads); + } +} } // end namespace Osd } // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - } // end namespace OpenSubdiv - -#endif // OSD_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/tbbEvaluator.h b/opensubdiv/osd/tbbEvaluator.h new file mode 100644 index 00000000..8521cc82 --- /dev/null +++ b/opensubdiv/osd/tbbEvaluator.h @@ -0,0 +1,112 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. 
This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_TBB_EVALUATOR_H +#define OPENSUBDIV_OSD_TBB_EVALUATOR_H + +#include "../version.h" +#include "../osd/vertexDescriptor.h" + +#include + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +class TbbEvaluator { +public: + /// \brief Generic static stencil eval function. This function has the same + /// signature as other device kernels so that it can be called + /// transparently from the OsdMesh template interface. + /// + /// @param srcVertexBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstVertexBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. 
+ /// + /// @param instance not used in the tbb kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the tbb kernel + /// + template <typename VERTEX_BUFFER, typename STENCIL_TABLE> + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + TbbEvaluator const *instance = NULL, + void *deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcVertexBuffer->BindCpuBuffer(), + srcDesc, + dstVertexBuffer->BindCpuBuffer(), + dstDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char *sizes, + const int *offsets, + const int *indices, + const float *weights, + int start, + int end); + + static void Synchronize(void *deviceContext = NULL); + + static void SetNumThreads(int numThreads); +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_TBB_EVALUATOR_H diff --git a/opensubdiv/osd/tbbKernel.cpp b/opensubdiv/osd/tbbKernel.cpp index 86e910aa..cf82cdb3 100644 --- a/opensubdiv/osd/tbbKernel.cpp +++ b/opensubdiv/osd/tbbKernel.cpp @@ -161,15 +161,15 @@ public: }; void -TbbComputeStencils(float const * src, - VertexBufferDescriptor const &srcDesc, - float * dst, - VertexBufferDescriptor const &dstDesc, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end) { +TbbEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + 
float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end) { if (start > 0) { sizes += start; diff --git a/opensubdiv/osd/tbbKernel.h b/opensubdiv/osd/tbbKernel.h index c0a821b6..7d7649ad 100644 --- a/opensubdiv/osd/tbbKernel.h +++ b/opensubdiv/osd/tbbKernel.h @@ -22,8 +22,8 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_TBB_KERNEL_H -#define OSD_TBB_KERNEL_H +#ifndef OPENSUBDIV_OSD_TBB_KERNEL_H +#define OPENSUBDIV_OSD_TBB_KERNEL_H #include "../version.h" @@ -35,15 +35,15 @@ namespace Osd { struct VertexBufferDescriptor; void -TbbComputeStencils(float const * src, - VertexBufferDescriptor const &srcDesc, - float * dst, - VertexBufferDescriptor const &dstDesc, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end); +TbbEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end); } // end namespace Osd @@ -52,4 +52,4 @@ using namespace OPENSUBDIV_VERSION; } // end namespace OpenSubdiv -#endif // OSD_TBB_KERNEL_H +#endif // OPENSUBDIV_OSD_TBB_KERNEL_H diff --git a/regression/osd_regression/main.cpp b/regression/osd_regression/main.cpp index aacbbbf2..552c60da 100644 --- a/regression/osd_regression/main.cpp +++ b/regression/osd_regression/main.cpp @@ -46,9 +46,9 @@ GLFWwindow* g_window=0; #include #include +#include #include -#include -#include + #include @@ -268,17 +268,10 @@ static int checkMeshCPU( FarTopologyRefiner *refiner, const std::vector& coarseverts, xyzmesh * refmesh) { - - static Osd::CpuComputeController *controller = - new Osd::CpuComputeController(); - Far::StencilTables const *vertexStencils; Far::StencilTables const 
*varyingStencils; buildStencilTables(*refiner, &vertexStencils, &varyingStencils); - Osd::CpuComputeContext *context = Osd::CpuComputeContext::Create( - vertexStencils, varyingStencils); - assert(coarseverts.size() == (size_t)refiner->GetNumVerticesTotal()); @@ -287,12 +280,16 @@ checkMeshCPU( FarTopologyRefiner *refiner, vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() ); - controller->Compute( context, vb ); + Osd::CpuEvaluator::EvalStencils( + vb, + Osd::VertexBufferDescriptor(0, 3, 3), + vb, + Osd::VertexBufferDescriptor(refiner->GetNumVertices(0)*3, 3, 3), + vertexStencils); int result = checkVertexBuffer(*refiner, refmesh, vb->BindCpuBuffer(), vb->GetNumElements()); - delete context; delete vertexStencils; delete varyingStencils; delete vb; @@ -305,28 +302,26 @@ static int checkMeshCPUGL(FarTopologyRefiner *refiner, const std::vector& coarseverts, xyzmesh * refmesh) { - - static Osd::CpuComputeController *controller = - new Osd::CpuComputeController(); - + Far::StencilTables const *vertexStencils; Far::StencilTables const *varyingStencils; buildStencilTables(*refiner, &vertexStencils, &varyingStencils); - Osd::CpuComputeContext *context = Osd::CpuComputeContext::Create( - vertexStencils, varyingStencils); - Osd::CpuGLVertexBuffer *vb = Osd::CpuGLVertexBuffer::Create(3, refiner->GetNumVerticesTotal()); vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() ); - controller->Compute( context, vb ); - + Osd::CpuEvaluator::EvalStencils( + vb, + Osd::VertexBufferDescriptor(0, 3, 3), + vb, + Osd::VertexBufferDescriptor(refiner->GetNumVertices(0)*3, 3, 3), + vertexStencils); + int result = checkVertexBuffer(*refiner, refmesh, vb->BindCpuBuffer(), vb->GetNumElements()); - delete context; delete vertexStencils; delete varyingStencils; delete vb; diff --git a/tutorials/osd/tutorial_0/osd_tutorial_0.cpp b/tutorials/osd/tutorial_0/osd_tutorial_0.cpp index 2b7ee656..94a77040 100644 --- a/tutorials/osd/tutorial_0/osd_tutorial_0.cpp +++ 
b/tutorials/osd/tutorial_0/osd_tutorial_0.cpp @@ -32,8 +32,7 @@ #include #include -#include -#include +#include #include #include @@ -73,11 +72,10 @@ int main(int, char **) { nCoarseVerts=0, nRefinedVerts=0; - Osd::CpuComputeContext * context=0; - // // Setup phase // + Far::StencilTables const * stencilTables = NULL; { // Setup Context Far::TopologyRefiner const * refiner = createTopologyRefiner(maxlevel); @@ -87,24 +85,15 @@ int main(int, char **) { options.generateOffsets=true; options.generateIntermediateLevels=false; - Far::StencilTables const * stencilTables = - Far::StencilTablesFactory::Create(*refiner, options); - - // Create an Osd Compute Context from the stencil tables - context = Osd::CpuComputeContext::Create(stencilTables, - /*vayingStencil=*/NULL); + stencilTables = Far::StencilTablesFactory::Create(*refiner, options); nCoarseVerts = refiner->GetNumVertices(0); nRefinedVerts = stencilTables->GetNumStencils(); // We are done with Far: cleanup tables delete refiner; - delete stencilTables; } - // Setup Controller - Osd::CpuComputeController controller; - // Setup a buffer for vertex primvar data: Osd::CpuVertexBuffer * vbuffer = Osd::CpuVertexBuffer::Create(3, nCoarseVerts + nRefinedVerts); @@ -117,8 +106,14 @@ int main(int, char **) { // and update every time control data changes vbuffer->UpdateData(g_verts, 0, nCoarseVerts); + + Osd::VertexBufferDescriptor srcDesc(0, 3, 3); + Osd::VertexBufferDescriptor dstDesc(nCoarseVerts*3, 3, 3); + // Launch the computation - controller.Compute(context, vbuffer); + Osd::CpuEvaluator::EvalStencils(vbuffer, srcDesc, + vbuffer, dstDesc, + stencilTables); } { // Visualization with Maya : print a MEL script that generates particles @@ -133,8 +128,8 @@ int main(int, char **) { printf("-c 1;\n"); } + delete stencilTables; delete vbuffer; - delete context; } //------------------------------------------------------------------------------