Refurbish osd layer API.

In OpenSubdiv 2.x, we encapsulated subdivision tables into
compute context in osd layer since those tables are order-dependent
and have to be applied in a certain manner. In 3.0, we adopted stencil
table based refinement. It's simpler, and such an encapsulation is
no longer needed. Also, the 2.0 API has several ownership issues with GPU
kernel caching, and forces unnecessary instantiation of controllers
even though the cpu kernels, unlike GPU ones, typically don't need instances.

This change completely revisits the osd client-facing APIs. All contexts and
controllers were replaced with device-specific tables and evaluators.
While we can still use consistent API across various device backends,
unnecessary complexities have been removed. For example, cpu evaluator
is just a set of static functions and also there's no need to replicate
FarStencilTables to ComputeContext.

Also the new API delegates the ownership of compiled GPU kernels
to clients, for better management of resources, especially in multi-GPU
environments.

In addition to integrating ComputeController and EvalStencilController into
a single function Evaluator::EvalStencils(), EvalLimit API is also added
into Evaluator. This is working but still in progress, and we'll make a followup
change for the complete implementation.

-some naming convention changes:
GLSLTransformFeedback to GLXFBEvaluator
GLSLCompute to GLComputeEvaluator

-move LimitLocation struct into examples/glEvalLimit.
We're still discussing patch evaluation interface. Basically we'd like
to tease all ptex-specific parametrization out of far/osd layer.

TODO:
-implement EvalPatches() in the right way
-derivative evaluation API is still interim.
-VertexBufferDescriptor needs a better API to advance its location
-synchronization mechanism is not ideal (too global).
-OsdMesh class is hacky. need to fix it.
This commit is contained in:
Takahito Tejima 2015-05-08 17:31:26 -07:00
parent 2aead56868
commit 33bfbf699b
93 changed files with 3963 additions and 10207 deletions

View File

@ -30,41 +30,28 @@
#include <far/error.h>
#include <osd/cpuD3D11VertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL;
#include <osd/cpuEvaluator.h>
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompComputeController.h>
OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL;
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clD3D11VertexBuffer.h>
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include <osd/clEvaluator.h>
#include "../common/clDeviceContext.h"
CLD3D11DeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaD3D11VertexBuffer.h>
#include <osd/cudaComputeContext.h>
#include <osd/cudaComputeController.h>
#include <osd/cudaEvaluator.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL;
#endif
#include <osd/d3d11VertexBuffer.h>
#include <osd/d3d11ComputeContext.h>
#include <osd/d3d11ComputeController.h>
OpenSubdiv::Osd::D3D11ComputeController * g_d3d11ComputeController = NULL;
#include <osd/d3d11ComputeEvaluator.h>
#include <osd/d3d11Mesh.h>
OpenSubdiv::Osd::D3D11MeshInterface *g_mesh;
@ -635,6 +622,8 @@ createPtex(const char *filename) {
//------------------------------------------------------------------------------
void
createOsdMesh(int level, int kernel) {
using namespace OpenSubdiv;
Ptex::String ptexError;
PtexTexture *ptexColor = PtexTexture::open(g_ptexColorFilename, ptexError, true);
if (ptexColor == NULL) {
@ -691,79 +680,81 @@ createOsdMesh(int level, int kernel) {
int numVertexElements = 6; //g_adaptive ? 3 : 6;
int numVaryingElements = 0;
if (kernel == kCPU) {
if (not g_cpuComputeController) {
g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuD3D11VertexBuffer,
OpenSubdiv::Osd::CpuComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_cpuComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
if (g_kernel == kCPU) {
g_mesh = new Osd::Mesh<Osd::CpuD3D11VertexBuffer,
Far::StencilTables,
Osd::CpuEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (kernel == kOPENMP) {
if (not g_ompComputeController) {
g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuD3D11VertexBuffer,
OpenSubdiv::Osd::OmpComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_ompComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
g_mesh = new Osd::Mesh<Osd::CpuD3D11VertexBuffer,
Far::StencilTables,
Osd::OmpEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (kernel == kTBB) {
g_mesh = new Osd::Mesh<Osd::CpuD3D11VertexBuffer,
Far::StencilTables,
Osd::TbbEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if (kernel == kCL) {
if (not g_clComputeController) {
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLD3D11VertexBuffer,
OpenSubdiv::Osd::CLComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
CLD3D11DeviceContext>(
g_clComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, &g_clDeviceContext);
} else if(kernel == kCL) {
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
g_mesh = new Osd::Mesh<Osd::CLD3D11VertexBuffer,
Osd::CLStencilTables,
Osd::CLEvaluator,
Osd::D3D11DrawContext,
CLD3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&clEvaluatorCache,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (kernel == kCUDA) {
if (not g_cudaComputeController) {
g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CudaD3D11VertexBuffer,
OpenSubdiv::Osd::CudaComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_cudaComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
} else if (g_kernel == kCUDA) {
g_mesh = new Osd::Mesh<Osd::CudaD3D11VertexBuffer,
Osd::CudaStencilTables,
Osd::CudaEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#endif
} else if (g_kernel == kDirectCompute) {
if (not g_d3d11ComputeController) {
g_d3d11ComputeController = new OpenSubdiv::Osd::D3D11ComputeController(g_pd3dDeviceContext);
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::D3D11VertexBuffer,
OpenSubdiv::Osd::D3D11ComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_d3d11ComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
static Osd::EvaluatorCacheT<Osd::D3D11ComputeEvaluator> d3d11ComputeEvaluatorCache;
g_mesh = new Osd::Mesh<Osd::D3D11VertexBuffer,
Osd::D3D11StencilTables,
Osd::D3D11ComputeEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&d3d11ComputeEvaluatorCache,
g_pd3dDeviceContext);
} else {
printf("Unsupported kernel %s\n", getKernelName(kernel));
}
@ -1122,22 +1113,6 @@ quit() {
SAFE_RELEASE(g_pd3dDeviceContext);
SAFE_RELEASE(g_pd3dDevice);
delete g_cpuComputeController;
#ifdef OPENSUBDIV_HAS_OPENMP
delete g_ompComputeController;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
#endif
delete g_d3d11ComputeController;
PostQuitMessage(0);
exit(0);
}

View File

@ -30,46 +30,32 @@
#include <far/error.h>
#include <osd/cpuD3D11VertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL;
#include <osd/cpuEvaluator.h>
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompComputeController.h>
OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL;
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbComputeController.h>
OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL;
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clD3D11VertexBuffer.h>
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include <osd/clEvaluator.h>
#include "../common/clDeviceContext.h"
CLD3D11DeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaD3D11VertexBuffer.h>
#include <osd/cudaComputeContext.h>
#include <osd/cudaComputeController.h>
#include <osd/cudaEvaluator.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL;
#endif
#include <osd/d3d11VertexBuffer.h>
#include <osd/d3d11ComputeContext.h>
#include <osd/d3d11ComputeController.h>
OpenSubdiv::Osd::D3D11ComputeController * g_d3d11ComputeController = NULL;
#include <osd/d3d11ComputeEvaluator.h>
#include <osd/d3d11Mesh.h>
OpenSubdiv::Osd::D3D11MeshInterface *g_mesh;
@ -272,17 +258,18 @@ getKernelName(int kernel) {
static void
createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=kCatmark) {
typedef OpenSubdiv::Far::ConstIndexArray IndexArray;
using namespace OpenSubdiv;
typedef Far::ConstIndexArray IndexArray;
Shape * shape = Shape::parseObj(shapeDesc.data.c_str(), shapeDesc.scheme);
// create Vtr mesh (topology)
OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape);
OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape);
Sdc::SchemeType sdctype = GetSdcType(*shape);
Sdc::Options sdcoptions = GetSdcOptions(*shape);
OpenSubdiv::Far::TopologyRefiner * refiner =
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Create(*shape,
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
Far::TopologyRefiner * refiner =
Far::TopologyRefinerFactory<Shape>::Create(*shape,
Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
// save coarse topology (used for coarse mesh drawing)
int nedges = refiner->GetNumEdges(0),
@ -316,104 +303,90 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=
bool doAdaptive = (g_adaptive!=0 and g_scheme==kCatmark),
doSingleCreasePatch = (g_singleCreasePatch!=0 and g_scheme==kCatmark);
OpenSubdiv::Osd::MeshBitset bits;
bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive);
bits.set(OpenSubdiv::Osd::MeshUseSingleCreasePatch, doSingleCreasePatch);
Osd::MeshBitset bits;
bits.set(Osd::MeshAdaptive, doAdaptive);
bits.set(Osd::MeshUseSingleCreasePatch, doSingleCreasePatch);
// gregory basis isn't supported yet in D3D11Mesh
bits.set(OpenSubdiv::Osd::MeshEndCapLegacyGregory, true);
bits.set(Osd::MeshEndCapLegacyGregory, true);
int numVertexElements = 6;
int numVaryingElements = 0;
if (g_kernel == kCPU) {
if (not g_cpuComputeController) {
g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuD3D11VertexBuffer,
OpenSubdiv::Osd::CpuComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_cpuComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
g_mesh = new Osd::Mesh<Osd::CpuD3D11VertexBuffer,
Far::StencilTables,
Osd::CpuEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (kernel == kOPENMP) {
if (not g_ompComputeController) {
g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuD3D11VertexBuffer,
OpenSubdiv::Osd::OmpComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_ompComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
g_mesh = new Osd::Mesh<Osd::CpuD3D11VertexBuffer,
Far::StencilTables,
Osd::OmpEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (kernel == kTBB) {
if (not g_tbbComputeController) {
g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuD3D11VertexBuffer,
OpenSubdiv::Osd::TbbComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_tbbComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
g_mesh = new Osd::Mesh<Osd::CpuD3D11VertexBuffer,
Far::StencilTables,
Osd::TbbEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if(kernel == kCL) {
if (not g_clComputeController) {
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLD3D11VertexBuffer,
OpenSubdiv::Osd::CLComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
CLD3D11DeviceContext>(
g_clComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&g_clDeviceContext);
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
g_mesh = new Osd::Mesh<Osd::CLD3D11VertexBuffer,
Osd::CLStencilTables,
Osd::CLEvaluator,
Osd::D3D11DrawContext,
CLD3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&clEvaluatorCache,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (g_kernel == kCUDA) {
if (not g_cudaComputeController) {
g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CudaD3D11VertexBuffer,
OpenSubdiv::Osd::CudaComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_cudaComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
g_mesh = new Osd::Mesh<Osd::CudaD3D11VertexBuffer,
Osd::CudaStencilTables,
Osd::CudaEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, NULL, g_pd3dDeviceContext);
#endif
} else if (g_kernel == kDirectCompute) {
if (not g_d3d11ComputeController) {
g_d3d11ComputeController = new OpenSubdiv::Osd::D3D11ComputeController(g_pd3dDeviceContext);
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::D3D11VertexBuffer,
OpenSubdiv::Osd::D3D11ComputeController,
OpenSubdiv::Osd::D3D11DrawContext,
ID3D11DeviceContext>(
g_d3d11ComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, g_pd3dDeviceContext);
static Osd::EvaluatorCacheT<Osd::D3D11ComputeEvaluator> d3d11ComputeEvaluatorCache;
g_mesh = new Osd::Mesh<Osd::D3D11VertexBuffer,
Osd::D3D11StencilTables,
Osd::D3D11ComputeEvaluator,
Osd::D3D11DrawContext,
ID3D11DeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&d3d11ComputeEvaluatorCache,
g_pd3dDeviceContext);
} else {
printf("Unsupported kernel %s\n", getKernelName(kernel));
}
@ -995,26 +968,6 @@ quit() {
SAFE_RELEASE(g_pd3dDeviceContext);
SAFE_RELEASE(g_pd3dDevice);
delete g_cpuComputeController;
#ifdef OPENSUBDIV_HAS_OPENMP
delete g_ompComputeController;
#endif
#ifdef OPENSUBDIV_HAS_TBB
delete g_tbbComputeController;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
#endif
delete g_d3d11ComputeController;
PostQuitMessage(0);
exit(0);
}

View File

@ -42,19 +42,18 @@
GLFWwindow* g_window=0;
GLFWmonitor* g_primary=0;
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
#include <osd/cpuEvalLimitContext.h>
#include <osd/cpuEvalLimitController.h>
#include <osd/cpuEvaluator.h>
#include <osd/cpuVertexBuffer.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/drawContext.h>
#include <osd/mesh.h>
#include <far/gregoryBasis.h>
#include <far/endCapGregoryBasisPatchFactory.h>
#include <far/topologyRefiner.h>
#include <far/stencilTablesFactory.h>
#include <far/patchTablesFactory.h>
#include <far/patchMap.h>
#include <far/error.h>
@ -198,13 +197,12 @@ Far::TopologyRefiner * g_topologyRefiner = 0;
Osd::CpuVertexBuffer * g_vertexData = 0,
* g_varyingData = 0;
Osd::CpuComputeContext * g_computeCtx = 0;
Far::StencilTables const * g_vertexStencils = NULL;
Far::StencilTables const * g_varyingStencils = NULL;
Osd::CpuComputeController g_computeCtrl;
Osd::CpuEvalLimitContext * g_evalCtx = 0;
Osd::CpuEvalLimitController g_evalCtrl;
Far::PatchTables const * g_patchTables = NULL;
Far::PatchMap const * g_patchMap = NULL;
Osd::PatchCoordArray g_patchCoords;
Osd::VertexBufferDescriptor g_idesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 3 ),
g_odesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6 ),
@ -245,7 +243,25 @@ updateGeom() {
g_vertexData->UpdateData( &g_positions[0], 0, nverts);
g_computeCtrl.Compute(g_computeCtx, g_vertexData, g_varyingData);
if (! g_topologyRefiner) return;
// note that for patch eval we need coarse+refined combined buffer.
int nCoarseVertices = g_topologyRefiner->GetNumVertices(0);
Osd::CpuEvaluator::EvalStencils(g_vertexData,
Osd::VertexBufferDescriptor(0, 3, 3),
g_vertexData,
Osd::VertexBufferDescriptor(
nCoarseVertices*3, 3, 3),
g_vertexStencils);
if (g_varyingData) {
Osd::CpuEvaluator::EvalStencils(g_varyingData,
Osd::VertexBufferDescriptor(0, 3, 3),
g_varyingData,
Osd::VertexBufferDescriptor(
nCoarseVertices*3, 3, 3),
g_varyingStencils);
}
s.Stop();
g_computeTime = float(s.GetElapsed() * 1000.0f);
@ -255,66 +271,37 @@ updateGeom() {
s.Start();
// The varying data ends-up interleaved in the same g_Q output buffer because
// g_Q has a stride of 6 and g_vdesc sets the offset to 3, while g_odesc sets
// the offset to 0
switch (g_drawMode) {
case kVARYING : g_evalCtrl.BindVaryingBuffers( g_idesc, g_varyingData, g_vdesc, g_Q ); break;
case kFACEVARYING : //g_evalCtrl.BindFacevaryingBuffers( g_fvidesc, g_fvodesc, g_Q ); break;
case kRANDOM :
case kUV :
default : g_evalCtrl.Unbind(); break;
}
// Bind/Unbind of the vertex buffers to the context needs to happen
// outside of the parallel loop
g_evalCtrl.BindVertexBuffers( g_idesc, g_vertexData, g_odesc, g_Q, g_dQs, g_dQt );
// Apply 'dynamics' update
assert(g_particles);
g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time...
// Evaluate the positions of the samples on the limit surface
g_nsamplesFound=0;
#define USE_OPENMP
#if defined(OPENSUBDIV_HAS_OPENMP) and defined(USE_OPENMP)
#pragma omp parallel for
#endif
for (int i=0; i<g_nparticles; ++i) {
Osd::LimitLocation & coord = g_particles->GetPositions()[i];
int n = g_evalCtrl.EvalLimitSample( coord, g_evalCtx, i );
if (n) {
// point colors
switch (g_drawMode) {
case kUV : { float * color = g_Q->BindCpuBuffer() + i*g_Q->GetNumElements() + 3;
color[0] = coord.s;
color[1] = 0.0f;
color[2] = coord.t; } break;
case kRANDOM : // no update needed
case kVARYING :
case kFACEVARYING : break;
default : break;
}
#if defined(OPENSUBDIV_HAS_OPENMP) and defined(USE_OPENMP)
#pragma omp atomic
#endif
g_nsamplesFound += n;
} else {
// "hide" unfound samples (hole tags...) as a black dot at the origin
float * sample = g_Q->BindCpuBuffer() + i*g_Q->GetNumElements();
memset(sample, 0, g_Q->GetNumElements() * sizeof(float));
// resolve particle positions into patch handles
// XXX: this process should be handled by OsdKernel in parallel
g_patchCoords.clear();
for (int i = 0; i < g_particles->GetNumParticles(); ++i) {
STParticles::Position const &position = g_particles->GetPositions()[i];
Far::PatchTables::PatchHandle const *handle =
g_patchMap->FindPatch(position.ptexIndex, position.s, position.t);
if (handle) {
g_patchCoords.push_back(Osd::PatchCoord(
*handle, position.s, position.t));
}
}
g_evalCtrl.Unbind();
// Evaluate the positions of the samples on the limit surface
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(g_vertexData, g_idesc,
g_Q, g_odesc,
g_patchCoords,
g_patchTables, NULL);
// varying
if (g_drawMode == kVARYING) {
Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc,
g_Q, g_vdesc,
g_patchCoords,
g_patchTables, NULL);
}
g_Q->BindVBO();
@ -335,7 +322,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape);
delete g_topologyRefiner;
OpenSubdiv::Far::TopologyRefiner * g_topologyRefiner =
g_topologyRefiner =
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Create(*shape,
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
@ -409,18 +396,17 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
nverts = vertexStencils->GetNumControlVertices() +
vertexStencils->GetNumStencils();
// Create an Osd Compute context, used to "pose" the vertices with
// the stencils tables
delete g_computeCtx;
g_computeCtx = Osd::CpuComputeContext::Create(vertexStencils,
varyingStencils);
if (g_vertexStencils) delete g_vertexStencils;
g_vertexStencils = vertexStencils;
if (g_varyingStencils) delete g_varyingStencils;
g_varyingStencils = varyingStencils;
// Create a limit Eval context with the patch tables
delete g_evalCtx;
g_evalCtx = Osd::CpuEvalLimitContext::Create(*patchTables);
if (g_patchTables) delete g_patchTables;
g_patchTables = patchTables;
delete vertexStencils;
delete varyingStencils;
// Create a far patch map
if (g_patchMap) delete g_patchMap;
g_patchMap = new Far::PatchMap(*g_patchTables);
}
{ // Create vertex primvar buffer for the CVs

View File

@ -25,7 +25,6 @@
#ifndef ST_PARTICLES_H
#define ST_PARTICLES_H
#include <osd/evalLimitContext.h>
#include <far/topologyRefiner.h>
#include <iostream>
@ -54,8 +53,25 @@
class STParticles {
public:
/// \brief Coordinates set on a limit surface
///
/// A sample location expressed as a ptex face index plus a parametric
/// (s,t) location on that face.
///
struct Position {

    /// \brief Default constructor
    ///
    /// Zero-initializes every member so that a default-constructed
    /// Position never exposes indeterminate values.
    ///
    Position() : ptexIndex(0), s(0.0f), t(0.0f) { }

    /// \brief Constructor
    ///
    /// @param f Ptex face id
    ///
    /// @param x parametric location on face
    ///
    /// @param y parametric location on face
    ///
    Position(int f, float x, float y) : ptexIndex(f), s(x), t(y) { }

    int ptexIndex;      ///< ptex face index
    float s, t;         ///< parametric location on face
};
typedef OpenSubdiv::Osd::LimitLocation Position;
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
STParticles(Refiner const & refiner, int nparticles, bool centered=false);

View File

@ -46,10 +46,8 @@ GLFWmonitor* g_primary = 0;
#include <osd/glDrawRegistry.h>
#include <far/error.h>
#include <osd/cpuEvaluator.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#include <osd/glMesh.h>
OpenSubdiv::Osd::GLMeshInterface *g_mesh = NULL;
@ -358,20 +356,15 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, Scheme scheme = kCatmark)
int numVertexElements = 3;
int numVaryingElements = 0;
if (not g_cpuComputeController) {
g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController();
}
delete g_mesh;
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::CpuComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_cpuComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
OpenSubdiv::Far::StencilTables,
OpenSubdiv::Osd::CpuEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
std::vector<float> fvarData;
@ -1008,8 +1001,6 @@ uninitGL() {
if (g_mesh)
delete g_mesh;
delete g_cpuComputeController;
}
//------------------------------------------------------------------------------

View File

@ -46,55 +46,40 @@
#include <limits>
#include <GLFW/glfw3.h>
#include <osd/cpuEvaluator.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompComputeController.h>
OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL;
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbComputeController.h>
OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL;
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clEvaluator.h>
#include <osd/clGLVertexBuffer.h>
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaEvaluator.h>
#include <osd/cudaGLVertexBuffer.h>
#include <osd/cudaComputeContext.h>
#include <osd/cudaComputeController.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
#include <osd/glslTransformFeedbackComputeContext.h>
#include <osd/glslTransformFeedbackComputeController.h>
#include <osd/glXFBEvaluator.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslTransformFeedbackComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
#include <osd/glslComputeContext.h>
#include <osd/glslComputeController.h>
#include <osd/glComputeEvaluator.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL;
#endif
#include <osd/glDrawContext.h>
@ -244,103 +229,82 @@ createOsdMesh(std::string const &kernel,
Osd::MeshBitset bits)
{
if (kernel == "CPU") {
if (not g_cpuComputeController) {
g_cpuComputeController = new Osd::CpuComputeController();
}
return new Osd::Mesh<Osd::CpuGLVertexBuffer,
Osd::CpuComputeController,
Osd::GLDrawContext>(
g_cpuComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
Far::StencilTables,
Osd::CpuEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (kernel == "OPENMP") {
if (not g_ompComputeController) {
g_ompComputeController = new Osd::OmpComputeController();
}
return new Osd::Mesh<Osd::CpuGLVertexBuffer,
Osd::OmpComputeController,
Osd::GLDrawContext>(
g_ompComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
Far::StencilTables,
Osd::OmpEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (kernel == "TBB") {
if (not g_tbbComputeController) {
g_tbbComputeController = new Osd::TbbComputeController();
}
return new Osd::Mesh<Osd::CpuGLVertexBuffer,
Osd::TbbComputeController,
Osd::GLDrawContext>(
g_tbbComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
Far::StencilTables,
Osd::TbbEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if(kernel == "CL") {
if (not g_clComputeController) {
g_clComputeController = new Osd::CLComputeController(
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
return new Osd::Mesh<Osd::CLGLVertexBuffer,
Osd::CLComputeController,
Osd::GLDrawContext,
CLDeviceContext>(
g_clComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, &g_clDeviceContext);
Osd::CLStencilTables,
Osd::CLEvaluator,
Osd::GLDrawContext,
CLDeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
NULL,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if(kernel == "CUDA") {
if (not g_cudaComputeController) {
g_cudaComputeController = new Osd::CudaComputeController();
}
return new Osd::Mesh<Osd::CudaGLVertexBuffer,
Osd::CudaComputeController,
Osd::GLDrawContext>(
g_cudaComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
Osd::CudaStencilTables,
Osd::CudaEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if(kernel == "XFB") {
if (not g_glslTransformFeedbackComputeController) {
g_glslTransformFeedbackComputeController = new Osd::GLSLTransformFeedbackComputeController();
}
return new Osd::Mesh<Osd::GLVertexBuffer,
Osd::GLSLTransformFeedbackComputeController,
Osd::GLDrawContext>(
g_glslTransformFeedbackComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
Osd::GLStencilTablesTBO,
Osd::GLXFBEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if(kernel == "GLSL") {
if (not g_glslComputeController) {
g_glslComputeController = new Osd::GLSLComputeController();
}
return new Osd::Mesh<Osd::GLVertexBuffer,
Osd::GLSLComputeController,
Osd::GLDrawContext>(
g_glslComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
Osd::GLStencilTablesSSBO,
Osd::GLComputeEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
}

View File

@ -47,10 +47,8 @@ GLFWmonitor* g_primary=0;
#include <far/error.h>
#include <far/ptexIndices.h>
#include <osd/cpuEvaluator.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#include <osd/glMesh.h>
OpenSubdiv::Osd::GLMeshInterface *g_mesh;
@ -240,14 +238,11 @@ createOsdMesh() {
bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive);
bits.set(OpenSubdiv::Osd::MeshPtexData, true);
if (not g_cpuComputeController) {
g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::CpuComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_cpuComputeController,
refiner, 3, 0, g_level, bits);
OpenSubdiv::Far::StencilTables,
OpenSubdiv::Osd::CpuEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner, 3, 0, g_level, bits);
// compute model bounding
float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX};
@ -1095,8 +1090,6 @@ uninitGL() {
if (g_mesh)
delete g_mesh;
delete g_cpuComputeController;
}
//------------------------------------------------------------------------------

View File

@ -54,56 +54,39 @@ GLFWmonitor* g_primary = 0;
#include <osd/glDrawRegistry.h>
#include <far/error.h>
#include <osd/cpuEvaluator.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL;
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompComputeController.h>
OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL;
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbComputeController.h>
OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL;
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clEvaluator.h>
#include <osd/clGLVertexBuffer.h>
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaEvaluator.h>
#include <osd/cudaGLVertexBuffer.h>
#include <osd/cudaComputeContext.h>
#include <osd/cudaComputeController.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
#include <osd/glslTransformFeedbackComputeContext.h>
#include <osd/glslTransformFeedbackComputeController.h>
#include <osd/glXFBEvaluator.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController
*g_glslTransformFeedbackComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
#include <osd/glslComputeContext.h>
#include <osd/glslComputeController.h>
#include <osd/glComputeEvaluator.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLComputeController * g_glslComputeController = NULL;
#endif
#include <osd/glMesh.h>
@ -1029,26 +1012,20 @@ createOsdMesh(int level, int kernel) {
int numVaryingElements = 0;
if (kernel == kCPU) {
if (not g_cpuComputeController) {
g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::CpuComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_cpuComputeController,
OpenSubdiv::Far::StencilTables,
OpenSubdiv::Osd::CpuEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (kernel == kOPENMP) {
if (not g_ompComputeController) {
g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::OmpComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_ompComputeController,
OpenSubdiv::Far::StencilTables,
OpenSubdiv::Osd::OmpEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
@ -1056,13 +1033,10 @@ createOsdMesh(int level, int kernel) {
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (kernel == kTBB) {
if (not g_tbbComputeController) {
g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::TbbComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_tbbComputeController,
OpenSubdiv::Far::StencilTables,
OpenSubdiv::Osd::TbbEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
@ -1070,30 +1044,25 @@ createOsdMesh(int level, int kernel) {
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if (kernel == kCL) {
if (not g_clComputeController) {
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
static OpenSubdiv::Osd::EvaluatorCacheT<OpenSubdiv::Osd::CLEvaluator> clEvaluatorCache;
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLGLVertexBuffer,
OpenSubdiv::Osd::CLComputeController,
OpenSubdiv::Osd::GLDrawContext,
CLDeviceContext>(
g_clComputeController,
OpenSubdiv::Osd::CLStencilTables,
OpenSubdiv::Osd::CLEvaluator,
OpenSubdiv::Osd::GLDrawContext,
CLDeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits, &g_clDeviceContext);
level, bits,
&clEvaluatorCache,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (kernel == kCUDA) {
if (not g_cudaComputeController) {
g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CudaGLVertexBuffer,
OpenSubdiv::Osd::CudaComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_cudaComputeController,
OpenSubdiv::Osd::CudaStencilTables,
OpenSubdiv::Osd::CudaEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
@ -1101,32 +1070,29 @@ createOsdMesh(int level, int kernel) {
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if (kernel == kGLSL) {
if (not g_glslTransformFeedbackComputeController) {
g_glslTransformFeedbackComputeController =
new OpenSubdiv::Osd::GLSLTransformFeedbackComputeController();
}
static OpenSubdiv::Osd::EvaluatorCacheT<OpenSubdiv::Osd::GLXFBEvaluator> glXFBEvaluatorCache;
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::GLVertexBuffer,
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_glslTransformFeedbackComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
OpenSubdiv::Osd::GLStencilTablesTBO,
OpenSubdiv::Osd::GLXFBEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&glXFBEvaluatorCache);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if (kernel == kGLSLCompute) {
if (not g_glslComputeController) {
g_glslComputeController = new OpenSubdiv::Osd::GLSLComputeController();
}
static OpenSubdiv::Osd::EvaluatorCacheT<OpenSubdiv::Osd::GLComputeEvaluator> glComputeEvaluatorCache;
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::GLVertexBuffer,
OpenSubdiv::Osd::GLSLComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_glslComputeController,
OpenSubdiv::Osd::GLStencilTablesSSBO,
OpenSubdiv::Osd::GLComputeEvaluator,
OpenSubdiv::Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
level, bits,
&glComputeEvaluatorCache);
#endif
} else {
printf("Unsupported kernel %s\n", getKernelName(kernel));
@ -1977,32 +1943,6 @@ void uninitGL() {
if (g_mesh)
delete g_mesh;
delete g_cpuComputeController;
#ifdef OPENSUBDIV_HAS_OPENMP
delete g_ompComputeController;
#endif
#ifdef OPENSUBDIV_HAS_TBB
delete g_tbbComputeController;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
delete g_glslTransformFeedbackComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
delete g_glslComputeController;
#endif
if (g_diffuseEnvironmentMap)
glDeleteTextures(1, &g_diffuseEnvironmentMap);
if (g_specularEnvironmentMap)

View File

@ -46,55 +46,42 @@ GLFWmonitor* g_primary=0;
#include <osd/glDrawRegistry.h>
#include <osd/glMesh.h>
#include <far/error.h>
#include <far/stencilTables.h>
#include <far/ptexIndices.h>
#include <osd/mesh.h>
#include <osd/glVertexBuffer.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#include <osd/cpuEvaluator.h>
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompComputeController.h>
OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL;
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbComputeController.h>
OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL;
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clGLVertexBuffer.h>
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
#include <osd/clEvaluator.h>
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaGLVertexBuffer.h>
#include <osd/cudaComputeContext.h>
#include <osd/cudaComputeController.h>
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
#include <osd/cudaEvaluator.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
#include <osd/glslTransformFeedbackComputeContext.h>
#include <osd/glslTransformFeedbackComputeController.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslXFBComputeController = NULL;
#include <osd/glXFBEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
#include <osd/glslComputeContext.h>
#include <osd/glslComputeController.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL;
#include <osd/glComputeEvaluator.h>
#endif
@ -171,7 +158,6 @@ public:
if (interleaved) {
assert(vertexDesc.stride == varyingDesc.stride);
_vertexBuffer = createVertexBuffer(
vertexDesc.stride, numInstances * numVertices);
} else {
@ -259,9 +245,12 @@ public:
return _restPosition;
}
int GetNumVertices() const {
int GetNumVertices() const { // total (control + refined)
return _numVertices;
}
int GetNumControlVertices() const {
return _numControlVertices;
}
protected:
@ -281,40 +270,47 @@ protected:
}
int _numVertices;
int _numControlVertices;
private:
Osd::GLDrawContext *_drawContext;
std::vector<float> _restPosition;
};
template <class COMPUTE_CONTROLLER, class VERTEX_BUFFER,
template <class EVALUATOR,
class VERTEX_BUFFER,
class STENCIL_TABLES,
class DEVICE_CONTEXT=void>
class Topology : public TopologyBase {
public:
typedef COMPUTE_CONTROLLER ComputeController;
typedef typename COMPUTE_CONTROLLER::ComputeContext ComputeContext;
typedef EVALUATOR Evaluator;
typedef STENCIL_TABLES StencilTables;
typedef DEVICE_CONTEXT DeviceContext;
typedef Osd::EvaluatorCacheT<Evaluator> EvaluatorCache;
Topology(ComputeController * computeController,
Far::PatchTables const * patchTables,
Far::StencilTables const * vertexStencils,
Topology(Far::PatchTables const * patchTables,
Far::StencilTables const * vertexStencils, //XXX: takes ownership
Far::StencilTables const * varyingStencils,
int numControlVertices,
EvaluatorCache * evaluatorCache = NULL,
DeviceContext * deviceContext = NULL)
: TopologyBase(patchTables),
_computeController(computeController),
_evaluatorCache(evaluatorCache),
_deviceContext(deviceContext) {
_computeContext = ComputeContext::Create(
vertexStencils, varyingStencils, deviceContext);
_numControlVertices = numControlVertices;
_numVertices = numControlVertices + vertexStencils->GetNumStencils();
_vertexStencils = Osd::convertToCompatibleStencilTables<StencilTables>(
vertexStencils, deviceContext);
_varyingStencils = Osd::convertToCompatibleStencilTables<StencilTables>(
varyingStencils, deviceContext);
_numVertices = vertexStencils->GetNumStencils() +
vertexStencils->GetNumControlVertices();
}
~Topology() {
delete _computeContext;
delete _vertexStencils;
delete _varyingStencils;
}
void Refine(InstancesBase *instance, int numInstances) {
@ -329,21 +325,59 @@ public:
for (int i = 0; i < numInstances; ++i) {
Osd::VertexBufferDescriptor vertexDesc(
globalVertexDesc.offset + _numVertices*globalVertexDesc.stride*i,
Osd::VertexBufferDescriptor vertexSrcDesc(
globalVertexDesc.offset + _numVertices*i*globalVertexDesc.stride,
globalVertexDesc.length,
globalVertexDesc.stride);
Osd::VertexBufferDescriptor varyingDesc(
globalVaryingDesc.offset + _numVertices*globalVaryingDesc.stride*i,
globalVaryingDesc.length,
globalVaryingDesc.stride);
Osd::VertexBufferDescriptor vertexDstDesc(
globalVertexDesc.offset + (_numVertices*i + _numControlVertices)*globalVertexDesc.stride,
globalVertexDesc.length,
globalVertexDesc.stride);
_computeController->Compute(_computeContext,
typedInstance->GetVertexBuffer(),
typedInstance->GetVaryingBuffer(),
&vertexDesc,
&varyingDesc);
// vertex
Evaluator const *evalInstance = Osd::GetEvaluator<Evaluator>(
_evaluatorCache, vertexSrcDesc, vertexDstDesc, _deviceContext);
Evaluator::EvalStencils(typedInstance->GetVertexBuffer(), vertexSrcDesc,
typedInstance->GetVertexBuffer(), vertexDstDesc,
_vertexStencils,
evalInstance,
_deviceContext);
// varying
if (_varyingStencils) {
Osd::VertexBufferDescriptor varyingSrcDesc(
globalVaryingDesc.offset + _numVertices*i*globalVaryingDesc.stride,
globalVaryingDesc.length,
globalVaryingDesc.stride);
Osd::VertexBufferDescriptor varyingDstDesc(
globalVaryingDesc.offset + (_numVertices*i + _numControlVertices)*globalVaryingDesc.stride,
globalVaryingDesc.length,
globalVaryingDesc.stride);
evalInstance = Osd::GetEvaluator<Evaluator>(
_evaluatorCache, varyingSrcDesc, varyingDstDesc, _deviceContext);
if (typedInstance->GetVaryingBuffer()) {
// non interleaved
Evaluator::EvalStencils(
typedInstance->GetVaryingBuffer(), varyingSrcDesc,
typedInstance->GetVaryingBuffer(), varyingDstDesc,
_varyingStencils,
evalInstance,
_deviceContext);
} else {
// interleaved
Evaluator::EvalStencils(
typedInstance->GetVertexBuffer(), varyingSrcDesc,
typedInstance->GetVertexBuffer(), varyingDstDesc,
_varyingStencils,
evalInstance,
_deviceContext);
}
}
}
}
@ -359,7 +393,7 @@ public:
}
virtual void Synchronize() {
_computeController->Synchronize();
Evaluator::Synchronize(_deviceContext);
}
virtual void UpdateVertexTexture(InstancesBase *instances) {
@ -371,8 +405,9 @@ public:
}
private:
ComputeController *_computeController;
ComputeContext *_computeContext;
StencilTables const *_vertexStencils;
StencilTables const *_varyingStencils;
EvaluatorCache * _evaluatorCache;
DeviceContext *_deviceContext;
};
@ -644,81 +679,85 @@ createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark )
}
}
int numControlVertices = refiner->GetNumVertices(0);
// create partitioned patcharray
TopologyBase *topology = NULL;
if (g_kernel == kCPU) {
if (not g_cpuComputeController)
g_cpuComputeController = new Osd::CpuComputeController();
topology = new Topology<Osd::CpuComputeController,
Osd::CpuGLVertexBuffer>(g_cpuComputeController,
topology = new Topology<Osd::CpuEvaluator,
Osd::CpuGLVertexBuffer,
Far::StencilTables>(
patchTables,
vertexStencils, varyingStencils);
vertexStencils, varyingStencils,
numControlVertices);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (g_kernel == kOPENMP) {
if (not g_ompComputeController)
g_ompComputeController = new Osd::OmpComputeController();
topology = new Topology<Osd::OmpComputeController,
Osd::CpuGLVertexBuffer>(g_ompComputeController,
topology = new Topology<Osd::OmpEvaluator,
Osd::CpuGLVertexBuffer,
Far::StencilTables>(
patchTables,
vertexStencils, varyingStencils);
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (g_kernel == kTBB) {
if (not g_tbbComputeController)
g_tbbComputeController = new Osd::TbbComputeController();
topology = new Topology<Osd::TbbComputeController,
Osd::CpuGLVertexBuffer>(g_tbbComputeController,
topology = new Topology<Osd::TbbEvaluator,
Osd::CpuGLVertexBuffer,
Far::StencilTables>(
patchTables,
vertexStencils, varyingStencils);
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (g_kernel == kCUDA) {
if (not g_cudaComputeController)
g_cudaComputeController = new Osd::CudaComputeController();
topology = new Topology<Osd::CudaComputeController,
Osd::CudaGLVertexBuffer>(g_cudaComputeController,
topology = new Topology<Osd::CudaEvaluator,
Osd::CudaGLVertexBuffer,
Osd::CudaStencilTables>(
patchTables,
vertexStencils, varyingStencils);
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if (g_kernel == kCL) {
if (not g_clComputeController)
g_clComputeController = new Osd::CLComputeController(
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
topology = new Topology<Osd::CLComputeController,
Osd::CLGLVertexBuffer,
CLDeviceContext>(g_clComputeController,
patchTables,
vertexStencils, varyingStencils,
&g_clDeviceContext);
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
topology = new Topology<Osd::CLEvaluator,
Osd::CLGLVertexBuffer,
Osd::CLStencilTables,
CLDeviceContext>(
patchTables,
vertexStencils, varyingStencils,
numControlVertices,
&clEvaluatorCache,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if (g_kernel == kGLSL) {
if (not g_glslXFBComputeController)
g_glslXFBComputeController = new Osd::GLSLTransformFeedbackComputeController();
topology = new Topology<Osd::GLSLTransformFeedbackComputeController,
Osd::GLVertexBuffer>(g_glslXFBComputeController,
patchTables,
vertexStencils, varyingStencils);
static Osd::EvaluatorCacheT<Osd::GLXFBEvaluator> glXFBEvaluatorCache;
topology = new Topology<Osd::GLXFBEvaluator,
Osd::GLVertexBuffer,
Osd::GLStencilTablesTBO>(
patchTables,
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if (g_kernel == kGLSLCompute) {
if (not g_glslComputeController)
g_glslComputeController = new Osd::GLSLComputeController();
topology = new Topology<Osd::GLSLComputeController,
Osd::GLVertexBuffer>(g_glslComputeController,
patchTables,
vertexStencils, varyingStencils);
static Osd::EvaluatorCacheT<Osd::GLComputeEvaluator> glComputeEvaluatorCache;
topology = new Topology<Osd::GLComputeEvaluator,
Osd::GLVertexBuffer,
Osd::GLStencilTablesSSBO>(
patchTables,
vertexStencils, varyingStencils,
numControlVertices);
#endif
} else {
}
delete refiner;
delete vertexStencils;
delete varyingStencils;
// XXX: Weird API: ownership of the stencil tables is handed to Topology, so they are not deleted here. Think again.
/// delete vertexStencils;
/// delete varyingStencils;
delete patchTables;
// centering rest position
@ -1291,28 +1330,6 @@ uninitGL() {
delete g_instances;
if (g_topology)
delete g_topology;
delete g_cpuComputeController;
#ifdef OPENSUBDIV_HAS_OPENMP
delete g_ompComputeController;
#endif
#ifdef OPENSUBDIV_HAS_TBB
delete g_tbbComputeController;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
delete g_glslXFBComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
delete g_glslComputeController;
#endif
}
//------------------------------------------------------------------------------
@ -1453,7 +1470,7 @@ static void
callbackDisplayStyle(int b) {
g_displayStyle = b;
rebuildInstances();
rebuildOsdMesh();
}
static void

View File

@ -54,8 +54,7 @@ GLFWmonitor* g_primary=0;
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuVertexBuffer.h>
#include <osd/cpuEvalStencilsContext.h>
#include <osd/cpuEvalStencilsController.h>
#include <osd/cpuEvaluator.h>
#include <cfloat>
#include <list>
@ -154,18 +153,12 @@ Osd::VertexBufferDescriptor g_controlDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*
g_outputDuDesc( /*offset*/ 3, /*length*/ 3, /*stride*/ 18 ),
g_outputDvDesc( /*offset*/ 9, /*length*/ 3, /*stride*/ 18 );
Osd::CpuEvalStencilsContext * g_evalCtx=0;
Osd::CpuEvalStencilsController g_evalCpuCtrl;
#if defined(OPENSUBDIV_HAS_OPENMP)
#include <osd/ompEvalStencilsController.h>
Osd::OmpEvalStencilsController g_evalOmpCtrl;
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbEvalStencilsController.h>
Osd::TbbEvalStencilsController g_evalTbbCtrl;
#include <osd/tbbEvaluator.h>
#endif
@ -200,48 +193,41 @@ updateGeom() {
float * ptr = g_stencilValues->BindCpuBuffer();
memset(ptr, 0, g_controlStencils->GetNumStencils() * 18 * sizeof(float));
// Uppdate random points by applying point & tangent stencils
// Update random points by applying point & tangent stencils
switch (g_kernel) {
case kCPU: {
g_evalCpuCtrl.UpdateValues<Osd::CpuVertexBuffer,Osd::CpuGLVertexBuffer>(
g_evalCtx,
g_controlDesc, g_controlValues,
g_outputDataDesc, g_stencilValues );
g_evalCpuCtrl.UpdateDerivs<Osd::CpuVertexBuffer,Osd::CpuGLVertexBuffer>(
g_evalCtx,
g_controlDesc, g_controlValues,
g_outputDuDesc, g_stencilValues,
g_outputDvDesc, g_stencilValues );
Osd::CpuEvaluator::EvalStencils(
g_controlValues, g_controlDesc, // input
g_stencilValues, g_outputDataDesc, // position
g_stencilValues, g_outputDuDesc, // Du
g_stencilValues, g_outputDvDesc, // Dv
// Normals will be filled afterwards
g_controlStencils);
} break;
#if defined(OPENSUBDIV_HAS_OPENMP)
case kOPENMP: {
g_evalOmpCtrl.UpdateValues<Osd::CpuVertexBuffer,Osd::CpuGLVertexBuffer>(
g_evalCtx,
g_controlDesc, g_controlValues,
g_outputDataDesc, g_stencilValues );
g_evalOmpCtrl.UpdateDerivs<Osd::CpuVertexBuffer,Osd::CpuGLVertexBuffer>(
g_evalCtx,
g_controlDesc, g_controlValues,
g_outputDuDesc, g_stencilValues,
g_outputDvDesc, g_stencilValues );
// FIXME: implement this case with OmpEvaluator; CpuEvaluator is called as a stand-in for now.
Osd::CpuEvaluator::EvalStencils(
g_controlValues, g_controlDesc, // input
g_stencilValues, g_outputDataDesc, // position
g_stencilValues, g_outputDuDesc, // Du
g_stencilValues, g_outputDvDesc, // Dv
// Normals will be filled afterwards
g_controlStencils);
} break;
#endif
#if defined(OPENSUBDIV_HAS_TBB)
// FIXME: implement the kTBB case with TbbEvaluator; CpuEvaluator is called as a stand-in for now.
case kTBB: {
g_evalTbbCtrl.UpdateValues<Osd::CpuVertexBuffer,Osd::CpuGLVertexBuffer>(
g_evalCtx,
g_controlDesc, g_controlValues,
g_outputDataDesc, g_stencilValues );
g_evalTbbCtrl.UpdateDerivs<Osd::CpuVertexBuffer,Osd::CpuGLVertexBuffer>(
g_evalCtx,
g_controlDesc, g_controlValues,
g_outputDuDesc, g_stencilValues,
g_outputDvDesc, g_stencilValues );
Osd::CpuEvaluator::EvalStencils(
g_controlValues, g_controlDesc, // input
g_stencilValues, g_outputDataDesc, // position
g_stencilValues, g_outputDuDesc, // Du
g_stencilValues, g_outputDvDesc, // Dv
// Normals will be filled afterwards
g_controlStencils);
} break;
#endif
default:
@ -364,9 +350,6 @@ createMesh(ShapeDesc const & shapeDesc, int level) {
g_controlValues = Osd::CpuVertexBuffer::Create(3, nverts);
// Create eval context & data buffers
delete g_evalCtx;
g_evalCtx = Osd::CpuEvalStencilsContext::Create(g_controlStencils);
delete g_stencilValues;
g_stencilValues = Osd::CpuGLVertexBuffer::Create(3, g_controlStencils->GetNumStencils() * 6 );
@ -674,6 +657,7 @@ drawStencils() {
g_samplesProgram.EnableVertexAttributes();
glDrawArrays(GL_POINTS, 0, numEdges*2);
glDrawArrays(GL_LINES, 0, numEdges*2);
glBindVertexArray(0);

View File

@ -46,55 +46,41 @@ GLFWmonitor* g_primary=0;
#include <osd/glDrawContext.h>
#include <osd/glDrawRegistry.h>
#include <osd/cpuEvaluator.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompComputeController.h>
OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL;
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbComputeController.h>
OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL;
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clGLVertexBuffer.h>
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include <osd/clEvaluator.h>
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaGLVertexBuffer.h>
#include <osd/cudaComputeContext.h>
#include <osd/cudaComputeController.h>
#include <osd/cudaEvaluator.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
#include <osd/glslTransformFeedbackComputeContext.h>
#include <osd/glslTransformFeedbackComputeController.h>
#include <osd/glXFBEvaluator.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslTransformFeedbackComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
#include <osd/glslComputeContext.h>
#include <osd/glslComputeController.h>
#include <osd/glComputeEvaluator.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL;
#endif
#include <osd/glMesh.h>
@ -438,13 +424,7 @@ updateGeom() {
Stopwatch s;
s.Start();
if (g_displayStyle == kInterleavedVaryingColor) {
OpenSubdiv::Osd::VertexBufferDescriptor vertexDesc(0, 3, 7);
OpenSubdiv::Osd::VertexBufferDescriptor varyingDesc(3, 4, 7);
g_mesh->Refine(&vertexDesc, &varyingDesc, true);
} else {
g_mesh->Refine();
}
g_mesh->Refine();
s.Stop();
g_cpuTime = float(s.GetElapsed() * 1000.0f);
@ -481,7 +461,8 @@ getKernelName(int kernel) {
static void
createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=kCatmark) {
typedef OpenSubdiv::Far::ConstIndexArray IndexArray;
using namespace OpenSubdiv;
typedef Far::ConstIndexArray IndexArray;
bool doAnim = g_objAnim and g_currentShape==0;
@ -493,12 +474,12 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=
}
// create Vtr mesh (topology)
OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape);
OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape);
Sdc::SchemeType sdctype = GetSdcType(*shape);
Sdc::Options sdcoptions = GetSdcOptions(*shape);
OpenSubdiv::Far::TopologyRefiner * refiner =
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Create(*shape,
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
Far::TopologyRefiner * refiner =
Far::TopologyRefinerFactory<Shape>::Create(*shape,
Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
// save coarse topology (used for coarse mesh drawing)
int nedges = refiner->GetNumEdges(0),
@ -533,117 +514,104 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=
interleaveVarying = g_displayStyle == kInterleavedVaryingColor,
doSingleCreasePatch = (g_singleCreasePatch!=0 and g_scheme==kCatmark);
OpenSubdiv::Osd::MeshBitset bits;
bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive);
bits.set(OpenSubdiv::Osd::MeshUseSingleCreasePatch, doSingleCreasePatch);
bits.set(OpenSubdiv::Osd::MeshInterleaveVarying, interleaveVarying);
bits.set(OpenSubdiv::Osd::MeshFVarData, g_displayStyle == kFaceVaryingColor);
bits.set(OpenSubdiv::Osd::MeshEndCapBSplineBasis, g_endCap == kEndCapBSplineBasis);
bits.set(OpenSubdiv::Osd::MeshEndCapGregoryBasis, g_endCap == kEndCapGregoryBasis);
bits.set(OpenSubdiv::Osd::MeshEndCapLegacyGregory, g_endCap == kEndCapLegacyGregory);
Osd::MeshBitset bits;
bits.set(Osd::MeshAdaptive, doAdaptive);
bits.set(Osd::MeshUseSingleCreasePatch, doSingleCreasePatch);
bits.set(Osd::MeshInterleaveVarying, interleaveVarying);
bits.set(Osd::MeshFVarData, g_displayStyle == kFaceVaryingColor);
bits.set(Osd::MeshEndCapBSplineBasis, g_endCap == kEndCapBSplineBasis);
bits.set(Osd::MeshEndCapGregoryBasis, g_endCap == kEndCapGregoryBasis);
bits.set(Osd::MeshEndCapLegacyGregory, g_endCap == kEndCapLegacyGregory);
int numVertexElements = 3;
int numVaryingElements =
(g_displayStyle == kVaryingColor or interleaveVarying) ? 4 : 0;
if (kernel == kCPU) {
if (not g_cpuComputeController) {
g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::CpuComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_cpuComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
g_mesh = new Osd::Mesh<Osd::CpuGLVertexBuffer,
Far::StencilTables,
Osd::CpuEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (kernel == kOPENMP) {
if (not g_ompComputeController) {
g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::OmpComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_ompComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
g_mesh = new Osd::Mesh<Osd::CpuGLVertexBuffer,
Far::StencilTables,
Osd::OmpEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (kernel == kTBB) {
if (not g_tbbComputeController) {
g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CpuGLVertexBuffer,
OpenSubdiv::Osd::TbbComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_tbbComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
g_mesh = new Osd::Mesh<Osd::CpuGLVertexBuffer,
Far::StencilTables,
Osd::TbbEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if(kernel == kCL) {
if (not g_clComputeController) {
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLGLVertexBuffer,
OpenSubdiv::Osd::CLComputeController,
OpenSubdiv::Osd::GLDrawContext,
CLDeviceContext>(
g_clComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, &g_clDeviceContext);
// CLKernel
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
g_mesh = new Osd::Mesh<Osd::CLGLVertexBuffer,
Osd::CLStencilTables,
Osd::CLEvaluator,
Osd::GLDrawContext,
CLDeviceContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&clEvaluatorCache,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if(kernel == kCUDA) {
if (not g_cudaComputeController) {
g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CudaGLVertexBuffer,
OpenSubdiv::Osd::CudaComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_cudaComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
g_mesh = new Osd::Mesh<Osd::CudaGLVertexBuffer,
Osd::CudaStencilTables,
Osd::CudaEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if(kernel == kGLSL) {
if (not g_glslTransformFeedbackComputeController) {
g_glslTransformFeedbackComputeController = new OpenSubdiv::Osd::GLSLTransformFeedbackComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::GLVertexBuffer,
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_glslTransformFeedbackComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
static Osd::EvaluatorCacheT<Osd::GLXFBEvaluator> glXFBEvaluatorCache;
g_mesh = new Osd::Mesh<Osd::GLVertexBuffer,
Osd::GLStencilTablesTBO,
Osd::GLXFBEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&glXFBEvaluatorCache);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if(kernel == kGLSLCompute) {
if (not g_glslComputeController) {
g_glslComputeController = new OpenSubdiv::Osd::GLSLComputeController();
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::GLVertexBuffer,
OpenSubdiv::Osd::GLSLComputeController,
OpenSubdiv::Osd::GLDrawContext>(
g_glslComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits);
static Osd::EvaluatorCacheT<Osd::GLComputeEvaluator> glComputeEvaluatorCache;
g_mesh = new Osd::Mesh<Osd::GLVertexBuffer,
Osd::GLStencilTablesSSBO,
Osd::GLComputeEvaluator,
Osd::GLDrawContext>(
refiner,
numVertexElements,
numVaryingElements,
level, bits,
&glComputeEvaluatorCache);
#endif
} else {
printf("Unsupported kernel %s\n", getKernelName(kernel));
@ -1384,28 +1352,6 @@ uninitGL() {
if (g_mesh)
delete g_mesh;
delete g_cpuComputeController;
#ifdef OPENSUBDIV_HAS_OPENMP
delete g_ompComputeController;
#endif
#ifdef OPENSUBDIV_HAS_TBB
delete g_tbbComputeController;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
delete g_glslTransformFeedbackComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
delete g_glslComputeController;
#endif
}
//------------------------------------------------------------------------------

View File

@ -64,8 +64,6 @@
#include <far/stencilTablesFactory.h>
#include <osd/mesh.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
#include <osd/cpuVertexBuffer.h>

View File

@ -27,20 +27,11 @@
#-------------------------------------------------------------------------------
# source & headers
set(CPU_SOURCE_FILES
cpuEvaluator.cpp
cpuKernel.cpp
cpuComputeController.cpp
cpuComputeContext.cpp
cpuEvalLimitContext.cpp
cpuEvalLimitController.cpp
cpuEvalLimitKernel.cpp
cpuEvalStencilsContext.cpp
cpuEvalStencilsController.cpp
cpuSmoothNormalContext.cpp
cpuSmoothNormalController.cpp
cpuVertexBuffer.cpp
drawContext.cpp
drawRegistry.cpp
evalLimitContext.cpp
)
set(GPU_SOURCE_FILES )
@ -50,21 +41,11 @@ set(INC_FILES )
set(PRIVATE_HEADER_FILES
debug.h
cpuKernel.h
cpuEvalLimitKernel.h
)
set(PUBLIC_HEADER_FILES
computeController.h
cpuComputeContext.h
cpuComputeController.h
cpuEvalLimitContext.h
cpuEvalLimitController.h
cpuEvalStencilsContext.h
cpuEvalStencilsController.h
cpuSmoothNormalContext.h
cpuSmoothNormalController.h
cpuEvaluator.h
cpuVertexBuffer.h
evalLimitContext.h
mesh.h
nonCopyable.h
opengl.h
@ -75,21 +56,16 @@ set(PUBLIC_HEADER_FILES
set(DOXY_HEADER_FILES ${PUBLIC_HEADER_FILES})
#-------------------------------------------------------------------------------
set(OPENMP_PUBLIC_HEADERS
ompEvaluator.h
ompKernel.h
ompComputeController.h
ompEvalStencilsController.h
ompSmoothNormalController.h
)
if(OPENMP_FOUND )
list(APPEND CPU_SOURCE_FILES
ompEvaluator.cpp
ompKernel.cpp
ompComputeController.cpp
ompEvalStencilsController.cpp
ompSmoothNormalController.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${OPENMP_PUBLIC_HEADERS})
@ -103,20 +79,16 @@ list(APPEND DOXY_HEADER_FILES ${OPENMP_PUBLIC_HEADERS})
#-------------------------------------------------------------------------------
set(TBB_PUBLIC_HEADERS
tbbEvaluator.h
tbbKernel.h
tbbComputeController.h
tbbEvalStencilsController.h
tbbSmoothNormalController.h
)
if( TBB_FOUND )
include_directories("${TBB_INCLUDE_DIR}")
list(APPEND CPU_SOURCE_FILES
tbbEvaluator.cpp
tbbKernel.cpp
tbbComputeController.cpp
tbbEvalStencilsController.cpp
tbbSmoothNormalController.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${TBB_PUBLIC_HEADERS})
@ -166,18 +138,16 @@ list(APPEND DOXY_HEADER_FILES ${GL_PUBLIC_HEADERS})
# OpenGL 4.2 dependencies
# note : (GLSL transform feedback kernels require GL 4.2)
set(GL_4_2_PUBLIC_HEADERS
glslTransformFeedbackComputeContext.h
glslTransformFeedbackComputeController.h
glXFBEvaluator.h
)
if( OPENGL_4_2_FOUND )
list(APPEND GPU_SOURCE_FILES
glslTransformFeedbackComputeContext.cpp
glslTransformFeedbackComputeController.cpp
glXFBEvaluator.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${GL_4_2_PUBLIC_HEADERS})
list(APPEND KERNEL_FILES
glslTransformFeedbackKernel.glsl
glslXFBKernel.glsl
)
list(APPEND PLATFORM_GPU_LIBRARIES
${GLEW_LIBRARY}
@ -191,14 +161,12 @@ list(APPEND DOXY_HEADER_FILES ${GL_4_2_PUBLIC_HEADERS})
# OpenGL 4.3 dependencies
# note : (GLSL compute shader kernels require GL 4.3)
set(GL_4_3_PUBLIC_HEADERS
glslComputeContext.h
glslComputeController.h
glComputeEvaluator.h
)
if( OPENGL_4_3_FOUND )
list(APPEND GPU_SOURCE_FILES
glslComputeContext.cpp
glslComputeController.cpp
glComputeEvaluator.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${GL_4_3_PUBLIC_HEADERS})
list(APPEND KERNEL_FILES
@ -215,8 +183,7 @@ list(APPEND DOXY_HEADER_FILES ${GL_4_3_PUBLIC_HEADERS})
# DX11 code & dependencies
set(DXSDK_PUBLIC_HEADERS
cpuD3D11VertexBuffer.h
d3d11ComputeContext.h
d3d11ComputeController.h
d3d11ComputeEvaluator.h
d3d11DrawContext.h
d3d11DrawRegistry.h
d3d11VertexBuffer.h
@ -225,8 +192,7 @@ set(DXSDK_PUBLIC_HEADERS
if( DXSDK_FOUND )
list(APPEND GPU_SOURCE_FILES
cpuD3D11VertexBuffer.cpp
d3d11ComputeContext.cpp
d3d11ComputeController.cpp
d3d11ComputeEvaluator.cpp
d3d11DrawContext.cpp
d3d11DrawRegistry.cpp
d3d11VertexBuffer.cpp
@ -248,16 +214,14 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS})
#-------------------------------------------------------------------------------
# OpenCL code & dependencies
set(OPENCL_PUBLIC_HEADERS
clComputeContext.h
clComputeController.h
clEvaluator.h
clVertexBuffer.h
opencl.h
)
if ( OPENCL_FOUND )
list(APPEND GPU_SOURCE_FILES
clComputeContext.cpp
clComputeController.cpp
clEvaluator.cpp
clVertexBuffer.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
@ -288,15 +252,13 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
#-------------------------------------------------------------------------------
# CUDA code & dependencies
set(CUDA_PUBLIC_HEADERS
cudaComputeContext.h
cudaComputeController.h
cudaEvaluator.h
cudaVertexBuffer.h
)
if( CUDA_FOUND )
list(APPEND GPU_SOURCE_FILES
cudaComputeController.cpp
cudaComputeContext.cpp
cudaEvaluator.cpp
cudaVertexBuffer.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS})

View File

@ -1,233 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/clComputeContext.h"
#include <vector>
#include "../far/stencilTables.h"
#include "../far/error.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// -----------------------------------------------------------------------------
// Creates an OpenCL buffer in 'clContext' initialized with a copy of the
// elements of 'src'.
//
// Returns the new buffer object, or NULL when 'src' is empty. If
// clCreateBuffer fails, the error code is reported through Far::Error and
// whatever clCreateBuffer returned is passed back to the caller.
template <class T> cl_mem
createCLBuffer(std::vector<T> const & src, cl_context clContext) {

    // An empty vector has no first element to take the address of
    // (src.at(0) would throw std::out_of_range) and there is nothing to
    // upload, so hand back a null buffer instead.
    if (src.empty()) {
        return NULL;
    }

    cl_int errNum = 0;

    // The host pointer is only used as the copy source
    // (CL_MEM_COPY_HOST_PTR); the const_cast exists solely because the
    // OpenCL entry point takes a non-const void*.
    cl_mem devicePtr = clCreateBuffer(clContext,
                                      CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
                                      src.size()*sizeof(T),
                                      const_cast<T *>(&src[0]),
                                      &errNum);

    if (errNum != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", errNum);
    }

    return devicePtr;
}
// -----------------------------------------------------------------------------
// Holds the OpenCL buffers created from the four arrays of a
// Far::StencilTables (sizes, offsets, control indices, weights) together
// with the number of stencils. The buffers are released in the destructor.
class CLComputeContext::CLStencilTables {

public:

    // Uploads the arrays of 'stencilTables' into buffers created in
    // 'clContext'. When the table holds no stencils, no buffer is created
    // and all handles stay NULL.
    CLStencilTables(Far::StencilTables const & stencilTables,
                    cl_context clContext) :
        _sizes(NULL), _offsets(NULL), _indices(NULL), _weights(NULL),
        _numStencils(stencilTables.GetNumStencils()) {

        if (_numStencils > 0) {
            _sizes = createCLBuffer(stencilTables.GetSizes(), clContext);
            _offsets = createCLBuffer(stencilTables.GetOffsets(), clContext);
            _indices = createCLBuffer(stencilTables.GetControlIndices(),
                                      clContext);
            _weights = createCLBuffer(stencilTables.GetWeights(), clContext);
        }
    }

    // Releases every buffer that was successfully created.
    ~CLStencilTables() {
        if (_sizes) clReleaseMemObject(_sizes);
        if (_offsets) clReleaseMemObject(_offsets);
        if (_indices) clReleaseMemObject(_indices);
        if (_weights) clReleaseMemObject(_weights);
    }

    // True only when all four buffers exist.
    bool IsValid() const {
        return _sizes and _offsets and _indices and _weights;
    }

    cl_mem GetSizes() const {
        return _sizes;
    }

    cl_mem GetOffsets() const {
        return _offsets;
    }

    cl_mem GetIndices() const {
        return _indices;
    }

    cl_mem GetWeights() const {
        return _weights;
    }

    int GetNumStencils() const {
        return _numStencils;
    }

private:

    // This class releases its cl_mem handles in the destructor, so a
    // member-wise copy would release each buffer twice. Copying is
    // therefore disabled (declared, intentionally never defined).
    CLStencilTables(CLStencilTables const &);
    CLStencilTables & operator=(CLStencilTables const &);

    cl_mem _sizes,
           _offsets,
           _indices,
           _weights;

    int _numStencils;
};
// -----------------------------------------------------------------------------
// Creates the device-side stencil tables for each host table that is
// supplied (either pointer may be null) and records the number of control
// vertices reported by the tables.
CLComputeContext::CLComputeContext(
    Far::StencilTables const * vertexStencilTables,
    Far::StencilTables const * varyingStencilTables,
    cl_context clContext) :
        _vertexStencilTables(0), _varyingStencilTables(0),
        _numControlVertices(0) {

    if (vertexStencilTables) {
        _vertexStencilTables =
            new CLStencilTables(*vertexStencilTables, clContext);

        _numControlVertices = vertexStencilTables->GetNumControlVertices();
    }

    if (varyingStencilTables) {
        _varyingStencilTables =
            new CLStencilTables(*varyingStencilTables, clContext);

        if (not _numControlVertices) {
            // no count recorded from the vertex table: take the varying one
            _numControlVertices =
                varyingStencilTables->GetNumControlVertices();
        } else {
            // both tables must agree on the number of control vertices
            assert(_numControlVertices
                == varyingStencilTables->GetNumControlVertices());
        }
    }
}

// Destroys the device-side stencil tables owned by this context.
CLComputeContext::~CLComputeContext() {

    delete _vertexStencilTables;

    delete _varyingStencilTables;
}
// ----------------------------------------------------------------------------
// True when a vertex stencil table exists and all of its buffers are valid.
bool
CLComputeContext::HasVertexStencilTables() const {
    return _vertexStencilTables and _vertexStencilTables->IsValid();
}

// True when a varying stencil table exists and all of its buffers are valid.
bool
CLComputeContext::HasVaryingStencilTables() const {
    return _varyingStencilTables and _varyingStencilTables->IsValid();
}

// Number of stencils in the vertex table, 0 when there is no such table.
int
CLComputeContext::GetNumStencilsInVertexStencilTables() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetNumStencils();
}

// Number of stencils in the varying table, 0 when there is no such table.
int
CLComputeContext::GetNumStencilsInVaryingStencilTables() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetNumStencils();
}
// ----------------------------------------------------------------------------
// Accessors for the vertex stencil table buffers. Each one returns a null
// handle when the context holds no vertex stencil table.
cl_mem
CLComputeContext::GetVertexStencilTablesSizes() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetSizes();
}

cl_mem
CLComputeContext::GetVertexStencilTablesOffsets() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetOffsets();
}

cl_mem
CLComputeContext::GetVertexStencilTablesIndices() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetIndices();
}

cl_mem
CLComputeContext::GetVertexStencilTablesWeights() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetWeights();
}

// ----------------------------------------------------------------------------

// Accessors for the varying stencil table buffers. Each one returns a null
// handle when the context holds no varying stencil table.
cl_mem
CLComputeContext::GetVaryingStencilTablesSizes() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetSizes();
}

cl_mem
CLComputeContext::GetVaryingStencilTablesOffsets() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetOffsets();
}

cl_mem
CLComputeContext::GetVaryingStencilTablesIndices() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetIndices();
}

cl_mem
CLComputeContext::GetVaryingStencilTablesWeights() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetWeights();
}
// -----------------------------------------------------------------------------
// Factory: allocates a new CLComputeContext from the given stencil tables
// and OpenCL context. The caller receives the raw pointer returned by new.
CLComputeContext *
CLComputeContext::Create(Far::StencilTables const * vertexStencilTables,
                         Far::StencilTables const * varyingStencilTables,
                         cl_context clContext) {

    return new CLComputeContext(vertexStencilTables,
                                varyingStencilTables,
                                clContext);
}
// -----------------------------------------------------------------------------
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,155 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CL_COMPUTE_CONTEXT_H
#define OSD_CL_COMPUTE_CONTEXT_H
#include "../version.h"
#include "../osd/nonCopyable.h"
#include "../osd/opencl.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far { class StencilTables; }
namespace Osd {
///
/// \brief OpenCL Refine Context
///
/// The OpenCL-Compute implementation of the Refine module contextual functionality.
///
/// Contexts interface the serialized topological data pertaining to the
/// geometric primitives with the capabilities of the selected discrete
/// compute device.
///
/// Instances are obtained through the static Create() functions; the
/// constructor is protected.
///
class CLComputeContext : public NonCopyable<CLComputeContext> {
public:
/// Creates a CLComputeContext instance
///
/// @param vertexStencilTables The Far::StencilTables used for vertex
/// interpolation
///
/// @param varyingStencilTables The Far::StencilTables used for varying
/// interpolation
///
/// @param clContext An active OpenCL compute context
///
static CLComputeContext * Create(
Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
cl_context clContext);
/// Creates a CLComputeContext instance (template version)
///
/// Forwards to the non-template Create() using the cl_context returned by
/// context->GetContext().
///
/// @param vertexStencilTables The Far::StencilTables used for vertex
/// interpolation
///
/// @param varyingStencilTables The Far::StencilTables used for varying
/// interpolation
///
/// @param context A user defined class to provide cl_context.
/// must implement GetContext()
///
template<typename DEVICE_CONTEXT>
static CLComputeContext * Create(
Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
DEVICE_CONTEXT context) {
return Create(vertexStencilTables, varyingStencilTables,
context->GetContext());
}
/// Destructor
virtual ~CLComputeContext();
/// Returns true if the Context has a 'vertex' interpolation stencil table
bool HasVertexStencilTables() const;
/// Returns true if the Context has a 'varying' interpolation stencil table
bool HasVaryingStencilTables() const;
/// Returns the number of control vertices
int GetNumControlVertices() const {
return _numControlVertices;
}
/// Returns the number of stencils in vertex stencil table
int GetNumStencilsInVertexStencilTables() const;
/// Returns the number of stencils in varying stencil table
int GetNumStencilsInVaryingStencilTables() const;
/// Returns the OpenCL buffer containing vertex-stencil stencil sizes
cl_mem GetVertexStencilTablesSizes() const;
/// Returns the OpenCL buffer containing vertex-stencil stencil offsets
cl_mem GetVertexStencilTablesOffsets() const;
/// Returns the OpenCL buffer containing vertex-stencil stencil indices
cl_mem GetVertexStencilTablesIndices() const;
/// Returns the OpenCL buffer containing vertex-stencil stencil weights
cl_mem GetVertexStencilTablesWeights() const;
/// Returns the OpenCL buffer containing varying-stencil stencil sizes
cl_mem GetVaryingStencilTablesSizes() const;
/// Returns the OpenCL buffer containing varying-stencil stencil offsets
cl_mem GetVaryingStencilTablesOffsets() const;
/// Returns the OpenCL buffer containing varying-stencil stencil indices
cl_mem GetVaryingStencilTablesIndices() const;
/// Returns the OpenCL buffer containing varying-stencil stencil weights
cl_mem GetVaryingStencilTablesWeights() const;
protected:
// Protected: clients go through Create().
explicit CLComputeContext(Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
cl_context clContext);
private:
// Device-side stencil table holder, defined in the implementation file.
class CLStencilTables;
// Either pointer may be null when the corresponding table was not supplied.
CLStencilTables * _vertexStencilTables,
* _varyingStencilTables;
int _numControlVertices;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_CL_COMPUTE_CONTEXT_H

View File

@ -1,285 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/clComputeController.h"
#include "../far/error.h"
#if defined(_WIN32)
#include <windows.h>
#endif
#include <algorithm>
#include <string.h>
#include <sstream>
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
static const char *clSource =
#include "clKernel.gen.h"
;
// -----------------------------------------------------------------------------
// Creates the kernel called 'name' from the program 'prog'. A failure is
// reported through Far::Error; the value returned by clCreateKernel is
// handed back to the caller either way.
static cl_kernel buildKernel(cl_program prog, const char * name) {

    cl_int status = CL_SUCCESS;

    cl_kernel kernel = clCreateKernel(prog, name, &status);

    if (status != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel '%s' (%d)\n", name, status);
    }

    return kernel;
}
// -----------------------------------------------------------------------------
// Owns the OpenCL program and the stencil kernel compiled for one primvar
// layout (length / stride). The program and kernel are released when the
// bundle is destroyed.
class CLComputeController::KernelBundle :
    NonCopyable<CLComputeController::KernelBundle> {

public:

    KernelBundle() : _program(NULL), _stencilsKernel(NULL) { }

    // Releases the kernel and the program, if they were created.
    ~KernelBundle() {
        if (_stencilsKernel) clReleaseKernel(_stencilsKernel);
        if (_program) clReleaseProgram(_program);
    }

    // Builds the OpenCL program with LENGTH / SRC_STRIDE / DST_STRIDE
    // defined from the descriptors and creates the stencil kernel.
    //
    // Returns false (after reporting through Far::Error) when the program
    // cannot be created or built, or when the kernel cannot be created.
    bool Compile(cl_context clContext,
                 VertexBufferDescriptor const & srcDesc,
                 VertexBufferDescriptor const & dstDesc) {

        cl_int errNum;

        // XXX: only store srcDesc.
        //      this is ok since currently this kernel doesn't get called with
        //      different strides for src and dst. This function will be
        //      refactored soon.
        _desc = VertexBufferDescriptor(0, srcDesc.length, srcDesc.stride);

        std::ostringstream defines;
        defines << "#define LENGTH "     << srcDesc.length << "\n"
                << "#define SRC_STRIDE " << srcDesc.stride << "\n"
                << "#define DST_STRIDE " << dstDesc.stride << "\n";
        std::string defineStr = defines.str();

        const char *sources[] = { defineStr.c_str(), clSource };
        _program = clCreateProgramWithSource(clContext, 2, sources, 0, &errNum);
        if (errNum != CL_SUCCESS) {
            Far::Error(Far::FAR_RUNTIME_ERROR,
                       "clCreateProgramWithSource (%d)", errNum);
            // there is no program object to build
            return false;
        }

        errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL);
        if (errNum != CL_SUCCESS) {
            Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum);

            // CL_CONTEXT_NUM_DEVICES is a cl_uint query
            cl_uint numDevices = 0;
            clGetContextInfo(clContext,
                CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);

            if (numDevices > 0) {
                std::vector<cl_device_id> devices(numDevices);
                clGetContextInfo(clContext, CL_CONTEXT_DEVICES,
                    sizeof(cl_device_id)*numDevices, &devices[0], NULL);

                for (cl_uint i = 0; i < numDevices; ++i) {
                    char cBuildLog[10240];
                    // keep the buffer a valid string even if the query fails
                    cBuildLog[0] = '\0';
                    clGetProgramBuildInfo(_program, devices[i],
                        CL_PROGRAM_BUILD_LOG, sizeof(cBuildLog), cBuildLog, NULL);
                    // the log is data, never a format string: it may
                    // legitimately contain '%' characters
                    Far::Error(Far::FAR_RUNTIME_ERROR, "%s", cBuildLog);
                }
            }
            return false;
        }

        // compile all cl compute kernels
        _stencilsKernel = buildKernel(_program, "computeStencils");

        return _stencilsKernel != NULL;
    }

    cl_kernel GetStencilsKernel() const {
        return _stencilsKernel;
    }

    // Predicate selecting the bundle compiled for a given length / stride.
    struct Match {

        Match(VertexBufferDescriptor const & d) : desc(d) { }

        bool operator() (KernelBundle const * kernel) {
            return (desc.length==kernel->_desc.length and
                    desc.stride==kernel->_desc.stride);
        }

        VertexBufferDescriptor desc;
    };

private:

    cl_program _program;

    cl_kernel _stencilsKernel;

    VertexBufferDescriptor _desc;
};
// ----------------------------------------------------------------------------
void
CLComputeController::ApplyStencilTableKernel(ComputeContext const *context) {
assert(context);
cl_int errNum;
size_t globalWorkSize = 0;
if (context->HasVertexStencilTables()) {
int start = 0;
int end = context->GetNumStencilsInVertexStencilTables();
globalWorkSize = (size_t)(end - start);
KernelBundle const * bundle = getKernel(_currentBindState.vertexDesc);
cl_kernel kernel = bundle->GetStencilsKernel();
cl_mem sizes = context->GetVertexStencilTablesSizes(),
offsets = context->GetVertexStencilTablesOffsets(),
indices = context->GetVertexStencilTablesIndices(),
weights = context->GetVertexStencilTablesWeights();
cl_mem src = _currentBindState.vertexBuffer;
cl_mem dst = _currentBindState.vertexBuffer;
VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride;
clSetKernelArg(kernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(kernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(kernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &sizes);
clSetKernelArg(kernel, 5, sizeof(cl_mem), &offsets);
clSetKernelArg(kernel, 6, sizeof(cl_mem), &indices);
clSetKernelArg(kernel, 7, sizeof(cl_mem), &weights);
clSetKernelArg(kernel, 8, sizeof(int), &start);
clSetKernelArg(kernel, 9, sizeof(int), &end);
errNum = clEnqueueNDRangeKernel(
_clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
if (errNum!=CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyStencilTableKernel (%d) ", errNum);
}
}
if (context->HasVaryingStencilTables()) {
int start = 0;
int end = context->GetNumStencilsInVaryingStencilTables();
globalWorkSize = (size_t)(end - start);
KernelBundle const * bundle = getKernel(_currentBindState.varyingDesc);
cl_kernel kernel = bundle->GetStencilsKernel();
cl_mem sizes = context->GetVaryingStencilTablesSizes(),
offsets = context->GetVaryingStencilTablesOffsets(),
indices = context->GetVaryingStencilTablesIndices(),
weights = context->GetVaryingStencilTablesWeights();
cl_mem src = _currentBindState.varyingBuffer;
cl_mem dst = _currentBindState.varyingBuffer;
VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride;
clSetKernelArg(kernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(kernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(kernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &sizes);
clSetKernelArg(kernel, 5, sizeof(cl_mem), &offsets);
clSetKernelArg(kernel, 6, sizeof(cl_mem), &indices);
clSetKernelArg(kernel, 7, sizeof(cl_mem), &weights);
clSetKernelArg(kernel, 8, sizeof(int), &start);
clSetKernelArg(kernel, 9, sizeof(int), &end);
errNum = clEnqueueNDRangeKernel(
_clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
if (errNum!=CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyStencilTableKernel (%d)", errNum);
}
}
}
// ----------------------------------------------------------------------------
// Returns the kernel bundle matching the length / stride of 'desc'. When
// the registry holds no such bundle, a new one is compiled (using 'desc'
// for both source and destination), added to the registry and returned.
CLComputeController::KernelBundle const *
CLComputeController::getKernel(VertexBufferDescriptor const &desc) {

    KernelRegistry::iterator found =
        std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
            KernelBundle::Match(desc));

    // already compiled for this layout
    if (found != _kernelRegistry.end()) {
        return *found;
    }

    KernelBundle * newBundle = new KernelBundle();
    newBundle->Compile(_clContext, desc, desc);
    _kernelRegistry.push_back(newBundle);

    return newBundle;
}
// ----------------------------------------------------------------------------
// Stores the OpenCL context and command queue used by this controller.
CLComputeController::CLComputeController(
    cl_context clContext, cl_command_queue queue) :
        _clContext(clContext), _clQueue(queue) {
}

// Deletes every kernel bundle held in the registry.
CLComputeController::~CLComputeController() {

    for (KernelRegistry::size_type i = 0; i < _kernelRegistry.size(); ++i) {
        delete _kernelRegistry[i];
    }
}

// ----------------------------------------------------------------------------

// Calls clFinish on the controller's command queue.
void
CLComputeController::Synchronize() {

    clFinish(_clQueue);
}
// -----------------------------------------------------------------------------
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,204 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CL_COMPUTE_CONTROLLER_H
#define OSD_CL_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/clComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include "../osd/opencl.h"
#include <vector>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
class CLKernelBundle;
/// \brief Compute controller for launching OpenCL Compute subdivision kernels.
///
/// CLComputeController is a compute controller class to launch
/// OpenCL subdivision kernels. The buffer classes passed to Compute()
/// must provide GetNumElements() and BindCLBuffer(cl_command_queue).
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class CLComputeController {
public:
typedef CLComputeContext ComputeContext;
/// Constructor.
///
/// @param clContext a valid instantiated OpenCL context
///
/// @param queue a valid non-zero OpenCL command queue
///
CLComputeController(cl_context clContext, cl_command_queue queue);
/// Destructor.
~CLComputeController();
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// @param context The CLContext to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
/// @param varyingBuffer Varying-interpolated data buffer
///
/// @param vertexDesc The descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc The descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Compute( CLComputeContext const * context,
VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer,
VertexBufferDescriptor const * vertexDesc=NULL,
VertexBufferDescriptor const * varyingDesc=NULL ){
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
ApplyStencilTableKernel(context);
unbind();
}
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// Convenience overload without varying data: forwards to the
/// two-buffer Compute() with a null varying buffer.
///
/// @param context The CLContext to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
template<class VERTEX_BUFFER>
void Compute(CLComputeContext const * context,
VERTEX_BUFFER *vertexBuffer) {
Compute<VERTEX_BUFFER>(context, vertexBuffer, (VERTEX_BUFFER*)0);
}
/// Waits until all running subdivision kernels finish.
void Synchronize();
/// Returns CL context
cl_context GetContext() const { return _clContext; }
/// Returns CL command queue
cl_command_queue GetCommandQueue() const { return _clQueue; }
protected:
// Launches the stencil kernels of 'context' on the currently bound buffers.
void ApplyStencilTableKernel(ComputeContext const *context);
// Records the descriptors and the CL buffers of the given vertex / varying
// buffers in _currentBindState. Either buffer pointer may be null, in which
// case the corresponding cl_mem is 0.
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind( VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer,
VertexBufferDescriptor const * vertexDesc,
VertexBufferDescriptor const * varyingDesc ) {
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0;
_currentBindState.vertexDesc =
VertexBufferDescriptor(0, numElements, numElements);
}
// same rule for the varying buffer descriptor.
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0;
_currentBindState.varyingDesc =
VertexBufferDescriptor(0, numElements, numElements);
}
_currentBindState.vertexBuffer = vertexBuffer ?
vertexBuffer->BindCLBuffer(_clQueue) : 0;
_currentBindState.varyingBuffer = varyingBuffer ?
varyingBuffer->BindCLBuffer(_clQueue) : 0;
}
// Clears the bind state set up by bind().
void unbind() {
_currentBindState.Reset();
}
private:
// Compiled program / kernel holder, defined in the implementation file.
class KernelBundle;
// Bind state is a transitional state during refinement.
// It doesn't take an ownership of the vertex buffers.
struct BindState {
BindState() : vertexBuffer(0), varyingBuffer(0) { }
void Reset() {
vertexBuffer = varyingBuffer = NULL;
vertexDesc.Reset();
varyingDesc.Reset();
}
cl_mem vertexBuffer,
varyingBuffer;
VertexBufferDescriptor vertexDesc,
varyingDesc;
};
BindState _currentBindState;
// Returns the kernel bundle to use for the given descriptor.
KernelBundle const * getKernel(VertexBufferDescriptor const &desc);
typedef std::vector<KernelBundle *> KernelRegistry;
KernelRegistry _kernelRegistry;
cl_context _clContext;
cl_command_queue _clQueue;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_CL_COMPUTE_CONTROLLER_H

0
opensubdiv/osd/clD3D11VertexBuffer.cpp Executable file → Normal file
View File

View File

@ -51,7 +51,7 @@ namespace Osd {
/// D3D11VertexBuffer implements CLVertexBufferInterface and
/// D3D11VertexBufferInterface.
///
/// An instance of this buffer class can be passed to D3D11ComputeController.
/// An instance of this buffer class can be passed to D3D11ComputeEvaluator.
///
class CLD3D11VertexBuffer {
public:

View File

@ -0,0 +1,206 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/clEvaluator.h"
#include <sstream>
#include <string>
#include <vector>
#include "../osd/opencl.h"
#include "../far/error.h"
#include "../far/stencilTables.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
static const char *clSource =
#include "clKernel.gen.h"
;
// ----------------------------------------------------------------------------
// Creates an OpenCL buffer in 'clContext' initialized with a copy of the
// elements of 'src'.
//
// Returns the new buffer object, or NULL when 'src' is empty. If
// clCreateBuffer fails, the error code is reported through Far::Error and
// whatever clCreateBuffer returned is passed back to the caller.
template <class T> cl_mem
createCLBuffer(std::vector<T> const & src, cl_context clContext) {

    // An empty vector has no first element to take the address of
    // (src.at(0) would throw std::out_of_range) and there is nothing to
    // upload, so hand back a null buffer instead.
    if (src.empty()) {
        return NULL;
    }

    cl_int errNum = 0;

    // The host pointer is only used as the copy source
    // (CL_MEM_COPY_HOST_PTR); the const_cast exists solely because the
    // OpenCL entry point takes a non-const void*.
    cl_mem devicePtr = clCreateBuffer(clContext,
                                      CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
                                      src.size()*sizeof(T),
                                      const_cast<T *>(&src[0]),
                                      &errNum);

    if (errNum != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", errNum);
    }

    return devicePtr;
}
// ----------------------------------------------------------------------------
// Uploads the sizes, offsets, control indices and weights of
// 'stencilTables' into OpenCL buffers created in 'clContext'. When the
// table holds no stencils, no buffer is created and every handle is NULL.
CLStencilTables::CLStencilTables(Far::StencilTables const *stencilTables,
                                 cl_context clContext) {

    _numStencils = stencilTables->GetNumStencils();

    if (_numStencils <= 0) {
        // nothing to upload
        _sizes = _offsets = _indices = _weights = NULL;
        return;
    }

    _sizes = createCLBuffer(stencilTables->GetSizes(), clContext);
    _offsets = createCLBuffer(stencilTables->GetOffsets(), clContext);
    _indices = createCLBuffer(stencilTables->GetControlIndices(),
                              clContext);
    _weights = createCLBuffer(stencilTables->GetWeights(), clContext);
}

// Releases each buffer that was created.
CLStencilTables::~CLStencilTables() {

    if (_sizes) {
        clReleaseMemObject(_sizes);
    }
    if (_offsets) {
        clReleaseMemObject(_offsets);
    }
    if (_indices) {
        clReleaseMemObject(_indices);
    }
    if (_weights) {
        clReleaseMemObject(_weights);
    }
}
// ---------------------------------------------------------------------------
/// Stores the given OpenCL context and command queue. The program and the
/// stencil kernel start out as NULL; they are created by Compile().
CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue)
: _clContext(context), _clCommandQueue(queue),
_program(NULL), _stencilsKernel(NULL) {
}
/// Releases the kernel and the program if they have been created.
/// The context and the command queue are not released here.
CLEvaluator::~CLEvaluator() {
// the kernel is released before the program it was created from
if (_stencilsKernel) clReleaseKernel(_stencilsKernel);
if (_program) clReleaseProgram(_program);
}
/// Builds the OpenCL program and creates the "computeStencils" kernel for the
/// given pair of buffer descriptors.
///
/// The element length of the source and the strides of both buffers are baked
/// into the program as LENGTH, SRC_STRIDE and DST_STRIDE defines.
///
/// @param srcDesc  descriptor of the source buffer
///
/// @param dstDesc  descriptor of the destination buffer; its length must be
///                 greater than or equal to srcDesc.length
///
/// @return false if the descriptors are incompatible, or if the program or
///         the kernel could not be created. Failures are reported through
///         Far::Error.
///
bool
CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
                     VertexBufferDescriptor const &dstDesc) {

    if (srcDesc.length > dstDesc.length) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "srcDesc length must be less than or equal to "
                   "dstDesc length.\n");
        return false;
    }

    // Compile() may be called more than once on the same instance: release
    // what a previous call has created so that it is not leaked.
    if (_stencilsKernel) {
        clReleaseKernel(_stencilsKernel);
        _stencilsKernel = NULL;
    }
    if (_program) {
        clReleaseProgram(_program);
        _program = NULL;
    }

    cl_int errNum = CL_SUCCESS;

    std::ostringstream defines;
    defines << "#define LENGTH "     << srcDesc.length << "\n"
            << "#define SRC_STRIDE " << srcDesc.stride << "\n"
            << "#define DST_STRIDE " << dstDesc.stride << "\n";
    std::string defineStr = defines.str();

    const char *sources[] = { defineStr.c_str(), clSource };
    _program = clCreateProgramWithSource(_clContext, 2, sources, 0, &errNum);
    if (errNum != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "clCreateProgramWithSource (%d)", errNum);
        // there is no valid program to build
        return false;
    }

    errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum);

        // Report the build log of every device of the context.
        cl_uint numDevices = 0;
        clGetContextInfo(
            _clContext, CL_CONTEXT_NUM_DEVICES,
            sizeof(cl_uint), &numDevices, NULL);

        if (numDevices > 0) {
            std::vector<cl_device_id> devices(numDevices);
            clGetContextInfo(_clContext, CL_CONTEXT_DEVICES,
                             sizeof(cl_device_id)*numDevices,
                             &devices[0], NULL);

            for (cl_uint i = 0; i < numDevices; ++i) {
                // zero-filled so that it is a valid string even if the query
                // below fails
                char cBuildLog[10240] = { 0 };
                clGetProgramBuildInfo(
                    _program, devices[i],
                    CL_PROGRAM_BUILD_LOG,
                    sizeof(cBuildLog) - 1, cBuildLog, NULL);

                // The log is arbitrary compiler output and may contain '%':
                // never use it as the format string.
                Far::Error(Far::FAR_RUNTIME_ERROR, "%s", cBuildLog);
            }
        }
        return false;
    }

    _stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum);
    if (errNum != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
        return false;
    }
    return true;
}
/// Applies the stencils [start, end) using the compiled kernel.
///
/// One work item is enqueued per stencil, and the call waits for the command
/// queue to finish before returning.
///
/// @param src      source buffer
/// @param srcDesc  source descriptor; only its offset is passed to the kernel
///                 here (length and stride are fixed by Compile())
/// @param dst      destination buffer
/// @param dstDesc  destination descriptor; only its offset is passed here
/// @param sizes    stencil sizes buffer
/// @param offsets  stencil offsets buffer
/// @param indices  stencil control indices buffer
/// @param weights  stencil weights buffer
/// @param start    index of the first stencil to apply
/// @param end      index one past the last stencil to apply
///
/// @return true if there is nothing to do (end <= start) or if the kernel has
///         been executed; false if the kernel has not been compiled, or if
///         setting its arguments or enqueuing it failed.
///
bool
CLEvaluator::EvalStencils(cl_mem src,
                          VertexBufferDescriptor const &srcDesc,
                          cl_mem dst,
                          VertexBufferDescriptor const &dstDesc,
                          cl_mem sizes,
                          cl_mem offsets,
                          cl_mem indices,
                          cl_mem weights,
                          int start,
                          int end) const {

    if (end <= start) return true;

    // Do not hand a NULL kernel to the OpenCL runtime.
    if (!_stencilsKernel) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "CLEvaluator::EvalStencils: the kernel has not been "
                   "compiled.\n");
        return false;
    }

    size_t globalWorkSize = (size_t)(end - start);

    // CL_SUCCESS is 0, so the accumulated value stays 0 only if every call
    // succeeded. On failure it is the bitwise OR of the reported codes.
    cl_int errNum = CL_SUCCESS;
    errNum |= clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src);
    errNum |= clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset);
    errNum |= clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst);
    errNum |= clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset);
    errNum |= clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes);
    errNum |= clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets);
    errNum |= clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices);
    errNum |= clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights);
    errNum |= clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start);
    errNum |= clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end);

    if (errNum != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "clSetKernelArg (%d) ", errNum);
        return false;
    }

    errNum = clEnqueueNDRangeKernel(
        _clCommandQueue, _stencilsKernel, 1, NULL,
        &globalWorkSize, NULL, 0, NULL, NULL);

    if (errNum != CL_SUCCESS) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "ApplyStencilTableKernel (%d) ", errNum);
        return false;
    }

    clFinish(_clCommandQueue);
    return true;
}
/// Blocks until the commands of the given command queue have finished
/// (calls clFinish on it).
/* static */
void
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {
clFinish(clCommandQueue);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,230 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_CL_EVALUATOR_H
#define OPENSUBDIV_OSD_CL_EVALUATOR_H
#include "../version.h"
#include "../osd/opencl.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class StencilTables;
}
namespace Osd {
/// \brief OpenCL stencil tables
///
/// This class is an OpenCL buffer representation of Far::StencilTables.
///
/// CLEvaluator consumes this table to apply stencils
///
/// The buffers are owned by the instance, hence the class is not copyable.
///
class CLStencilTables {
public:
    /// Generic creator template.
    ///
    /// @param stencilTables  source stencil tables
    ///
    /// @param context        client providing context class which supports
    ///                         cl_context GetContext()
    ///
    template <typename DEVICE_CONTEXT>
    static CLStencilTables *Create(Far::StencilTables const *stencilTables,
                                   DEVICE_CONTEXT context) {
        return new CLStencilTables(stencilTables, context->GetContext());
    }

    /// Constructor.
    CLStencilTables(Far::StencilTables const *stencilTables,
                    cl_context clContext);

    /// Destructor.
    ~CLStencilTables();

    // interfaces needed for CLEvaluator
    cl_mem GetSizesBuffer() const { return _sizes; }
    cl_mem GetOffsetsBuffer() const { return _offsets; }
    cl_mem GetIndicesBuffer() const { return _indices; }
    cl_mem GetWeightsBuffer() const { return _weights; }
    int GetNumStencils() const { return _numStencils; }

private:
    // Not copyable (declared, intentionally not defined): a copy would hold
    // the same cl_mem handles and both destructors would release them.
    CLStencilTables(CLStencilTables const &);
    CLStencilTables & operator=(CLStencilTables const &);

    cl_mem _sizes;
    cl_mem _offsets;
    cl_mem _indices;
    cl_mem _weights;
    int _numStencils;
};
// ---------------------------------------------------------------------------
/// \brief OpenCL stencil kernel
///
/// An instance holds the OpenCL program and kernel compiled for a given pair
/// of vertex buffer descriptors. The program and kernel are owned by the
/// instance, hence the class is not copyable.
///
class CLEvaluator {
public:
    typedef bool Instantiatable;

    /// Constructor.
    CLEvaluator(cl_context context, cl_command_queue queue);

    /// Destructor.
    ~CLEvaluator();

    /// Generic creator template.
    ///
    /// @param srcDesc        vertex buffer descriptor for the input buffer
    ///
    /// @param dstDesc        vertex buffer descriptor for the output buffer
    ///
    /// @param deviceContext  client providing context class which supports
    ///                         cl_context GetContext()
    ///                         cl_command_queue GetCommandQueue()
    ///                       methods.
    ///
    template <typename DEVICE_CONTEXT>
    static CLEvaluator *Create(VertexBufferDescriptor const &srcDesc,
                               VertexBufferDescriptor const &dstDesc,
                               DEVICE_CONTEXT deviceContext) {
        return Create(srcDesc, dstDesc,
                      deviceContext->GetContext(),
                      deviceContext->GetCommandQueue());
    }

    /// Creates an instance and compiles its kernel.
    /// Returns NULL if the compilation fails. The caller owns the returned
    /// instance.
    static CLEvaluator * Create(VertexBufferDescriptor const &srcDesc,
                                VertexBufferDescriptor const &dstDesc,
                                cl_context clContext,
                                cl_command_queue clCommandQueue) {
        CLEvaluator *kernel = new CLEvaluator(clContext, clCommandQueue);
        if (kernel->Compile(srcDesc, dstDesc)) return kernel;
        delete kernel;
        return NULL;
    }

    /// \brief Generic static compute function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently from OsdMesh template interface.
    ///
    /// @param srcVertexBuffer  Input primvar buffer.
    ///                         must have BindCLBuffer() method taking a
    ///                         cl_command_queue and returning a cl_mem
    ///
    /// @param srcDesc          vertex buffer descriptor for the input buffer
    ///
    /// @param dstVertexBuffer  Output primvar buffer
    ///                         must have BindCLBuffer() method taking a
    ///                         cl_command_queue and returning a cl_mem
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable     stencil table to be applied. The table must
    ///                         have OpenCL memory interfaces
    ///                         (see CLStencilTables).
    ///
    /// @param instance         cached compiled instance. Clients are supposed
    ///                         to pre-compile an instance of this class and
    ///                         provide to this function. If it's null the
    ///                         kernel still compute by instantiating on-demand
    ///                         kernel although it may cause a performance
    ///                         problem.
    ///
    /// @param deviceContext    client providing context class which supports
    ///                           cl_context GetContext()
    ///                           cl_command_queue GetCommandQueue()
    ///                         methods. It is only used when instance is null.
    ///
    /// @return false if the on-demand instance cannot be created, otherwise
    ///         the result of the member EvalStencils().
    ///
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE,
              typename DEVICE_CONTEXT>
    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             VERTEX_BUFFER *dstVertexBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             CLEvaluator const *instance,
                             DEVICE_CONTEXT deviceContext) {
        if (instance) {
            return instance->EvalStencils(srcVertexBuffer, srcDesc,
                                          dstVertexBuffer, dstDesc,
                                          stencilTable);
        } else {
            // Create an instance on demand (slow)
            instance = Create(srcDesc, dstDesc, deviceContext);
            if (instance) {
                bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
                                                dstVertexBuffer, dstDesc,
                                                stencilTable);
                // the temporary instance is owned by this function
                delete instance;
                return r;
            }
            return false;
        }
    }

    /// Generic compute function.
    /// Binds both vertex buffers with the command queue of this instance and
    /// applies all the stencils of the table, from 0 to GetNumStencils().
    /// Returns the result of the cl_mem overload below.
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                      VertexBufferDescriptor const &srcDesc,
                      VERTEX_BUFFER *dstVertexBuffer,
                      VertexBufferDescriptor const &dstDesc,
                      STENCIL_TABLE const *stencilTable) const {
        return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue),
                            srcDesc,
                            dstVertexBuffer->BindCLBuffer(_clCommandQueue),
                            dstDesc,
                            stencilTable->GetSizesBuffer(),
                            stencilTable->GetOffsetsBuffer(),
                            stencilTable->GetIndicesBuffer(),
                            stencilTable->GetWeightsBuffer(),
                            0,
                            stencilTable->GetNumStencils());
    }

    /// Dispatch the CL compute kernel for the stencils [start, end).
    /// Note: the current implementation waits for the command queue to
    /// finish (clFinish) before returning, so the call is synchronous.
    /// Returns true if end <= start (nothing to do). Returns false if the
    /// kernel cannot be enqueued, e.g. if it hasn't been compiled yet.
    bool EvalStencils(cl_mem src,
                      VertexBufferDescriptor const &srcDesc,
                      cl_mem dst,
                      VertexBufferDescriptor const &dstDesc,
                      cl_mem sizes,
                      cl_mem offsets,
                      cl_mem indices,
                      cl_mem weights,
                      int start,
                      int end) const;

    /// Configure OpenCL kernel.
    /// Returns false if it fails to compile the kernel.
    bool Compile(VertexBufferDescriptor const &srcDesc,
                 VertexBufferDescriptor const &dstDesc);

    /// Wait the OpenCL kernels finish.
    template <typename DEVICE_CONTEXT>
    static void Synchronize(DEVICE_CONTEXT deviceContext) {
        Synchronize(deviceContext->GetCommandQueue());
    }

    /// Wait until the commands of the given queue finish.
    static void Synchronize(cl_command_queue queue);

private:
    // Not copyable (declared, intentionally not defined): a copy would hold
    // the same program and kernel handles and both destructors would release
    // them.
    CLEvaluator(CLEvaluator const &);
    CLEvaluator & operator=(CLEvaluator const &);

    cl_context _clContext;
    cl_command_queue _clCommandQueue;
    cl_program _program;
    cl_kernel _stencilsKernel;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_CL_EVALUATOR_H

View File

@ -37,7 +37,7 @@ namespace Osd {
/// \brief Concrete vertex buffer class for OpenCL subdivision.
///
/// CLVertexBuffer implements CLVertexBufferInterface. An instance of this
/// buffer class can be passed to CLComputeController
/// buffer class can be passed to CLEvaluator
///
class CLVertexBuffer {

View File

@ -1,77 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../far/stencilTables.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/cpuKernel.h"
#include <cstring>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// ----------------------------------------------------------------------------
// The context does not own the tables it is given: it keeps private copies
// (made with the Far::StencilTables copy-constructor) of the non-null ones,
// and a null pointer for the others.
CpuComputeContext::CpuComputeContext(
    Far::StencilTables const * vertexStencilTables,
    Far::StencilTables const * varyingStencilTables) :
        _vertexStencilTables(
            vertexStencilTables ?
                new Far::StencilTables(*vertexStencilTables) : 0),
        _varyingStencilTables(
            varyingStencilTables ?
                new Far::StencilTables(*varyingStencilTables) : 0) {
}
// ----------------------------------------------------------------------------
// Deletes the copies of the stencil tables made by the constructor
// (deleting a null pointer is a no-op).
CpuComputeContext::~CpuComputeContext() {
delete _vertexStencilTables;
delete _varyingStencilTables;
}
// ----------------------------------------------------------------------------
// Factory: returns a new context built from the given stencil tables.
// The device context argument is ignored.
CpuComputeContext *
CpuComputeContext::Create(
Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
void * /*deviceContext*/) {
return new CpuComputeContext(vertexStencilTables, varyingStencilTables);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,102 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CPU_COMPUTE_CONTEXT_H
#define OSD_CPU_COMPUTE_CONTEXT_H
#include "../version.h"
#include <cstddef>
#include "../osd/nonCopyable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far { class StencilTables; }
namespace Osd {
///
/// \brief CPU Compute Context
///
/// The CPU implementation of the Compute module contextual functionality.
///
/// The Osd Compute module provides functionality to interpolate primitive
/// variable data according to a subdivision scheme.
///
/// Contexts provide an interface between the serialized topological data
/// of a geometric primitive and the computation resources of a compute device.
///
class CpuComputeContext : private NonCopyable<CpuComputeContext> {

public:
    /// Creates a CpuComputeContext instance
    ///
    /// @param vertexStencilTables   The Far::StencilTables used for vertex
    ///                              interpolation
    ///
    /// @param varyingStencilTables  The Far::StencilTables used for varying
    ///                              interpolation
    ///
    /// @param deviceContext         (not used)
    ///
    static CpuComputeContext * Create(
        Far::StencilTables const * vertexStencilTables,
        Far::StencilTables const * varyingStencilTables,
        void *deviceContext = NULL);

    /// Destructor
    virtual ~CpuComputeContext();

    /// Returns the stencil tables this context applies for vertex
    /// interpolation
    Far::StencilTables const * GetVertexStencilTables() const {
        return _vertexStencilTables;
    }

    /// Returns the stencil tables this context applies for varying
    /// interpolation
    Far::StencilTables const * GetVaryingStencilTables() const {
        return _varyingStencilTables;
    }

protected:
    /// Instances are created through Create()
    explicit CpuComputeContext(Far::StencilTables const * vertexStencilTables,
                               Far::StencilTables const * varyingStencilTables);

private:
    Far::StencilTables const * _vertexStencilTables;
    Far::StencilTables const * _varyingStencilTables;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_CPU_COMPUTE_CONTEXT_H

View File

@ -1,106 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../far/stencilTables.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/cpuComputeController.h"
#include "../osd/cpuKernel.h"
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Nothing to set up.
CpuComputeController::CpuComputeController() {
}
// Nothing to release.
CpuComputeController::~CpuComputeController() {
}
// Intentionally empty: this controller has nothing to wait for here.
void
CpuComputeController::Synchronize() {
}
void
CpuComputeController::ApplyStencilTableKernel(
ComputeContext const *context) const {
assert(context);
Far::StencilTables const * vertexStencils = context->GetVertexStencilTables();
if (vertexStencils and _currentBindState.vertexBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += vertexStencils->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = vertexStencils->GetNumStencils();
if (end > start) {
CpuComputeStencils(_currentBindState.vertexBuffer,
srcDesc,
_currentBindState.vertexBuffer,
dstDesc,
&vertexStencils->GetSizes().at(0),
&vertexStencils->GetOffsets().at(0),
&vertexStencils->GetControlIndices().at(0),
&vertexStencils->GetWeights().at(0),
start,
end);
}
}
Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables();
if (varyingStencils and _currentBindState.varyingBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += varyingStencils->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = varyingStencils->GetNumStencils();
if (end > start) {
CpuComputeStencils(_currentBindState.varyingBuffer,
srcDesc,
_currentBindState.varyingBuffer,
dstDesc,
&varyingStencils->GetSizes().at(0),
&varyingStencils->GetOffsets().at(0),
&varyingStencils->GetControlIndices().at(0),
&varyingStencils->GetWeights().at(0),
start,
end);
}
}
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,179 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CPU_COMPUTE_CONTROLLER_H
#define OSD_CPU_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Compute controller for launching CPU subdivision kernels.
///
/// CpuComputeController is a compute controller class to launch
/// single threaded CPU subdivision kernels. The buffers passed to the
/// Compute() function must provide the BindCpuBuffer() and GetNumElements()
/// methods.
///
/// The Osd Compute module provides functionality to interpolate primitive
/// variable data according to a subdivision scheme.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class CpuComputeController {
public:
typedef CpuComputeContext ComputeContext;
/// Constructor.
CpuComputeController();
/// Destructor.
~CpuComputeController();
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// @param context The CpuContext to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
/// @param varyingBuffer Varying-interpolated data buffer
///
/// @param vertexDesc The descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc The descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Compute( CpuComputeContext const * context,
VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer,
VertexBufferDescriptor const * vertexDesc=NULL,
VertexBufferDescriptor const * varyingDesc=NULL ){
// the bind state is only valid between bind() and unbind()
bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
ApplyStencilTableKernel(context);
unbind();
}
/// Execute subdivision kernels and apply to given vertex buffers.
/// No varying data is refined by this overload.
///
/// @param context The CpuContext to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
template<class VERTEX_BUFFER>
void Compute(CpuComputeContext const * context,
VERTEX_BUFFER *vertexBuffer) {
// forwards a null varying buffer
Compute<VERTEX_BUFFER>(context, vertexBuffer, (VERTEX_BUFFER*)0);
}
/// Waits until all running subdivision kernels finish.
void Synchronize();
protected:
/// Applies the stencil tables of the context to the bound buffers.
void ApplyStencilTableKernel(ComputeContext const *context) const;
/// Records the buffers and their descriptors in the bind state.
/// Either buffer may be null.
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void bind( VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer,
VertexBufferDescriptor const * vertexDesc,
VertexBufferDescriptor const * varyingDesc ) {
// if the vertex buffer descriptor is specified, use it.
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (vertexDesc) {
_currentBindState.vertexDesc = *vertexDesc;
} else {
int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0;
_currentBindState.vertexDesc =
VertexBufferDescriptor(0, numElements, numElements);
}
// same rule for the varying buffer descriptor
if (varyingDesc) {
_currentBindState.varyingDesc = *varyingDesc;
} else {
int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0;
_currentBindState.varyingDesc =
VertexBufferDescriptor(0, numElements, numElements);
}
_currentBindState.vertexBuffer = vertexBuffer ?
vertexBuffer->BindCpuBuffer() : 0;
_currentBindState.varyingBuffer = varyingBuffer ?
varyingBuffer->BindCpuBuffer() : 0;
}
/// Clears the bind state.
void unbind() {
_currentBindState.Reset();
}
private:
// Bind state is a transitional state during refinement.
// It doesn't take an ownership of the vertex buffers.
struct BindState {
BindState() : vertexBuffer(0), varyingBuffer(0) { }
// back to the unbound state: null buffers, reset descriptors
void Reset() {
vertexBuffer = varyingBuffer = 0;
vertexDesc.Reset();
varyingDesc.Reset();
}
float * vertexBuffer,
* varyingBuffer;
VertexBufferDescriptor vertexDesc,
varyingDesc;
};
BindState _currentBindState;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_CPU_COMPUTE_CONTROLLER_H

View File

@ -43,7 +43,7 @@ namespace Osd {
/// CpuD3D11VertexBuffer implements CpuVertexBufferInterface and
/// D3D11VertexBufferInterface.
///
/// An instance of this buffer class can be passed to CpuComputeController.
/// An instance of this buffer class can be passed to CpuEvaluator.
///
class CpuD3D11VertexBuffer {
public:

View File

@ -1,57 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cpuEvalLimitContext.h"
#include "../osd/vertexDescriptor.h"
#include <string.h>
#include <cassert>
#include <cstdio>
#include <cmath>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Factory: returns a new context for feature-adaptive patch tables,
// NULL otherwise.
CpuEvalLimitContext *
CpuEvalLimitContext::Create(Far::PatchTables const & patchTables) {

    if (patchTables.IsFeatureAdaptive()) {
        return new CpuEvalLimitContext(patchTables);
    }

    // there is no limit with uniform subdivision
    return NULL;
}
// Initializes the base class, the patch tables member and the patch map
// member from the given patch tables.
CpuEvalLimitContext::CpuEvalLimitContext(Far::PatchTables const & patchTables) :
EvalLimitContext(patchTables),
_patchTables(patchTables),
_patchMap(patchTables) {
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,80 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CPU_EVAL_LIMIT_CONTEXT_H
#define OSD_CPU_EVAL_LIMIT_CONTEXT_H
#include "../version.h"
#include "../osd/evalLimitContext.h"
#include "../far/patchTables.h"
#include "../far/patchMap.h"
#include <map>
#include <stdio.h>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief CPU EvalLimit context
///
/// Holds the patch tables and the patch map used to locate patches.
///
class CpuEvalLimitContext : public EvalLimitContext {
public:
/// \brief Factory
/// Returns an EvalLimitContext from the given far patch tables.
/// Note : the patchtables is expected to be feature-adaptive and have ptex
/// coordinates tables.
///
/// @param patchTables a reference to an initialized Far::PatchTables
///
static CpuEvalLimitContext * Create(Far::PatchTables const &patchTables);
/// Returns the patch tables held by this context
Far::PatchTables const & GetPatchTables() const {
return _patchTables;
}
/// Returns the patch map held by this context
Far::PatchMap const & GetPatchMap() const {
return _patchMap;
}
protected:
/// Instances are created through Create()
explicit CpuEvalLimitContext(Far::PatchTables const & patchTables);
private:
// both members are stored by value, not by reference
Far::PatchTables const _patchTables; // Patch topology data
Far::PatchMap const _patchMap; // Patch search accelerator
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif /* OSD_CPU_EVAL_LIMIT_CONTEXT_H */

View File

@ -1,288 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cpuEvalLimitController.h"
#include "../osd/cpuEvalLimitContext.h"
#include "../osd/cpuEvalLimitKernel.h"
#include "../far/patchTables.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Nothing to set up.
CpuEvalLimitController::CpuEvalLimitController() {
}
// Nothing to release.
CpuEvalLimitController::~CpuEvalLimitController() {
}
// Vertex interpolation of a sample at the limit
//
// Finds the patch containing 'coord' with the patch map of the context and,
// if vertex data is bound, evaluates that patch with the kernel matching its
// type. The results are written to outQ / outDQU / outDQV, laid out as
// described by 'outDesc'.
//
// Returns 0 if no patch is found at 'coord', 1 otherwise.
int
CpuEvalLimitController::EvalLimitSample( LimitLocation const & coord,
                                         CpuEvalLimitContext * context,
                                         VertexBufferDescriptor const & outDesc,
                                         float * outQ,
                                         float * outDQU,
                                         float * outDQV ) const {

    typedef Far::PatchDescriptor Desc;

    float s=coord.s,
          t=coord.t;

    Far::PatchMap::Handle const * handle =
        context->GetPatchMap().FindPatch( coord.ptexIndex, s, t );
    if (not handle) {
        return 0;  // no handle if there is a hole or 'coord' is incorrect
    }

    VertexData const & vertexData = _currentBindState.vertexData;

    if (vertexData.in) {

        Far::PatchTables const & ptables = context->GetPatchTables();
        Far::PatchParam pparam = ptables.GetPatchParam(*handle);
        Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle);
        Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle);

        switch (desc.GetType()) {

            case Desc::REGULAR :
                evalBSpline( pparam.bitField, s, t, cvs.begin(),
                             vertexData.inDesc,
                             vertexData.in,
                             outDesc,
                             outQ, outDQU, outDQV );
                break;

            case Desc::BOUNDARY :
                evalBoundary( pparam.bitField, s, t, cvs.begin(),
                              vertexData.inDesc,
                              vertexData.in,
                              outDesc,
                              outQ, outDQU, outDQV );
                break;

            case Desc::CORNER :
                evalCorner( pparam.bitField, s, t, cvs.begin(),
                            vertexData.inDesc,
                            vertexData.in,
                            outDesc,
                            outQ, outDQU, outDQV );
                break;

            // NOTE(review): the two Gregory kernels below are called with
            // (t, s) while the others are called with (s, t) -- kept as is,
            // confirm that the swap is intended.
            case Desc::GREGORY :
                evalGregory( pparam.bitField, t, s, cvs.begin(),
                             &ptables.GetVertexValenceTable()[0],
                             ptables.GetPatchQuadOffsets(*handle).begin(),
                             ptables.GetMaxValence(),
                             vertexData.inDesc,
                             vertexData.in,
                             outDesc,
                             outQ, outDQU, outDQV );
                break;

            case Desc::GREGORY_BOUNDARY :
                evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(),
                                     &ptables.GetVertexValenceTable()[0],
                                     ptables.GetPatchQuadOffsets(*handle).begin(),
                                     ptables.GetMaxValence(),
                                     vertexData.inDesc,
                                     vertexData.in,
                                     outDesc,
                                     outQ, outDQU, outDQV );
                break;

            case Desc::GREGORY_BASIS :
                // The output goes to the caller's buffers, so it must be
                // described by the caller's 'outDesc' like every other patch
                // type (not by the descriptor of the bind state).
                evalGregoryBasis( pparam.bitField, s, t, cvs.begin(),
                                  vertexData.inDesc,
                                  vertexData.in,
                                  outDesc,
                                  outQ, outDQU, outDQV );
                break;

            case Desc::QUADS :
                evalBilinear( pparam.bitField, s, t, cvs.begin(),
                              vertexData.inDesc,
                              vertexData.in,
                              outDesc,
                              outQ, outDQU, outDQV );
                break;

            default:
                assert(0);
        }
    }

    // NOTE(review): this assertion fires on every call that found a patch
    // when assertions are enabled. It is kept to preserve the existing
    // behavior -- confirm whether it is a leftover that should be removed.
    assert(0);
    return 1;
}
// Vertex, varying and face-varying interpolation of one sample at the limit.
//
// Looks up the patch containing 'coords' through the context's patch map and,
// for each data stream currently bound to the controller, evaluates the sample
// and writes it to element 'index' of the bound output buffer.
//
// @param coords   location to evaluate (ptexIndex, s, t)
// @param context  provides the patch map and patch tables (dereferenced
//                 without a null test : the public wrapper checks it)
// @param index    index of the sample in the bound output buffers
//
// @return 0 if no patch was found at 'coords', 1 otherwise
int
CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
                                          CpuEvalLimitContext * context,
                                          unsigned int index ) const {

    typedef Far::PatchDescriptor Desc;

    // s and t are handed to FindPatch as lvalues and the (possibly updated)
    // values are what the kernels receive below.
    // NOTE(review): presumably FindPatch remaps them to patch-local
    // coordinates -- confirm against Far::PatchMap.
    float s=coords.s,
          t=coords.t;

    Far::PatchMap::Handle const * handle = context->GetPatchMap().FindPatch( coords.ptexIndex, s, t );
    if (not handle) {
        return 0;  // no handle if there is a hole or 'coord' is incorrect
    }

    VertexData const & vertexData = _currentBindState.vertexData;

    Far::PatchTables const & ptables = context->GetPatchTables();

    Far::PatchParam pparam = ptables.GetPatchParam(*handle);

    Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle);

    Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle);

    // Vertex-interpolated stream : needs both an input and an output buffer.
    if (vertexData.in) {

        // Positions advance by the output stride, derivative buffers are
        // tightly packed and advance by the element length.
        int offset = vertexData.outDesc.stride * index,
            doffset = vertexData.outDesc.length * index;

        if (vertexData.out) {

            // note : don't apply outDesc.offset here, it's done inside patch
            //        evaluation
            float * out   = vertexData.out+offset,
                  * outDu = vertexData.outDu ? vertexData.outDu+doffset : 0,
                  * outDv = vertexData.outDv ? vertexData.outDv+doffset : 0;

            // Dispatch on the patch type. Note that the two legacy Gregory
            // kernels are called with (t, s) while all the others get (s, t).
            switch (desc.GetType()) {

                case Desc::REGULAR  : evalBSpline( pparam.bitField, s, t, cvs.begin(),
                                                   vertexData.inDesc,
                                                   vertexData.in,
                                                   vertexData.outDesc,
                                                   out, outDu, outDv );
                                      break;

                case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(),
                                                    vertexData.inDesc,
                                                    vertexData.in,
                                                    vertexData.outDesc,
                                                    out, outDu, outDv );
                                      break;

                case Desc::CORNER   : evalCorner( pparam.bitField, s, t, cvs.begin(),
                                                  vertexData.inDesc,
                                                  vertexData.in,
                                                  vertexData.outDesc,
                                                  out, outDu, outDv );
                                      break;

                case Desc::GREGORY  : evalGregory( pparam.bitField, t, s, cvs.begin(),
                                                   &ptables.GetVertexValenceTable()[0],
                                                   ptables.GetPatchQuadOffsets(*handle).begin(),
                                                   ptables.GetMaxValence(),
                                                   vertexData.inDesc,
                                                   vertexData.in,
                                                   vertexData.outDesc,
                                                   out, outDu, outDv );
                                      break;

                case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(),
                                                   &ptables.GetVertexValenceTable()[0],
                                                   ptables.GetPatchQuadOffsets(*handle).begin(),
                                                   ptables.GetMaxValence(),
                                                   vertexData.inDesc,
                                                   vertexData.in,
                                                   vertexData.outDesc,
                                                   out, outDu, outDv );
                                      break;

                case Desc::GREGORY_BASIS : {
                                      evalGregoryBasis( pparam.bitField, s, t,
                                                        cvs.begin(),
                                                        vertexData.inDesc,
                                                        vertexData.in,
                                                        vertexData.outDesc,
                                                        out, outDu, outDv );
                                      } break;

                case Desc::QUADS    : evalBilinear( pparam.bitField, s, t, cvs.begin(),
                                                    vertexData.inDesc,
                                                    vertexData.in,
                                                    vertexData.outDesc,
                                                    out, outDu, outDv );
                                      break;

                default:
                    assert(0);
            }
        }
    }

    // Varying stream : bilinear interpolation between the 4 control vertices
    // of the patch selected by the per-type permutation below.
    VaryingData const & varyingData = _currentBindState.varyingData;
    if (varyingData.in and varyingData.out) {

        static int const zeroRings[5][4] = { {5, 6,10, 9},    // regular
                                             {1, 2, 6, 5},    // boundary / single-crease
                                             {1, 2, 5, 4},    // corner
                                             {0, 1, 2, 3},    // no permutation
                                             {0, 5, 10, 15} }; // gregory basis

        int const * permute = 0;
        switch (desc.GetType()) {
            case Desc::REGULAR          : permute = zeroRings[0]; break;
            case Desc::BOUNDARY         : permute = zeroRings[1]; break;
            case Desc::CORNER           : permute = zeroRings[2]; break;
            // QUADS is evaluated bilinearly from cvs as-is in the vertex
            // stream above, so the identity permutation is used here as well.
            case Desc::QUADS            :
            case Desc::GREGORY          :
            case Desc::GREGORY_BOUNDARY : permute = zeroRings[3]; break;
            case Desc::GREGORY_BASIS    : permute = zeroRings[4]; break;
            default:
                assert(0);
        };

        // When assert() is compiled out an unknown patch type leaves
        // 'permute' null : skip the stream instead of dereferencing it.
        if (permute) {

            int offset = varyingData.outDesc.stride * index;

            Far::Index zeroRing[4] = { cvs[permute[0]],
                                       cvs[permute[1]],
                                       cvs[permute[2]],
                                       cvs[permute[3]]  };

            evalBilinear( pparam.bitField, s, t, zeroRing,
                          varyingData.inDesc,
                          varyingData.in,
                          varyingData.outDesc,
                          varyingData.out+offset, 0, 0);
        }
    }

    // Note : currently we only support bilinear boundary interpolation rules
    // for limit face-varying data.
    FacevaryingData const & facevaryingData = _currentBindState.facevaryingData;
    if (facevaryingData.in and facevaryingData.out) {

        int offset = facevaryingData.outDesc.stride * index;

        static int const zeroRing[4] = {0,1,2,3};

        // XXXX manuelk this assumes FVar data is ordered with 4 CVs / patch :
        //              bi-cubic FVar interpolation will require proper topology
        //              accessors in Far::PatchTables and this code will change
        //
        // NOTE(review): the input buffer is indexed with outDesc.stride, not
        // inDesc.stride -- looks unintended when the two strides differ;
        // confirm before changing.
        evalBilinear( pparam.bitField, s, t, zeroRing,
                      facevaryingData.inDesc,
                      &facevaryingData.in[handle->patchIndex*4*facevaryingData.outDesc.stride],
                      facevaryingData.outDesc,
                      facevaryingData.out+offset, 0, 0);
    }

    return 1;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,298 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CPU_EVAL_LIMIT_CONTROLLER_H
#define OSD_CPU_EVAL_LIMIT_CONTROLLER_H
#include "../version.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
struct LimitLocation;
class CpuEvalLimitContext;
/// \brief CPU controller for limit surface evaluation.
///
/// A CPU-driven controller that can be called to evaluate samples on the limit
/// surface for a given EvalContext.
///
/// Warning : this eval controller is re-entrant but it breaks the Osd API pattern
///           by requiring client code to bind and unbind the data buffers to the
///           Controller before calling evaluation methods.
///
///  Ex :
///  \code
///  evalController->BindVertexBuffers( ... );
///  evalController->BindVaryingBuffers( ... );
///  evalController->BindFacevaryingBuffers( ... );
///
///  parallel_for( int index=0; index<nsamples; ++index ) {
///      evalController->EvalLimitSample( coord, evalCtxt, index );
///  }
///
///  evalController->Unbind();
///  \endcode
///
class CpuEvalLimitController {

public:

    /// Constructor.
    CpuEvalLimitController();

    /// Destructor.
    ~CpuEvalLimitController();

    /// \brief Binds control vertex data buffer
    ///
    /// Each non-null buffer is bound through its BindCpuBuffer() method; a
    /// null buffer pointer leaves the corresponding stream pointer at 0.
    ///
    /// @param iDesc   data descriptor shared by all input data buffers
    ///
    /// @param inQ     input vertex data
    ///
    /// @param oDesc   data descriptor for the outQ data buffer
    ///                -- derivative buffers do not have a descriptor and
    ///                cannot be offset or padded with a stride (yet ?)
    ///
    /// @param outQ    output vertex data
    ///
    /// @param outdQu  output derivative along "u" of the vertex data (optional)
    ///
    /// @param outdQv  output derivative along "v" of the vertex data (optional)
    ///
    template<class INPUT_BUFFER, class OUTPUT_BUFFER>
    void BindVertexBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ,
                            VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ,
                                                                  OUTPUT_BUFFER *outdQu=0,
                                                                  OUTPUT_BUFFER *outdQv=0 ) {
        _currentBindState.vertexData.inDesc = iDesc;
        _currentBindState.vertexData.in = inQ ? inQ->BindCpuBuffer() : 0;

        _currentBindState.vertexData.outDesc = oDesc;
        _currentBindState.vertexData.out = outQ ? outQ->BindCpuBuffer() : 0;
        _currentBindState.vertexData.outDu = outdQu ? outdQu->BindCpuBuffer() : 0;
        _currentBindState.vertexData.outDv = outdQv ? outdQv->BindCpuBuffer() : 0;
    }

    /// \brief Binds the varying-interpolated data streams
    ///
    /// @param iDesc  data descriptor shared by all input data buffers
    ///
    /// @param inQ    input varying data
    ///
    /// @param oDesc  data descriptor for the outQ data buffer
    ///
    /// @param outQ   output varying data
    ///
    template<class INPUT_BUFFER, class OUTPUT_BUFFER>
    void BindVaryingBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ,
                             VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ ) {
        _currentBindState.varyingData.inDesc = iDesc;
        _currentBindState.varyingData.in = inQ ? inQ->BindCpuBuffer() : 0;

        _currentBindState.varyingData.outDesc = oDesc;
        _currentBindState.varyingData.out = outQ ? outQ->BindCpuBuffer() : 0;
    }

    /// \brief Binds the face-varying-interpolated data streams
    ///
    /// Note : currently we only support bilinear boundary interpolation rules
    /// for face-varying data. Although Hbr supports 3 additional smooth rule
    /// sets, the feature-adaptive patch interpolation code currently does not
    /// support them, and neither does this EvalContext
    ///
    /// @param iDesc  data descriptor shared by all input data buffers
    ///
    /// @param inQ    input face-varying data
    ///
    /// @param oDesc  data descriptor for the outQ data buffer
    ///
    /// @param outQ   output face-varying data
    ///
    template<class INPUT_BUFFER, class OUTPUT_BUFFER>
    void BindFacevaryingBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ,
                                 VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ ) {
        _currentBindState.facevaryingData.inDesc = iDesc;
        _currentBindState.facevaryingData.in = inQ ? inQ->BindCpuBuffer() : 0;

        _currentBindState.facevaryingData.outDesc = oDesc;
        _currentBindState.facevaryingData.out = outQ ? outQ->BindCpuBuffer() : 0;
    }

    /// \brief Vertex interpolation of a single sample at the limit
    ///
    /// Evaluates "vertex" interpolation of a single sample on the surface limit.
    ///
    /// This function is re-entrant but does not require binding the
    /// output vertex buffers. Pointers to memory where the data is
    /// output are explicitly passed to the function.
    ///
    /// @param coord    location on the limit surface to be evaluated
    ///
    /// @param context  the EvalLimitContext that the controller will evaluate
    ///
    /// @param outDesc  data descriptor for the outQ data buffer
    ///                 -- derivative buffers do not have a descriptor and
    ///                 cannot be offset or padded with a stride (yet ?)
    ///
    /// @param outQ     output vertex data
    ///
    /// @param outDQU   output derivative along "u" of the vertex data (optional)
    ///
    /// @param outDQV   output derivative along "v" of the vertex data (optional)
    ///
    /// @return 1 if the sample was found
    ///
    int EvalLimitSample( LimitLocation const & coord,
                         CpuEvalLimitContext * context,
                         VertexBufferDescriptor const & outDesc,
                         float * outQ,
                         float * outDQU,
                         float * outDQV ) const;

    /// \brief Vertex interpolation of samples at the limit
    ///
    /// Evaluates "vertex" interpolation of a sample on the surface limit.
    ///
    /// @param coords   location on the limit surface to be evaluated
    ///
    /// @param context  the EvalLimitContext that the controller will evaluate
    ///                 (a null context is accepted and yields 0)
    ///
    /// @param index    the index of the vertex in the output buffers bound to the
    ///                 context
    ///
    /// @return the number of samples found (0 if the location was tagged as a hole
    ///         or the coordinate was invalid)
    ///
    int EvalLimitSample( LimitLocation const & coords,
                         CpuEvalLimitContext * context,
                         unsigned int index ) const {
        if (not context)
            return 0;

        int n = _EvalLimitSample( coords, context, index );

        return n;
    }

    /// \brief Clears every stream pointer and descriptor bound by the
    ///        Bind*Buffers() methods.
    void Unbind() {
        _currentBindState.Reset();
    }

protected:

    // Vertex interpolated streams
    struct VertexData {

        VertexData() : in(0), out(0), outDu(0), outDv(0) { }

        // Uses the literal 0 (as the constructor does) : this header does not
        // include a standard header that defines NULL.
        void Reset() {
            in = out = outDu = outDv = 0;
            inDesc.Reset();
            outDesc.Reset();
        }

        VertexBufferDescriptor inDesc,
                               outDesc;
        float * in,
              * out,
              * outDu,
              * outDv;
    };

    // Varying interpolated streams
    struct VaryingData {

        VaryingData() : in(0), out(0) { }

        void Reset() {
            in = out = 0;
            inDesc.Reset();
            outDesc.Reset();
        }

        VertexBufferDescriptor inDesc,
                               outDesc;
        float * in,
              * out;
    };

    // Facevarying interpolated streams
    struct FacevaryingData {

        FacevaryingData() : in(0), out(0) { }

        void Reset() {
            in = out = 0;
            inDesc.Reset();
            outDesc.Reset();
        }

        VertexBufferDescriptor inDesc,
                               outDesc;
        float * in,
              * out;
    };

private:

    // Implementation of the index-based EvalLimitSample() : 'context' is not
    // null-checked here, the public wrapper does it.
    int _EvalLimitSample( LimitLocation const & coords,
                          CpuEvalLimitContext * context,
                          unsigned int index ) const;

    // Bind state is a transitional state during refinement.
    // It doesn't take an ownership of vertex buffers.
    struct BindState {

        BindState() { }

        void Reset() {
            vertexData.Reset();
            varyingData.Reset();
            facevaryingData.Reset();
        }

        VertexData      vertexData;      // vertex interpolated data descriptor
        VaryingData     varyingData;     // varying interpolated data descriptor
        FacevaryingData facevaryingData; // face-varying interpolated data descriptor
    };

    BindState _currentBindState;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif /* OSD_CPU_EVAL_LIMIT_CONTROLLER_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,132 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CPU_EVAL_LIMIT_KERNEL_H
#define OSD_CPU_EVAL_LIMIT_KERNEL_H
#include "../version.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchParam.h"
#include "../far/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class StencilTables;
}
namespace Osd {
// Limit evaluation kernels, one per patch type.
//
// All kernels share the same trailing arguments. As used by
// CpuEvalLimitController :
//   bits           the Far::PatchParam bit field of the patch
//   u, v           parametric location of the sample
//   vertexIndices  indices of the patch control vertices
//   inDesc / inQ   descriptor and data of the input (control) buffer
//   outDesc / outQ descriptor and destination of the evaluated sample
//                  (the controller notes that outDesc.offset is applied inside
//                  the patch evaluation, not by the caller)
//   outDQU, outDQV destinations of the u / v derivatives; the controller
//                  passes 0 for both when derivatives are not wanted
//                  NOTE(review): null handling lives in the .cpp -- confirm.

// Kernel used for Far::PatchDescriptor::QUADS patches and, with 4 selected
// control vertices, for varying and face-varying data.
void
evalBilinear(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV);
// Kernel used for Far::PatchDescriptor::REGULAR patches.
void
evalBSpline(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV );
// Kernel used for Far::PatchDescriptor::BOUNDARY patches.
void
evalBoundary(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV );
// Kernel used for Far::PatchDescriptor::CORNER patches.
void
evalCorner(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV );
// Kernel used for Far::PatchDescriptor::GREGORY_BASIS patches.
void
evalGregoryBasis(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const *vertsIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV );
// Kernel used for Far::PatchDescriptor::GREGORY patches.
// The controller supplies vertexValenceBuffer from the patch tables' vertex
// valence table, quadOffsetBuffer from the patch's quad offsets and
// maxValence from PatchTables::GetMaxValence(). Note that the controller
// calls this kernel with its (t, s) pair as (u, v).
void
evalGregory(Far::PatchParam::BitField bits, float u, float v,
Far::Index const * vertexIndices,
Far::Index const * vertexValenceBuffer,
unsigned int const * quadOffsetBuffer,
int maxValence,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV );
// Kernel used for Far::PatchDescriptor::GREGORY_BOUNDARY patches; takes the
// same valence / quad-offset arguments as evalGregory and is likewise called
// by the controller with (t, s) as (u, v).
void
evalGregoryBoundary(Far::PatchParam::BitField bits, float u, float v,
Far::Index const * vertexIndices,
Far::Index const * vertexValenceBuffer,
unsigned int const * quadOffsetBuffer,
int maxValence,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV );
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif /* OSD_CPU_EVAL_LIMIT_KERNEL_H */

View File

@ -1,74 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef FAR_CPU_EVALSTENCILS_CONTEXT_H
#define FAR_CPU_EVALSTENCILS_CONTEXT_H
#include "../version.h"
#include "../far/stencilTables.h"
#include "../osd/vertexDescriptor.h"
#include "../osd/nonCopyable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
///
/// \brief CPU stencils evaluation context
///
/// Non-copyable holder of a pointer to the Far::LimitStencilTables that a
/// CpuEvalStencilsController evaluates. Instances are obtained through
/// Create(); the constructor is protected.
///
/// NOTE(review): the context only stores the pointer it is given -- whether
/// it owns the tables is decided in the .cpp, confirm there.
///
class CpuEvalStencilsContext : private NonCopyable<CpuEvalStencilsContext> {
public:
/// \brief Creates a CpuEvalStencilsContext instance
///
/// @param stencils a pointer to the Far::LimitStencilTables
///
/// @return a pointer to the new context
///
static CpuEvalStencilsContext * Create(Far::LimitStencilTables const *stencils);
/// \brief Returns the Far::LimitStencilTables applied
Far::LimitStencilTables const * GetStencilTables() const {
return _stencils;
}
protected:
/// Constructor, reachable only through Create() or a derived class.
CpuEvalStencilsContext(Far::LimitStencilTables const *stencils);
private:
// Stencil tables handed to Create() / the constructor.
Far::LimitStencilTables const * _stencils;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // FAR_CPU_EVALSTENCILS_CONTEXT_H

View File

@ -1,149 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cpuEvalStencilsController.h"
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Constructor : the body is empty, the bind state is initialized by its own
// default constructor.
CpuEvalStencilsController::CpuEvalStencilsController() {
}
// Destructor : the body is empty, the controller does not take ownership of
// the buffers it binds.
CpuEvalStencilsController::~CpuEvalStencilsController() {
}
// Applies the stencil weights of 'context' to the bound control data and
// writes one interpolated element per stencil to the bound output buffer.
//
// Reads the buffers and descriptors from _currentBindState, so the caller
// must have bound control and output data first.
//
// @param context  provides the Far::LimitStencilTables (not null-checked here)
//
// @return the number of stencils evaluated, or 0 if there are no stencils,
//         the descriptors cannot be evaluated together, or a buffer is not
//         bound
int
CpuEvalStencilsController::_UpdateValues( CpuEvalStencilsContext * context ) {

    Far::LimitStencilTables const * stencils = context->GetStencilTables();

    int nstencils = stencils->GetNumStencils();
    if (not nstencils)
        return 0;

    VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc,
                           outDesc = _currentBindState.outputDataDesc;

    // make sure that we have control data to work with
    if (not ctrlDesc.CanEval(outDesc))
        return 0;

    // Test the bound pointers themselves : once the descriptor offset has
    // been added a null buffer no longer compares equal to null.
    if ((not _currentBindState.controlData) or (not _currentBindState.outputData))
        return 0;

    float const * ctrl = _currentBindState.controlData + ctrlDesc.offset;

    float * out = _currentBindState.outputData + outDesc.offset;

    // The three tables are walked in lock-step : sizes[i] gives the number of
    // consecutive (index, weight) pairs that belong to stencil i.
    unsigned char const * sizes = &stencils->GetSizes().at(0);
    Far::Index const * index = &stencils->GetControlIndices().at(0);

    float const * weight = &stencils->GetWeights().at(0);

    for (int i=0; i<nstencils; ++i) {

        // start each output element from zero before accumulating
        for (int k=0; k<outDesc.length; ++k) {
            out[k] = 0.0f;
        }

        for (int j=0; j<sizes[i]; ++j, ++index, ++weight) {

            float const * cv = ctrl + (*index)*ctrlDesc.stride;

            for (int k=0; k<outDesc.length; ++k) {
                out[k] += cv[k] * (*weight);
            }
        }
        out += outDesc.stride;
    }

    return nstencils;
}
// Applies the U and V derivative stencil weights of 'context' to the bound
// control data and writes one element per stencil to each of the bound
// derivative output buffers.
//
// Reads the buffers and descriptors from _currentBindState, so the caller
// must have bound control and derivative output data first.
//
// @param context  provides the Far::LimitStencilTables (not null-checked here)
//
// @return the number of stencils evaluated, or 0 if there are no stencils,
//         the descriptors cannot be evaluated together, or a buffer is not
//         bound
int
CpuEvalStencilsController::_UpdateDerivs( CpuEvalStencilsContext * context ) {

    Far::LimitStencilTables const * stencils = context->GetStencilTables();

    int nstencils = stencils->GetNumStencils();
    if (not nstencils)
        return 0;

    VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc,
                           duDesc = _currentBindState.outputDuDesc,
                           dvDesc = _currentBindState.outputDvDesc;

    // make sure that we have control data to work with
    if (not (ctrlDesc.CanEval(duDesc) and ctrlDesc.CanEval(dvDesc)))
        return 0;

    // Test the bound pointers themselves : once the descriptor offset has
    // been added a null buffer no longer compares equal to null.
    if ((not _currentBindState.controlData) or
        (not _currentBindState.outputUDeriv) or
        (not _currentBindState.outputVDeriv))
        return 0;

    float const * ctrl = _currentBindState.controlData + ctrlDesc.offset;

    float * du = _currentBindState.outputUDeriv + duDesc.offset,
          * dv = _currentBindState.outputVDeriv + dvDesc.offset;

    // sizes[i] gives the number of consecutive (index, duweight, dvweight)
    // entries that belong to stencil i.
    unsigned char const * sizes = &stencils->GetSizes().at(0);
    Far::Index const * index = &stencils->GetControlIndices().at(0);

    float const * duweight = &stencils->GetDuWeights().at(0),
                * dvweight = &stencils->GetDvWeights().at(0);

    for (int i=0; i<nstencils; ++i) {

        // start both output elements from zero before accumulating
        for (int k=0; k<duDesc.length; ++k) {
            du[k] = 0.0f;
        }
        for (int k=0; k<dvDesc.length; ++k) {
            dv[k] = 0.0f;
        }

        for (int j=0; j<sizes[i]; ++j, ++index, ++duweight, ++dvweight) {

            float const * cv = ctrl + (*index)*ctrlDesc.stride;

            // NOTE(review): both outputs are accumulated over duDesc.length;
            // this presumes dvDesc.length is the same -- confirm that
            // CanEval() guarantees it.
            for (int k=0; k<duDesc.length; ++k) {
                du[k] += cv[k] * (*duweight);
                dv[k] += cv[k] * (*dvweight);
            }
        }
        du += duDesc.stride;
        dv += dvDesc.stride;
    }

    return nstencils;
}
// Declared in the header as "waits until all running subdivision kernels
// finish". The body is empty : this controller has nothing to wait for and
// returns immediately.
void
CpuEvalStencilsController::Synchronize() {
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,209 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef FAR_CPU_EVALSTENCILS_CONTROLLER_H
#define FAR_CPU_EVALSTENCILS_CONTROLLER_H
#include "../version.h"
#include "../osd/cpuEvalStencilsContext.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
///
/// \brief CPU stencils evaluation controller
///
/// CpuStencilsController is a compute controller class to launch
/// single threaded CPU stencil evaluation kernels.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class CpuEvalStencilsController {
public:

    /// Constructor.
    CpuEvalStencilsController();

    /// Destructor.
    ~CpuEvalStencilsController();

    /// \brief Applies stencil weights to the control vertex data
    ///
    /// Applies the stencil weights to the control vertex data to evaluate the
    /// interpolated limit positions at the parametric locations of the stencils
    ///
    /// @param context          the CpuEvalStencilsContext with the stencil weights
    ///                         (a null context yields 0)
    ///
    /// @param controlDataDesc  vertex buffer descriptor for the control vertex data
    ///
    /// @param controlVertices  vertex buffer with the control vertices data
    ///
    /// @param outputDataDesc   vertex buffer descriptor for the output vertex data
    ///
    /// @param outputData       vertex buffer where the vertex data will be output
    ///
    /// @return the value returned by the evaluation kernel, or 0 when there
    ///         is no context or no stencil
    ///
    template<class CONTROL_BUFFER, class OUTPUT_BUFFER>
    int UpdateValues( CpuEvalStencilsContext * context,
                      VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices,
                      VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) {

        // same guard as CpuEvalLimitController::EvalLimitSample()
        if (not context)
            return 0;

        if (not context->GetStencilTables()->GetNumStencils())
            return 0;

        bindControlData( controlDataDesc, controlVertices );

        bindOutputData( outputDataDesc, outputData );

        int n = _UpdateValues( context );

        unbind();

        return n;
    }

    /// \brief Applies derivative stencil weights to the control vertex data
    ///
    /// Computes the U and V derivative stencils to the control vertex data at
    /// the parametric locations contained in each stencil
    ///
    /// @param context          the CpuEvalStencilsContext with the stencil weights
    ///                         (a null context yields 0)
    ///
    /// @param controlDataDesc  vertex buffer descriptor for the control vertex data
    ///
    /// @param controlVertices  vertex buffer with the control vertices data
    ///
    /// @param outputDuDesc     vertex buffer descriptor for the U derivative output data
    ///
    /// @param outputDuData     output vertex buffer for the U derivative data
    ///
    /// @param outputDvDesc     vertex buffer descriptor for the V deriv output data
    ///
    /// @param outputDvData     output vertex buffer for the V derivative data
    ///
    /// @return the value returned by the evaluation kernel, or 0 when there
    ///         is no context or no stencil
    ///
    template<class CONTROL_BUFFER, class OUTPUT_BUFFER>
    int UpdateDerivs( CpuEvalStencilsContext * context,
                      VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices,
                      VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData,
                      VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) {

        // same guard as CpuEvalLimitController::EvalLimitSample()
        if (not context)
            return 0;

        if (not context->GetStencilTables()->GetNumStencils())
            return 0;

        bindControlData( controlDataDesc, controlVertices );

        bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData );

        int n = _UpdateDerivs( context );

        unbind();

        return n;
    }

    /// Waits until all running subdivision kernels finish.
    void Synchronize();

protected:

    /// \brief Binds control vertex data buffer
    template<class VERTEX_BUFFER>
    void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) {

        _currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0;
        _currentBindState.controlDataDesc = controlDataDesc;

    }

    /// \brief Binds output vertex data buffer
    template<class VERTEX_BUFFER>
    void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) {

        _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0;
        _currentBindState.outputDataDesc = outputDataDesc;
    }

    /// \brief Binds output derivative vertex data buffer
    template<class VERTEX_BUFFER>
    void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu,
                              VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) {

        _currentBindState.outputUDeriv = outputDu ? outputDu->BindCpuBuffer() : 0;
        _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0;
        _currentBindState.outputDuDesc = outputDuDesc;
        _currentBindState.outputDvDesc = outputDvDesc;
    }

    /// \brief Unbinds any previously bound vertex and varying data buffers.
    void unbind() {
        _currentBindState.Reset();
    }

private:

    // Evaluation kernels : they read buffers and descriptors from
    // _currentBindState and do not null-check 'context'.
    int _UpdateValues( CpuEvalStencilsContext * context );
    int _UpdateDerivs( CpuEvalStencilsContext * context );

    // Bind state is a transitional state during refinement.
    // It doesn't take an ownership of vertex buffers.
    struct BindState {

        BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { }

        // Uses the literal 0 (as the constructor does) : this header does not
        // include a standard header that defines NULL.
        void Reset() {
            controlData = outputData = outputUDeriv = outputVDeriv = 0;
            controlDataDesc.Reset();
            outputDataDesc.Reset();
            outputDuDesc.Reset();
            outputDvDesc.Reset();
        }

        // transient mesh data
        VertexBufferDescriptor controlDataDesc,
                               outputDataDesc,
                               outputDuDesc,
                               outputDvDesc;

        float * controlData,
              * outputData,
              * outputUDeriv,
              * outputVDeriv;
    };

    BindState _currentBindState;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // FAR_CPU_EVALSTENCILS_CONTROLLER_H

View File

@ -0,0 +1,148 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cpuEvaluator.h"
#include "../osd/cpuKernel.h"
#include <cstdlib>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/* static */
// Value-only stencil evaluation over the stencil range [start, end).
// Returns true for an empty range without touching the buffers, false when
// the source and destination element lengths differ, and true after
// forwarding the work to CpuEvalStencils() otherwise.
bool
CpuEvaluator::EvalStencils(const float *src,
                           VertexBufferDescriptor const &srcDesc,
                           float *dst,
                           VertexBufferDescriptor const &dstDesc,
                           const unsigned char * sizes,
                           const int * offsets,
                           const int * indices,
                           const float * weights,
                           int start, int end) {

    bool const hasWork = (start < end);

    if (hasWork) {
        // the kernel copies element-for-element, so both sides must agree
        if (dstDesc.length != srcDesc.length) {
            return false;
        }

        // XXX: we can probably expand cpuKernel.cpp to here.
        CpuEvalStencils(src, srcDesc,
                        dst, dstDesc,
                        sizes, offsets, indices, weights,
                        start, end);
    }
    return true;
}
/* static */
// Stencil evaluation of values and u/v derivatives over the stencil range
// [start, end). Returns true for an empty range without touching the
// buffers, false when any destination element length differs from the
// source one, and true after forwarding the work to CpuEvalStencils()
// otherwise.
bool
CpuEvaluator::EvalStencils(const float *src,
                           VertexBufferDescriptor const &srcDesc,
                           float *dst,
                           VertexBufferDescriptor const &dstDesc,
                           float *dstDu,
                           VertexBufferDescriptor const &dstDuDesc,
                           float *dstDv,
                           VertexBufferDescriptor const &dstDvDesc,
                           const unsigned char * sizes,
                           const int * offsets,
                           const int * indices,
                           const float * weights,
                           const float * duWeights,
                           const float * dvWeights,
                           int start, int end) {

    if (start >= end) {
        return true;
    }

    // all three outputs must have the element length of the source
    int const elementLength = srcDesc.length;
    bool const lengthsAgree = (dstDesc.length   == elementLength) &&
                              (dstDuDesc.length == elementLength) &&
                              (dstDvDesc.length == elementLength);
    if (!lengthsAgree) {
        return false;
    }

    CpuEvalStencils(src, srcDesc,
                    dst, dstDesc,
                    dstDu, dstDuDesc,
                    dstDv, dstDvDesc,
                    sizes, offsets, indices,
                    weights, duWeights, dvWeights,
                    start, end);
    return true;
}
// Thin view over a strided array of T : '_p' points at the current element,
// '_length' is the number of T per element and '_stride' the distance (in T)
// between two consecutive elements.
template <typename T>
struct BufferAdapter {

    BufferAdapter(T *p, int length, int stride)
        : _p(p),
          _length(length),
          _stride(stride) {
    }

    // Zeroes the '_length' values of the current element.
    void Clear() {
        T *cursor = _p;
        for (int remaining = _length; remaining > 0; --remaining, ++cursor) {
            *cursor = 0;
        }
    }

    // Accumulates src * w into the current element. The derivative weights
    // are accepted but ignored.
    void AddWithWeight(T const *src, float w, float wu, float wv) {
        // TODO: derivatives.
        static_cast<void>(wu);
        static_cast<void>(wv);

        int k = 0;
        while (k < _length) {
            _p[k] += src[k] * w;
            ++k;
        }
    }

    // Address of the element 'index' strides away from the current one.
    const T *operator[] (int index) const {
        T const *first = _p;
        return first + _stride * index;
    }

    // Moves the view to the next element.
    BufferAdapter<T> & operator ++() {
        _p = _p + _stride;
        return *this;
    }

    T *_p;
    int _length;
    int _stride;
};
/* static */
int
CpuEvaluator::EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
PatchCoordArray const &patchCoords,
Far::PatchTables const *patchTable) {
src += srcDesc.offset;
dst += dstDesc.offset;
int count = 0;
// XXX: this implementaion is temporary.
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
for (size_t i = 0; i < patchCoords.size(); ++i) {
PatchCoord const &coords = patchCoords[i];
patchTable->Evaluate(coords.handle, coords.s, coords.t,
srcT, dstT);
++count;
++dstT;
}
return count;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,242 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_CPU_EVALUATOR_H
#define OPENSUBDIV_OSD_CPU_EVALUATOR_H
#include "../version.h"
#include <cstddef>
#include <vector>
#include "../osd/vertexDescriptor.h"
#include "../far/patchTables.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief A location on a patch table: a patch handle plus a parametric
///        (s, t) position on that patch.
/// XXX: this is a temporary structure that exists during the Osd
///      refactoring work.
///
struct PatchCoord {

    /// \brief Constructor
    ///
    /// @param handle  patch handle
    ///
    /// @param s       parametric location on the patch
    ///
    /// @param t       parametric location on the patch
    ///
    PatchCoord(Far::PatchTables::PatchHandle handle, float s, float t)
        : handle(handle), s(s), t(t) { }

    Far::PatchTables::PatchHandle handle;  ///< patch handle
    float s;                               ///< parametric location on patch
    float t;                               ///< parametric location on patch
};

/// Array of patch locations, as consumed by EvalPatches().
typedef std::vector<PatchCoord> PatchCoordArray;
/// \brief Stencil and patch evaluation on the CPU.
///
/// Every entry point is a static function. The templated overloads unwrap
/// buffer and stencil-table objects and forward to the raw-pointer overloads.
///
class CpuEvaluator {
public:
    /// \brief Generic static eval stencils function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently from OsdMesh template interface.
    ///
    /// @param srcBuffer      Input primvar buffer.
    ///                       must have BindCpuBuffer() method returning a
    ///                       const float pointer for read
    ///
    /// @param srcDesc        vertex buffer descriptor for the input buffer
    ///
    /// @param dstBuffer      Output primvar buffer
    ///                       must have BindCpuBuffer() method returning a
    ///                       float pointer for write
    ///
    /// @param dstDesc        vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable   stencil table to be applied.
    ///
    /// @param instance       not used in the cpu kernel
    ///                       (declared as a typed pointer to prevent
    ///                        undesirable template resolution)
    ///
    /// @param deviceContext  not used in the cpu kernel
    ///
    /// @return true if the table is empty or the stencils were applied,
    ///         false if srcDesc and dstDesc disagree on the element length.
    ///
    template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(SRC_BUFFER *srcBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             DST_BUFFER *dstBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             const CpuEvaluator *instance = NULL,
                             void * deviceContext = NULL) {
        (void)instance;       // unused
        (void)deviceContext;  // unused

        // An empty table has nothing to evaluate; the raw overload would
        // report success for the empty range anyway. Returning here keeps
        // us from taking the address of element 0 of empty containers
        // below, which is undefined behaviour. The buffers are not bound
        // in this case.
        if (stencilTable->GetNumStencils() <= 0) {
            return true;
        }

        return EvalStencils(srcBuffer->BindCpuBuffer(),
                            srcDesc,
                            dstBuffer->BindCpuBuffer(),
                            dstDesc,
                            &stencilTable->GetSizes()[0],
                            &stencilTable->GetOffsets()[0],
                            &stencilTable->GetControlIndices()[0],
                            &stencilTable->GetWeights()[0],
                            /*start = */ 0,
                            /*end   = */ stencilTable->GetNumStencils());
    }

    /// \brief Raw-pointer stencil evaluate function.
    ///
    /// Applies the stencils in the range [start, end).
    ///
    /// @return true if the range is empty or the stencils were applied,
    ///         false if srcDesc and dstDesc disagree on the element length.
    ///
    static bool EvalStencils(const float *src,
                             VertexBufferDescriptor const &srcDesc,
                             float *dst,
                             VertexBufferDescriptor const &dstDesc,
                             const unsigned char * sizes,
                             const int * offsets,
                             const int * indices,
                             const float * weights,
                             int start,
                             int end);

    /// \brief Generic static eval stencils function with derivative outputs.
    ///
    /// @param srcBuffer      Input primvar buffer.
    ///                       must have BindCpuBuffer() method returning a
    ///                       const float pointer for read
    ///
    /// @param srcDesc        vertex buffer descriptor for the input buffer
    ///
    /// @param dstBuffer      Output primvar buffer
    ///                       must have BindCpuBuffer() method returning a
    ///                       float pointer for write
    ///
    /// @param dstDesc        vertex buffer descriptor for the output buffer
    ///
    /// @param dstDuBuffer    Output buffer for the du results
    ///
    /// @param dstDuDesc      vertex buffer descriptor for dstDuBuffer
    ///
    /// @param dstDvBuffer    Output buffer for the dv results
    ///
    /// @param dstDvDesc      vertex buffer descriptor for dstDvBuffer
    ///
    /// @param stencilTable   stencil table to be applied; must provide
    ///                       GetDuWeights() and GetDvWeights()
    ///
    /// @param evaluator      not used in the cpu kernel
    ///
    /// @param deviceContext  not used in the cpu kernel
    ///
    /// @return true if the table is empty or the stencils were applied,
    ///         false if any destination descriptor's element length differs
    ///         from srcDesc's.
    ///
    template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(SRC_BUFFER *srcBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             DST_BUFFER *dstBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             DST_BUFFER *dstDuBuffer,
                             VertexBufferDescriptor const &dstDuDesc,
                             DST_BUFFER *dstDvBuffer,
                             VertexBufferDescriptor const &dstDvDesc,
                             STENCIL_TABLE const *stencilTable,
                             const CpuEvaluator *evaluator = NULL,
                             void * deviceContext = NULL) {
        (void)evaluator;      // unused
        (void)deviceContext;  // unused

        // Same guard as the non-derivative overload: do not form
        // &container[0] on the containers of an empty table.
        // NOTE(review): a non-empty table whose derivative weights are
        // empty would still hit that problem below -- confirm callers
        // only pass tables that carry du/dv weights.
        if (stencilTable->GetNumStencils() <= 0) {
            return true;
        }

        return EvalStencils(srcBuffer->BindCpuBuffer(),
                            srcDesc,
                            dstBuffer->BindCpuBuffer(),
                            dstDesc,
                            dstDuBuffer->BindCpuBuffer(),
                            dstDuDesc,
                            dstDvBuffer->BindCpuBuffer(),
                            dstDvDesc,
                            &stencilTable->GetSizes()[0],
                            &stencilTable->GetOffsets()[0],
                            &stencilTable->GetControlIndices()[0],
                            &stencilTable->GetWeights()[0],
                            &stencilTable->GetDuWeights()[0],
                            &stencilTable->GetDvWeights()[0],
                            /*start = */ 0,
                            /*end   = */ stencilTable->GetNumStencils());
    }

    /// \brief Raw-pointer stencil evaluate function with derivative outputs.
    ///
    /// Applies the stencils in the range [start, end).
    ///
    /// @return true if the range is empty or the stencils were applied,
    ///         false if any destination descriptor's element length differs
    ///         from srcDesc's.
    ///
    static bool EvalStencils(const float *src,
                             VertexBufferDescriptor const &srcDesc,
                             float *dst,
                             VertexBufferDescriptor const &dstDesc,
                             float *dstDu,
                             VertexBufferDescriptor const &dstDuDesc,
                             float *dstDv,
                             VertexBufferDescriptor const &dstDvDesc,
                             const unsigned char * sizes,
                             const int * offsets,
                             const int * indices,
                             const float * weights,
                             const float * duWeights,
                             const float * dvWeights,
                             int start,
                             int end);

    /// \brief Generic limit eval function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently.
    ///
    /// XXX: This interface is still work in progress. XXX
    ///
    /// @param srcBuffer      Input primvar buffer.
    ///                       must have BindCpuBuffer() method returning a
    ///                       const float pointer for read
    ///
    /// @param srcDesc        vertex buffer descriptor for the input buffer
    ///
    /// @param dstBuffer      Output primvar buffer
    ///                       must have BindCpuBuffer() method returning a
    ///                       float pointer for write
    ///
    /// @param dstDesc        vertex buffer descriptor for the output buffer
    ///
    /// @param patchCoords    array of locations to be evaluated.
    ///
    /// @param patchTable     Far::PatchTables
    ///
    /// @param instance       not used in the cpu evaluator
    ///
    /// @param deviceContext  not used in the cpu evaluator
    ///
    /// @return the value returned by the raw-pointer EvalPatches() overload.
    ///
    // NOTE(review): unlike EvalStencils, 'instance' has no default value
    // here. Giving it one would make a six-argument call also match this
    // template, and for non-const raw pointers the template would be the
    // better match than the raw-pointer overload below. Presumably that is
    // why it was left out -- confirm before adding a default.
    template <typename SRC_BUFFER, typename DST_BUFFER>
    static int EvalPatches(SRC_BUFFER *srcBuffer,
                           VertexBufferDescriptor const &srcDesc,
                           DST_BUFFER *dstBuffer,
                           VertexBufferDescriptor const &dstDesc,
                           PatchCoordArray const &patchCoords,
                           Far::PatchTables const *patchTable,
                           CpuEvaluator const *instance,
                           void * deviceContext = NULL) {
        (void)instance;       // unused
        (void)deviceContext;  // unused

        return EvalPatches(srcBuffer->BindCpuBuffer(),
                           srcDesc,
                           dstBuffer->BindCpuBuffer(),
                           dstDesc,
                           patchCoords,
                           patchTable);
    }

    /// \brief Raw-pointer limit eval function.
    static int EvalPatches(const float *src,
                           VertexBufferDescriptor const &srcDesc,
                           float *dst,
                           VertexBufferDescriptor const &dstDesc,
                           PatchCoordArray const &patchCoords,
                           Far::PatchTables const *patchTable);

    /// \brief synchronize all asynchronous computation invoked on this device.
    ///        The cpu evaluator has nothing to wait for, so this is a no-op.
    static void Synchronize(void * /*deviceContext = NULL*/) {
        // nothing.
    }
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_CPU_EVALUATOR_H

View File

@ -70,15 +70,15 @@ copy(float *dst, int dstIndex, const float *src,
}
void
CpuComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end) {
CpuEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end) {
assert(start>=0 and start<end);
@ -125,21 +125,21 @@ CpuComputeStencils(float const * src,
}
void
CpuComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end) {
CpuEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end) {
if (start > 0) {
sizes += start;
indices += offsets[start];

View File

@ -22,8 +22,8 @@
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CPU_KERNEL_H
#define OSD_CPU_KERNEL_H
#ifndef OPENSUBDIV_OSD_CPU_KERNEL_H
#define OPENSUBDIV_OSD_CPU_KERNEL_H
#include "../version.h"
#include <cstring>
@ -36,32 +36,32 @@ namespace Osd {
struct VertexBufferDescriptor;
void
CpuComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end);
CpuEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end);
void
CpuComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end);
CpuEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end);
//
// SIMD ICC optimization of the stencil kernel

View File

@ -37,7 +37,7 @@ namespace Osd {
/// \brief Concrete vertex buffer class for cpu subdivision.
///
/// CpuVertexBuffer implements the VertexBufferInterface. An instance
/// of this buffer class can be passed to CpuComputeController
/// of this buffer class can be passed to CpuEvaluator
///
class CpuVertexBuffer {
public:

View File

@ -1,227 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../far/stencilTables.h"
#include "../osd/cudaComputeContext.h"
#include <cuda_runtime.h>
#include <vector>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// ----------------------------------------------------------------------------
template <class T> void *
createCudaBuffer(std::vector<T> const & src) {
void * devicePtr = 0;
size_t size = src.size()*sizeof(T);
cudaError_t err = cudaMalloc(&devicePtr, size);
if (err != cudaSuccess) {
return devicePtr;
}
err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
cudaFree(devicePtr);
return 0;
}
return devicePtr;
}
// ----------------------------------------------------------------------------
// Device-side copy of the four arrays of a Far::StencilTables (sizes,
// offsets, control indices, weights). The buffers are uploaded in the
// constructor and released in the destructor.
class CudaComputeContext::CudaStencilTables {

public:

    // Uploads the arrays of stencilTables. When the table holds no stencils
    // nothing is uploaded and every buffer pointer stays NULL.
    explicit CudaStencilTables(Far::StencilTables const & stencilTables) :
        _sizes(NULL), _offsets(NULL), _indices(NULL), _weights(NULL),
        _numStencils(stencilTables.GetNumStencils()) {

        if (_numStencils <= 0) {
            return;
        }
        _sizes   = createCudaBuffer(stencilTables.GetSizes());
        _offsets = createCudaBuffer(stencilTables.GetOffsets());
        _indices = createCudaBuffer(stencilTables.GetControlIndices());
        _weights = createCudaBuffer(stencilTables.GetWeights());
    }

    // Frees whichever device buffers were successfully created.
    ~CudaStencilTables() {
        void * const buffers[] = { _sizes, _offsets, _indices, _weights };
        for (int i = 0; i < 4; ++i) {
            if (buffers[i]) {
                cudaFree(buffers[i]);
            }
        }
    }

    // True only when all four device buffers exist.
    bool IsValid() const {
        return (_sizes != NULL) && (_offsets != NULL) &&
               (_indices != NULL) && (_weights != NULL);
    }

    // Device buffer accessors (NULL when the buffer was not created).
    void * GetSizes() const   { return _sizes; }

    void * GetOffsets() const { return _offsets; }

    void * GetIndices() const { return _indices; }

    void * GetWeights() const { return _weights; }

    // Number of stencils reported by the source table.
    int GetNumStencils() const { return _numStencils; }

private:

    void * _sizes;
    void * _offsets;
    void * _indices;
    void * _weights;

    int _numStencils;
};
// ----------------------------------------------------------------------------
// Builds device-side copies of whichever stencil tables are supplied (either
// pointer may be null) and records the number of control vertices. When both
// tables are given they are expected to agree on that number (checked with
// an assert).
CudaComputeContext::CudaComputeContext(
    Far::StencilTables const * vertexStencilTables,
    Far::StencilTables const * varyingStencilTables) :
        _vertexStencilTables(0),
        _varyingStencilTables(0),
        _numControlVertices(0) {

    if (vertexStencilTables) {
        _numControlVertices = vertexStencilTables->GetNumControlVertices();
        _vertexStencilTables = new CudaStencilTables(*vertexStencilTables);
    }

    if (varyingStencilTables) {
        _varyingStencilTables = new CudaStencilTables(*varyingStencilTables);

        if (_numControlVertices == 0) {
            // no count from the vertex table: take it from the varying one
            _numControlVertices =
                varyingStencilTables->GetNumControlVertices();
        } else {
            assert(_numControlVertices ==
                       varyingStencilTables->GetNumControlVertices());
        }
    }
}

// Releases the device-side tables; deleting a null pointer is a no-op.
CudaComputeContext::~CudaComputeContext() {

    delete _vertexStencilTables;
    delete _varyingStencilTables;
}
// ----------------------------------------------------------------------------
// True when a vertex stencil table was supplied and all of its device
// buffers are present.
bool
CudaComputeContext::HasVertexStencilTables() const {
    return _vertexStencilTables && _vertexStencilTables->IsValid();
}

// True when a varying stencil table was supplied and all of its device
// buffers are present.
bool
CudaComputeContext::HasVaryingStencilTables() const {
    return _varyingStencilTables && _varyingStencilTables->IsValid();
}

// Number of stencils in the vertex table, or 0 when there is none.
int
CudaComputeContext::GetNumStencilsInVertexStencilTables() const {
    if (!_vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetNumStencils();
}

// Number of stencils in the varying table, or 0 when there is none.
int
CudaComputeContext::GetNumStencilsInVaryingStencilTables() const {
    if (!_varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetNumStencils();
}
// ----------------------------------------------------------------------------
void *
CudaComputeContext::GetVertexStencilTablesSizes() const {
return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0;
}
void *
CudaComputeContext::GetVertexStencilTablesOffsets() const {
return _vertexStencilTables ? _vertexStencilTables->GetOffsets() : 0;
}
void *
CudaComputeContext::GetVertexStencilTablesIndices() const {
return _vertexStencilTables ? _vertexStencilTables->GetIndices() : 0;
}
void *
CudaComputeContext::GetVertexStencilTablesWeights() const {
return _vertexStencilTables ? _vertexStencilTables->GetWeights() : 0;
}
// ----------------------------------------------------------------------------
void *
CudaComputeContext::GetVaryingStencilTablesSizes() const {
return _varyingStencilTables ? _varyingStencilTables->GetSizes() : 0;
}
void *
CudaComputeContext::GetVaryingStencilTablesOffsets() const {
return _varyingStencilTables ? _varyingStencilTables->GetOffsets() : 0;
}
void *
CudaComputeContext::GetVaryingStencilTablesIndices() const {
return _varyingStencilTables ? _varyingStencilTables->GetIndices() : 0;
}
void *
CudaComputeContext::GetVaryingStencilTablesWeights() const {
return _varyingStencilTables ? _varyingStencilTables->GetWeights() : 0;
}
// ----------------------------------------------------------------------------
// Factory: heap-allocates a context for the given stencil tables (either may
// be null). The device context argument is accepted but not used.
CudaComputeContext *
CudaComputeContext::Create(Far::StencilTables const * vertexStencilTables,
                           Far::StencilTables const * varyingStencilTables,
                           void * /*deviceContext*/) {

    return new CudaComputeContext(vertexStencilTables, varyingStencilTables);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,134 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CUDA_COMPUTE_CONTEXT_H
#define OSD_CUDA_COMPUTE_CONTEXT_H
#include "../version.h"
#include <cstddef>
#include "../osd/nonCopyable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far { class StencilTables; }
namespace Osd {
///
/// \brief CUDA Refine Context
///
/// The CUDA implementation of the Refine module contextual functionality.
///
/// Contexts interface the serialized topological data pertaining to the
/// geometric primitives with the capabilities of the selected discrete
/// compute device.
///
/// A context holds CUDA device copies of up to two stencil tables (vertex
/// and varying) and exposes their device buffers as untyped pointers.
///
class CudaComputeContext : public NonCopyable<CudaComputeContext> {
public:
/// Creates a CudaComputeContext instance
///
/// @param vertexStencilTables The Far::StencilTables used for vertex
/// interpolation (may be null)
///
/// @param varyingStencilTables The Far::StencilTables used for varying
/// interpolation (may be null)
///
/// @param deviceContext (not used)
///
/// @return a newly allocated context; presumably the caller deletes it --
/// TODO confirm ownership with callers
///
static CudaComputeContext * Create(
Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
void *deviceContext = NULL);
/// Destructor. Releases the device copies of the stencil tables.
virtual ~CudaComputeContext();
/// Returns true if the Context has a 'vertex' interpolation stencil table
/// whose device buffers were all created
bool HasVertexStencilTables() const;
/// Returns true if the Context has a 'varying' interpolation stencil table
/// whose device buffers were all created
bool HasVaryingStencilTables() const;
/// Returns the number of control vertices
int GetNumControlVertices() const {
return _numControlVertices;
}
/// Returns the number of stencils in vertex stencil tables
/// (0 if there is no vertex stencil table)
int GetNumStencilsInVertexStencilTables() const;
/// Returns the number of stencils in varying stencil tables
/// (0 if there is no varying stencil table)
int GetNumStencilsInVaryingStencilTables() const;
// The accessors below return 0 when the corresponding stencil table was
// not supplied.
/// Returns the Cuda buffer containing vertex-stencil stencil sizes
void * GetVertexStencilTablesSizes() const;
/// Returns the Cuda buffer containing vertex-stencil stencil offsets
void * GetVertexStencilTablesOffsets() const;
/// Returns the Cuda buffer containing vertex-stencil stencil indices
void * GetVertexStencilTablesIndices() const;
/// Returns the Cuda buffer containing vertex-stencil stencil weights
void * GetVertexStencilTablesWeights() const;
/// Returns the Cuda buffer containing Varying-stencil stencil sizes
void * GetVaryingStencilTablesSizes() const;
/// Returns the Cuda buffer containing Varying-stencil stencil offsets
void * GetVaryingStencilTablesOffsets() const;
/// Returns the Cuda buffer containing Varying-stencil stencil indices
void * GetVaryingStencilTablesIndices() const;
/// Returns the Cuda buffer containing Varying-stencil stencil weights
void * GetVaryingStencilTablesWeights() const;
protected:
/// Constructor; instances are obtained through Create().
explicit CudaComputeContext(Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables);
private:
// Device-side copy of one stencil table (defined in the .cpp file).
class CudaStencilTables;
// Either pointer is null when the matching table was not supplied.
CudaStencilTables * _vertexStencilTables,
* _varyingStencilTables;
// Control vertex count reported by the supplied stencil tables.
int _numControlVertices;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_CUDA_COMPUTE_CONTEXT_H

View File

@ -1,124 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cudaComputeController.h"
#include <cuda_runtime.h>
#include <string.h>
#include <cassert>
// Stencil kernel launcher, declared here with C linkage and defined
// outside this file (presumably in the CUDA kernel source -- confirm).
//
// src, dst          : source / destination primvar data (the caller in this
//                     file passes CUDA buffer pointers, already advanced by
//                     the descriptor offsets)
// length            : element length taken from the source descriptor
// srcStride,
// dstStride         : strides taken from the source / destination descriptors
// sizes, offsets,
// indices, weights  : stencil table buffers
// start, end        : range of stencils to apply
extern "C" {
void CudaComputeStencils(const float *src,
float *dst,
int length,
int srcStride,
int dstStride,
const unsigned char * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
}
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
void
CudaComputeController::ApplyStencilTableKernel(
ComputeContext const *context) const {
assert(context);
if (context->HasVertexStencilTables() and _currentBindState.vertexBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = context->GetNumStencilsInVertexStencilTables();
float const * src = _currentBindState.vertexBuffer;
float * dst = _currentBindState.vertexBuffer;
if (end > start) {
CudaComputeStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
(unsigned char const *)context->GetVertexStencilTablesSizes(),
(int const *)context->GetVertexStencilTablesOffsets(),
(int const *)context->GetVertexStencilTablesIndices(),
(float const *)context->GetVertexStencilTablesWeights(),
start,
end);
}
}
if (context->HasVaryingStencilTables() and _currentBindState.varyingBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = context->GetNumStencilsInVaryingStencilTables();
float const * src = _currentBindState.varyingBuffer;
float * dst = _currentBindState.varyingBuffer;
if (end > start) {
CudaComputeStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
(unsigned char const *)context->GetVaryingStencilTablesSizes(),
(int const *)context->GetVaryingStencilTablesOffsets(),
(int const *)context->GetVaryingStencilTablesIndices(),
(float const *)context->GetVaryingStencilTablesWeights(),
start,
end);
}
}
}
// Constructor. The body is empty: nothing is set up here beyond the default
// construction of the controller's members.
CudaComputeController::CudaComputeController() {
}
// Destructor. The body is empty: the controller releases nothing here.
CudaComputeController::~CudaComputeController() {
}
void
CudaComputeController::Synchronize() {
cudaThreadSynchronize();
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,180 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CUDA_COMPUTE_CONTROLLER_H
#define OSD_CUDA_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/cudaComputeContext.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Compute controller for launching CUDA subdivision kernels.
///
/// CudaComputeController binds vertex / varying buffers, applies the stencil
/// tables held by a CudaComputeContext to them, and unbinds them again.
/// Buffers handed to Compute() must provide BindCudaBuffer() and, when no
/// descriptor is given, GetNumElements().
///
class CudaComputeController {
public:
    typedef CudaComputeContext ComputeContext;

    /// Constructor.
    CudaComputeController();

    /// Destructor.
    ~CudaComputeController();

    /// Execute subdivision kernels and apply to given vertex buffers.
    ///
    /// @param  context       The CudaContext to apply refinement operations to
    ///
    /// @param  vertexBuffer  Vertex-interpolated data buffer (may be null)
    ///
    /// @param  varyingBuffer Varying-interpolated data buffer (may be null)
    ///
    /// @param  vertexDesc    The descriptor of vertex elements to be refined.
    ///                       if it's null, the vertex buffer is treated as
    ///                       tightly packed and all of its elements are
    ///                       refined.
    ///
    /// @param  varyingDesc   The descriptor of varying elements to be refined.
    ///                       if it's null, the varying buffer is treated as
    ///                       tightly packed and all of its elements are
    ///                       refined.
    ///
    template<class VERTEX_BUFFER, class VARYING_BUFFER>
        void Compute( CudaComputeContext const * context,
                      VERTEX_BUFFER  * vertexBuffer,
                      VARYING_BUFFER * varyingBuffer,
                      VertexBufferDescriptor const * vertexDesc=NULL,
                      VertexBufferDescriptor const * varyingDesc=NULL ){

        bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);
        ApplyStencilTableKernel(context);
        unbind();
    }

    /// Execute subdivision kernels and apply to the given vertex buffer
    /// only (no varying data).
    ///
    /// @param  context       The CudaContext to apply refinement operations to
    ///
    /// @param  vertexBuffer  Vertex-interpolated data buffer
    ///
    template<class VERTEX_BUFFER>
        void Compute(CudaComputeContext const * context,
                     VERTEX_BUFFER *vertexBuffer) {

        // forward with a null varying buffer of the same buffer type
        Compute<VERTEX_BUFFER>(context, vertexBuffer,
                               static_cast<VERTEX_BUFFER*>(0));
    }

    /// Waits until all running subdivision kernels finish.
    void Synchronize();

protected:

    /// Applies the context's stencil tables to the currently bound buffers.
    void ApplyStencilTableKernel(ComputeContext const *context) const;

    /// Records descriptors and CUDA pointers for the given buffers in the
    /// current bind state. A null buffer is bound as a null pointer.
    template<class VERTEX_BUFFER, class VARYING_BUFFER>
        void bind( VERTEX_BUFFER  * vertexBuffer,
                   VARYING_BUFFER * varyingBuffer,
                   VertexBufferDescriptor const * vertexDesc,
                   VertexBufferDescriptor const * varyingDesc ) {

        // Use the caller's descriptor when there is one; otherwise assume
        // the data is tightly packed in the buffer.
        _currentBindState.vertexDesc =
            vertexDesc ? *vertexDesc : packedDescriptor(vertexBuffer);

        _currentBindState.varyingDesc =
            varyingDesc ? *varyingDesc : packedDescriptor(varyingBuffer);

        _currentBindState.vertexBuffer = 0;
        if (vertexBuffer) {
            _currentBindState.vertexBuffer =
                static_cast<float*>(vertexBuffer->BindCudaBuffer());
        }

        _currentBindState.varyingBuffer = 0;
        if (varyingBuffer) {
            _currentBindState.varyingBuffer =
                static_cast<float*>(varyingBuffer->BindCudaBuffer());
        }
    }

    /// Unbinds any previously bound vertex and varying data buffers.
    void unbind() {
        _currentBindState.Reset();
    }

private:

    // Descriptor for a tightly packed buffer: offset 0, with length and
    // stride both equal to the buffer's element count (0 for a null buffer).
    template<class BUFFER>
        static VertexBufferDescriptor packedDescriptor(BUFFER * buffer) {

        int numElements = 0;
        if (buffer) {
            numElements = buffer->GetNumElements();
        }
        return VertexBufferDescriptor(0, numElements, numElements);
    }

    // Bind state is a transitional state during refinement.
    // It doesn't take an ownership of the vertex buffers.
    struct BindState {

        BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {}

        // Clears both buffer pointers and resets both descriptors.
        void Reset() {
            vertexBuffer = NULL;
            varyingBuffer = NULL;
            vertexDesc.Reset();
            varyingDesc.Reset();
        }

        float * vertexBuffer;   // cuda buffers
        float * varyingBuffer;

        VertexBufferDescriptor vertexDesc;
        VertexBufferDescriptor varyingDesc;
    };

    BindState _currentBindState;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_CUDA_COMPUTE_CONTROLLER_H

View File

@ -0,0 +1,124 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cudaEvaluator.h"
#include <cuda_runtime.h>
#include <vector>
#include "../far/stencilTables.h"
// Stencil kernel launcher, declared here with C linkage and defined
// outside this file (presumably in the CUDA kernel source -- confirm).
//
// src, dst          : source / destination primvar data; the caller in this
//                     file passes pointers already advanced by the
//                     descriptor offsets
// length            : element length taken from the source descriptor
// srcStride,
// dstStride         : strides taken from the source / destination descriptors
// sizes, offsets,
// indices, weights  : stencil table buffers
// start, end        : range of stencils to apply
extern "C" {
void CudaEvalStencils(const float *src,
float *dst,
int length,
int srcStride,
int dstStride,
const unsigned char * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
}
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
template <class T> void *
createCudaBuffer(std::vector<T> const & src) {
void * devicePtr = 0;
size_t size = src.size()*sizeof(T);
cudaError_t err = cudaMalloc(&devicePtr, size);
if (err != cudaSuccess) {
return devicePtr;
}
err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
cudaFree(devicePtr);
return 0;
}
return devicePtr;
}
// ----------------------------------------------------------------------------
// Uploads the sizes, offsets, control indices and weights of the given
// stencil tables to CUDA buffers. An empty table leaves every buffer NULL.
CudaStencilTables::CudaStencilTables(Far::StencilTables const *stencilTables) {

    _numStencils = stencilTables->GetNumStencils();

    if (_numStencils <= 0) {
        _weights = NULL;
        _indices = _weights;
        _offsets = _indices;
        _sizes = _offsets;
        return;
    }

    _sizes   = createCudaBuffer(stencilTables->GetSizes());
    _offsets = createCudaBuffer(stencilTables->GetOffsets());
    _indices = createCudaBuffer(stencilTables->GetControlIndices());
    _weights = createCudaBuffer(stencilTables->GetWeights());
}
// Frees every device buffer that was successfully created.
CudaStencilTables::~CudaStencilTables() {

    void * const deviceBuffers[4] = { _sizes, _offsets, _indices, _weights };

    for (int i = 0; i < 4; ++i) {
        if (deviceBuffers[i]) {
            cudaFree(deviceBuffers[i]);
        }
    }
}
// ---------------------------------------------------------------------------
/* static */
// Applies the stencils [start, end) to src and writes the result to dst.
// Both pointers are advanced by their descriptor's offset before the kernel
// launcher is called; the primvar length is taken from the source
// descriptor. Always returns true: the launcher reports no status.
bool
CudaEvaluator::EvalStencils(const float *src,
                            VertexBufferDescriptor const &srcDesc,
                            float *dst,
                            VertexBufferDescriptor const &dstDesc,
                            const unsigned char * sizes,
                            const int * offsets,
                            const int * indices,
                            const float * weights,
                            int start,
                            int end) {

    const float * const srcBegin = src + srcDesc.offset;
    float * const dstBegin = dst + dstDesc.offset;

    CudaEvalStencils(srcBegin, dstBegin,
                     srcDesc.length, srcDesc.stride, dstDesc.stride,
                     sizes, offsets, indices, weights,
                     start, end);

    return true;
}
/* static */
void
CudaEvaluator::Synchronize(void * /*deviceContext*/) {
cudaThreadSynchronize();
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,148 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_CUDA_EVALUATOR_H
#define OPENSUBDIV_OSD_CUDA_EVALUATOR_H
#include "../version.h"
#include <vector>
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class StencilTables;
}
namespace Osd {
/// \brief CUDA stencil tables
///
/// This class is a cuda buffer representation of Far::StencilTables.
///
/// CudaEvaluator consumes this table to apply stencils.
///
/// An instance owns its device buffers and releases them in its destructor,
/// therefore it cannot be copied.
///
class CudaStencilTables {
public:
    /// Creates a heap-allocated instance; the caller is responsible for
    /// deleting it.
    ///
    /// @param stencilTables  host-side stencil tables to upload
    ///
    /// @param deviceContext  not used by the CUDA backend
    ///
    static CudaStencilTables *Create(Far::StencilTables const *stencilTables,
                                     void *deviceContext = NULL) {
        (void)deviceContext;  // unused
        return new CudaStencilTables(stencilTables);
    }

    explicit CudaStencilTables(Far::StencilTables const *stencilTables);
    ~CudaStencilTables();

    // interfaces needed for CudaEvaluator
    void *GetSizesBuffer() const { return _sizes; }
    void *GetOffsetsBuffer() const { return _offsets; }
    void *GetIndicesBuffer() const { return _indices; }
    void *GetWeightsBuffer() const { return _weights; }
    int GetNumStencils() const { return _numStencils; }

private:
    // Declared but not implemented: a copy would share the device pointers
    // and free them twice.
    CudaStencilTables(CudaStencilTables const &);
    CudaStencilTables & operator=(CudaStencilTables const &);

    void * _sizes,
         * _offsets,
         * _indices,
         * _weights;
    int _numStencils;
};
// ---------------------------------------------------------------------------
/// \brief Stateless evaluator applying stencil tables with CUDA kernels.
class CudaEvaluator {
public:
    /// \brief Generic static compute function. It shares its signature with
    ///        the evaluators of the other devices so that it can be called
    ///        transparently from the OsdMesh template interface.
    ///
    /// @param srcVertexBuffer  Input primvar buffer.
    ///                         must have BindCudaBuffer() method returning a
    ///                         const float pointer for read
    ///
    /// @param srcDesc          vertex buffer descriptor for the input buffer
    ///
    /// @param dstVertexBuffer  Output primvar buffer
    ///                         must have BindCudaBuffer() method returning a
    ///                         float pointer for write
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable     stencil table to be applied. The table must
    ///                         have Cuda memory interfaces.
    ///
    /// @param instance         not used in the CudaEvaluator
    ///
    /// @param deviceContext    not used in the CudaEvaluator
    ///
    /// @return                 the result of the pointer-based overload
    ///
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             VERTEX_BUFFER *dstVertexBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             const void *instance = NULL,
                             void * deviceContext = NULL) {

        (void)instance;       // unused
        (void)deviceContext;  // unused

        // every stencil of the table is evaluated
        int const firstStencil = 0;
        int const endStencil = stencilTable->GetNumStencils();

        return EvalStencils(
            srcVertexBuffer->BindCudaBuffer(), srcDesc,
            dstVertexBuffer->BindCudaBuffer(), dstDesc,
            (unsigned char const *)stencilTable->GetSizesBuffer(),
            (int const *)stencilTable->GetOffsetsBuffer(),
            (int const *)stencilTable->GetIndicesBuffer(),
            (float const *)stencilTable->GetWeightsBuffer(),
            firstStencil,
            endStencil);
    }

    /// \brief Static compute function taking raw buffers.
    ///
    /// @param src      input primvar data
    ///
    /// @param srcDesc  vertex buffer descriptor for the input data
    ///
    /// @param dst      output primvar data
    ///
    /// @param dstDesc  vertex buffer descriptor for the output data
    ///
    /// @param sizes    stencil sizes buffer
    ///
    /// @param offsets  stencil offsets buffer
    ///
    /// @param indices  stencil control indices buffer
    ///
    /// @param weights  stencil weights buffer
    ///
    /// @param start    index of the first stencil to apply
    ///
    /// @param end      end of the stencil range to apply
    ///
    static bool EvalStencils(const float *src,
                             VertexBufferDescriptor const &srcDesc,
                             float *dst,
                             VertexBufferDescriptor const &dstDesc,
                             const unsigned char * sizes,
                             const int * offsets,
                             const int * indices,
                             const float * weights,
                             int start,
                             int end);

    /// \brief Waits for the device to complete pending work.
    ///
    /// @param deviceContext  not used in the CudaEvaluator
    ///
    static void Synchronize(void *deviceContext = NULL);
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_CUDA_EVALUATOR_H

View File

@ -257,17 +257,17 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
extern "C" {
void CudaComputeStencils(const float *src,
float *dst,
int length,
int srcStride,
int dstStride,
const unsigned char * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end)
void CudaEvalStencils(const float *src,
float *dst,
int length,
int srcStride,
int dstStride,
const unsigned char * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end)
{
// assert(cvs and dst and sizes and offsets and indices and weights and (end>=start));

View File

@ -35,7 +35,7 @@ namespace Osd {
/// \brief Concrete vertex buffer class for Cuda subdivision.
///
/// CudaVertexBuffer implements CudaVertexBufferInterface.
/// An instance of this buffer class can be passed to CudaComputeController
/// An instance of this buffer class can be passed to CudaEvaluator
///
class CudaVertexBuffer {

View File

@ -1,284 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../far/stencilTables.h"
#include "../osd/d3d11ComputeContext.h"
#include "../far/error.h"
#include <D3D11.h>
#include <vector>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Calls Release() on the pointer p if it is non-null, then resets p to NULL.
// NOTE(review): expands to a bare brace block, so `SAFE_RELEASE(x);` used as
// the body of an unbraced if that is followed by else does not compile.
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
// ----------------------------------------------------------------------------
// An immutable D3D11 buffer together with the shader resource view used to
// read it from a compute shader. Both are released on destruction.
struct D3D11Table {

    D3D11Table() : buffer(0), srv(0) { }

    ~D3D11Table() {
        SAFE_RELEASE(buffer);
        SAFE_RELEASE(srv);
    }

    // True once both the buffer and its view have been created.
    bool IsValid() const {
        return (buffer and srv);
    }

    // Creates the buffer from the contents of src and a view of it with the
    // given element format. An empty src leaves the table invalid. Failures
    // are reported through Far::Error and leave the table invalid as well.
    template <class T> void initialize(std::vector<T> const & src,
        DXGI_FORMAT format, ID3D11DeviceContext *deviceContext) {

        size_t size = src.size()*sizeof(T);
        if (size==0) {
            buffer = 0;
            srv = 0;
            return;
        }

        // GetDevice() hands back an AddRef'ed interface: it has to be
        // released on every path out of this function.
        ID3D11Device *device = 0;
        deviceContext->GetDevice(&device);
        assert(device);

        D3D11_BUFFER_DESC bd;
        bd.ByteWidth = (unsigned int)size;
        bd.Usage = D3D11_USAGE_IMMUTABLE;
        bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
        bd.CPUAccessFlags = 0;
        bd.MiscFlags = 0;
        bd.StructureByteStride = 0;

        D3D11_SUBRESOURCE_DATA initData;
        initData.pSysMem = &src.at(0);
        // pitches are meaningless for buffers but should not be garbage
        initData.SysMemPitch = 0;
        initData.SysMemSlicePitch = 0;

        HRESULT hr = device->CreateBuffer(&bd, &initData, &buffer);
        if (FAILED(hr)) {
            device->Release();
            Far::Error(Far::FAR_RUNTIME_ERROR,
                       "Error creating compute table buffer\n");
            return;
        }

        D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
        ZeroMemory(&srvd, sizeof(srvd));
        srvd.Format = format;
        srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
        srvd.Buffer.FirstElement = 0;
        srvd.Buffer.NumElements = (unsigned int)src.size();

        hr = device->CreateShaderResourceView(buffer, &srvd, &srv);
        device->Release();
        if (FAILED(hr)) {
            Far::Error(Far::FAR_RUNTIME_ERROR,
                       "Error creating compute table shader resource view\n");
            return;
        }
    }

    ID3D11Buffer * buffer;
    ID3D11ShaderResourceView * srv;
};
// ----------------------------------------------------------------------------
// GPU-side copy of a Far::StencilTables: one D3D11Table per stencil array.
class D3D11ComputeContext::D3D11StencilTables {

public:
    // Uploads the four stencil arrays. Nothing is created when the source
    // holds no stencil.
    D3D11StencilTables(Far::StencilTables const & stencilTables,
                       ID3D11DeviceContext *deviceContext) {

        _numStencils = stencilTables.GetNumStencils();
        if (_numStencils <= 0) {
            return;
        }

        // HLSL does not have a uint8 type: the unsigned char sizes are
        // widened to ints before the upload.
        std::vector<int> const widenedSizes(stencilTables.GetSizes().begin(),
                                            stencilTables.GetSizes().end());

        _sizes.initialize(widenedSizes, DXGI_FORMAT_R32_SINT, deviceContext);
        _offsets.initialize(stencilTables.GetOffsets(),
                            DXGI_FORMAT_R32_SINT, deviceContext);
        _indices.initialize(stencilTables.GetControlIndices(),
                            DXGI_FORMAT_R32_SINT, deviceContext);
        _weights.initialize(stencilTables.GetWeights(),
                            DXGI_FORMAT_R32_FLOAT, deviceContext);
    }

    // True when all four tables were created.
    bool IsValid() const {
        if (not _sizes.IsValid()) return false;
        if (not _offsets.IsValid()) return false;
        if (not _indices.IsValid()) return false;
        return _weights.IsValid();
    }

    D3D11Table const & GetSizes() const { return _sizes; }

    D3D11Table const & GetOffsets() const { return _offsets; }

    D3D11Table const & GetIndices() const { return _indices; }

    D3D11Table const & GetWeights() const { return _weights; }

    int GetNumStencils() const { return _numStencils; }

    // Binds sizes, offsets, indices and weights to compute shader
    // resource slots t1-t4.
    void Bind(ID3D11DeviceContext * deviceContext) const {
        ID3D11ShaderResourceView *views[4];
        views[0] = _sizes.srv;
        views[1] = _offsets.srv;
        views[2] = _indices.srv;
        views[3] = _weights.srv;
        deviceContext->CSSetShaderResources(1, 4, views);
    }

    // Clears compute shader resource slots t1-t4.
    static void Unbind(ID3D11DeviceContext * deviceContext) {
        ID3D11ShaderResourceView *noViews[4] = { 0, 0, 0, 0 };
        deviceContext->CSSetShaderResources(1, 4, noViews);
    }

private:
    D3D11Table _sizes,
               _offsets,
               _indices,
               _weights;
    int _numStencils;
};
// ----------------------------------------------------------------------------
// Builds the GPU stencil tables for whichever of the two inputs is given.
// The control vertex count comes from the vertex tables when present,
// otherwise from the varying tables; when both are given they are asserted
// to agree.
D3D11ComputeContext::D3D11ComputeContext(
    Far::StencilTables const * vertexStencilTables,
    Far::StencilTables const * varyingStencilTables,
    ID3D11DeviceContext *deviceContext) :
        _vertexStencilTables(0), _varyingStencilTables(0),
        _numControlVertices(0) {

    if (vertexStencilTables) {
        _numControlVertices = vertexStencilTables->GetNumControlVertices();
        _vertexStencilTables =
            new D3D11StencilTables(*vertexStencilTables, deviceContext);
    }

    if (varyingStencilTables) {
        _varyingStencilTables =
            new D3D11StencilTables(*varyingStencilTables, deviceContext);

        if (_numControlVertices == 0) {
            _numControlVertices = varyingStencilTables->GetNumControlVertices();
        } else {
            assert(_numControlVertices==varyingStencilTables->GetNumControlVertices());
        }
    }
}
// Destroys the owned GPU stencil tables (either pointer may be null).
D3D11ComputeContext::~D3D11ComputeContext()
{
    delete _vertexStencilTables;
    delete _varyingStencilTables;
}
// ----------------------------------------------------------------------------
// True when vertex stencil tables exist and all their buffers are valid.
bool
D3D11ComputeContext::HasVertexStencilTables() const {
    if (not _vertexStencilTables) {
        return false;
    }
    return _vertexStencilTables->IsValid();
}
// True when varying stencil tables exist and all their buffers are valid.
bool
D3D11ComputeContext::HasVaryingStencilTables() const {
    if (not _varyingStencilTables) {
        return false;
    }
    return _varyingStencilTables->IsValid();
}
// Number of stencils in the vertex tables, 0 when there are none.
int
D3D11ComputeContext::GetNumStencilsInVertexStencilTables() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetNumStencils();
}
// Number of stencils in the varying tables, 0 when there are none.
int
D3D11ComputeContext::GetNumStencilsInVaryingStencilTables() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetNumStencils();
}
// ----------------------------------------------------------------------------
// Binds the vertex stencil buffers to the compute stage; no-op without
// vertex tables.
void
D3D11ComputeContext::BindVertexStencilTables(ID3D11DeviceContext *deviceContext) const {
    if (not _vertexStencilTables) {
        return;
    }
    _vertexStencilTables->Bind(deviceContext);
}
// Binds the varying stencil buffers to the compute stage; no-op without
// varying tables.
void
D3D11ComputeContext::BindVaryingStencilTables(ID3D11DeviceContext *deviceContext) const {
    if (not _varyingStencilTables) {
        return;
    }
    _varyingStencilTables->Bind(deviceContext);
}
// Clears the compute-stage resource slots used by the stencil buffers.
void
D3D11ComputeContext::UnbindStencilTables(ID3D11DeviceContext *deviceContext) const
{
    D3D11StencilTables::Unbind(deviceContext);
}
// ----------------------------------------------------------------------------
// Factory: returns a new heap-allocated context built from the given tables.
D3D11ComputeContext *
D3D11ComputeContext::Create(Far::StencilTables const * vertexStencilTables,
                            Far::StencilTables const * varyingStencilTables,
                            ID3D11DeviceContext *deviceContext) {

    return new D3D11ComputeContext(vertexStencilTables,
                                   varyingStencilTables,
                                   deviceContext);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,128 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_D3D11_COMPUTE_CONTEXT_H
#define OSD_D3D11_COMPUTE_CONTEXT_H
#include "../version.h"
#include "../osd/nonCopyable.h"
struct ID3D11DeviceContext;
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{ class StencilTables; }
namespace Osd {
///
/// \brief D3D Refine Context
///
/// The D3D implementation of the Refine module contextual functionality.
///
/// Contexts interface the serialized topological data pertaining to the
/// geometric primitives with the capabilities of the selected discrete
/// compute device.
///
/// Instances are created with Create(); the class is non-copyable.
///
class D3D11ComputeContext : public NonCopyable<D3D11ComputeContext> {
public:
/// Creates a D3D11ComputeContext instance
///
/// @param vertexStencilTables The Far::StencilTables used for vertex
/// interpolation
///
/// @param varyingStencilTables The Far::StencilTables used for varying
/// interpolation
///
/// @param deviceContext The D3D11 device context
///
static D3D11ComputeContext * Create(Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
ID3D11DeviceContext *deviceContext);
/// Destructor
virtual ~D3D11ComputeContext();
/// Returns true if the Context has a 'vertex' interpolation stencil table
bool HasVertexStencilTables() const;
/// Returns true if the Context has a 'varying' interpolation stencil table
bool HasVaryingStencilTables() const;
/// Returns the number of control vertices
int GetNumControlVertices() const {
return _numControlVertices;
}
/// Returns the number of stencils in vertex stencil table
int GetNumStencilsInVertexStencilTables() const;
/// Returns the number of stencils in varying stencil table
int GetNumStencilsInVaryingStencilTables() const;
/// Binds D3D11 buffers containing stencils for 'vertex' interpolation
///
/// @param deviceContext The D3D11 device context
///
void BindVertexStencilTables(ID3D11DeviceContext *deviceContext) const;
/// Binds D3D11 buffers containing stencils for 'varying' interpolation
///
/// @param deviceContext The D3D11 device context
///
void BindVaryingStencilTables(ID3D11DeviceContext *deviceContext) const;
/// Unbinds D3D11 stencil buffers
///
/// @param deviceContext The D3D11 device context
///
void UnbindStencilTables(ID3D11DeviceContext *deviceContext) const;
protected:
// Only reachable through Create().
explicit D3D11ComputeContext(Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
ID3D11DeviceContext *deviceContext);
private:
// GPU-side stencil tables; defined in the implementation file.
class D3D11StencilTables;
D3D11StencilTables * _vertexStencilTables,
* _varyingStencilTables;
int _numControlVertices;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_D3D11_COMPUTE_CONTEXT_H

View File

@ -1,340 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/d3d11ComputeController.h"
#include "../far/error.h"
#include "../osd/vertexDescriptor.h"
#define INITGUID // for IID_ID3D11ShaderReflection
#include <D3D11.h>
#include <D3D11shader.h>
#include <D3Dcompiler.h>
#include <algorithm>
#include <cassert>
#include <sstream>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Calls Release() on the pointer p if it is non-null, then resets p to NULL.
// NOTE(review): expands to a bare brace block, so `SAFE_RELEASE(x);` used as
// the body of an unbraced if that is followed by else does not compile.
#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
// HLSL source of the compute kernels as a string; it is passed to
// D3DCompile() in KernelBundle::Compile(). The text comes from the included
// header (presumably generated from hlslComputeKernel.hlsl at build time).
static const char *shaderSource =
#include "../osd/hlslComputeKernel.gen.h"
;
// ----------------------------------------------------------------------------
// must match constant buffer declaration in hlslComputeKernel.hlsl
// Filled and uploaded for each dispatch by
// KernelBundle::ApplyStencilTableKernel().
__declspec(align(16))
struct KernelUniformArgs {
int start; // batch
int end; // end of the stencil batch
int srcOffset; // offset of the source descriptor
int dstOffset; // offset of the destination descriptor
};
// ----------------------------------------------------------------------------
// A compute shader compiled for one primvar layout (length and strides),
// together with the D3D11 objects needed to dispatch it.
class D3D11ComputeController::KernelBundle :
    NonCopyable<D3D11ComputeController::KernelBundle> {

public:

    KernelBundle() :
        _computeShader(0),
        _classLinkage(0),
        _singleBufferKernel(0),
        _separateBufferKernel(0),
        _uniformArgs(0),
        _workGroupSize(64) { }

    ~KernelBundle() {
        SAFE_RELEASE(_computeShader);
        SAFE_RELEASE(_classLinkage);
        SAFE_RELEASE(_singleBufferKernel);
        SAFE_RELEASE(_separateBufferKernel);
        SAFE_RELEASE(_uniformArgs);
    }

    // Compiles the HLSL kernel with LENGTH, SRC_STRIDE, DST_STRIDE and
    // WORK_GROUP_SIZE baked in as macros and creates the shader objects.
    //
    // Returns false (after reporting through Far::Error) when compilation
    // fails, true otherwise.
    bool Compile(VertexBufferDescriptor const &srcDesc,
                 VertexBufferDescriptor const &dstDesc,
                 ID3D11DeviceContext *deviceContext) {

        // XXX: only store srcDesc.
        // this is ok since currently this kernel doesn't get called with
        // different strides for src and dst. This function will be
        // refactored soon.
        _desc = VertexBufferDescriptor(0, srcDesc.length, srcDesc.stride);

        DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
#ifdef _DEBUG
        dwShaderFlags |= D3DCOMPILE_DEBUG;
#endif

        std::ostringstream ss;
        ss << srcDesc.length;  std::string lengthValue(ss.str()); ss.str("");
        ss << srcDesc.stride;  std::string srcStrideValue(ss.str()); ss.str("");
        ss << dstDesc.stride;  std::string dstStrideValue(ss.str()); ss.str("");
        ss << _workGroupSize;  std::string workgroupSizeValue(ss.str()); ss.str("");

        D3D_SHADER_MACRO defines[] =
            { "LENGTH", lengthValue.c_str(),
              "SRC_STRIDE", srcStrideValue.c_str(),
              "DST_STRIDE", dstStrideValue.c_str(),
              "WORK_GROUP_SIZE", workgroupSizeValue.c_str(),
              0, 0 };

        ID3DBlob * computeShaderBuffer = NULL;
        ID3DBlob * errorBuffer = NULL;

        HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource),
                                NULL, &defines[0], NULL,
                                "cs_main", "cs_5_0",
                                dwShaderFlags, 0,
                                &computeShaderBuffer, &errorBuffer);

        if (FAILED(hr)) {
            if (errorBuffer != NULL) {
                Far::Error(Far::FAR_RUNTIME_ERROR,
                           "Error compiling HLSL shader: %s\n",
                           (CHAR*)errorBuffer->GetBufferPointer());
            } else {
                // a failure without diagnostics must not fall through to
                // the null shader blob below
                Far::Error(Far::FAR_RUNTIME_ERROR,
                           "Error compiling HLSL shader\n");
            }
            SAFE_RELEASE(errorBuffer);
            SAFE_RELEASE(computeShaderBuffer);
            return false;
        }
        // the error blob can also be returned on success (warnings)
        SAFE_RELEASE(errorBuffer);

        // GetDevice() returns an AddRef'ed interface, released below
        ID3D11Device *device = NULL;
        deviceContext->GetDevice(&device);
        assert(device);

        device->CreateClassLinkage(&_classLinkage);
        assert(_classLinkage);

        device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(),
                                    computeShaderBuffer->GetBufferSize(),
                                    _classLinkage,
                                    &_computeShader);
        assert(_computeShader);

        device->Release();

        // sanity check: the shader is expected to expose one interface slot
        ID3D11ShaderReflection *reflector;
        D3DReflect(computeShaderBuffer->GetBufferPointer(),
                   computeShaderBuffer->GetBufferSize(),
                   IID_ID3D11ShaderReflection, (void**) &reflector);
        assert(reflector);

        assert(reflector->GetNumInterfaceSlots() == 1);
        reflector->Release();

        computeShaderBuffer->Release();

        _classLinkage->GetClassInstance("singleBufferCompute", 0, &_singleBufferKernel);
        assert(_singleBufferKernel);
        _classLinkage->GetClassInstance("separateBufferCompute", 0, &_separateBufferKernel);
        assert(_separateBufferKernel);

        return true;
    }

    // Uploads the batch arguments and dispatches the single-buffer kernel
    // over the stencils [start, end). Does nothing for an empty range.
    void ApplyStencilTableKernel(VertexBufferDescriptor const &srcDesc,
                                 VertexBufferDescriptor const &dstDesc,
                                 int start,
                                 int end,
                                 ID3D11DeviceContext *deviceContext) {

        int count = end - start;
        if (count <= 0) return;

        KernelUniformArgs args;
        args.start = start;
        args.end = end;
        args.srcOffset = srcDesc.offset;
        args.dstOffset = dstDesc.offset;

        // the constant buffer is created lazily on the first dispatch
        if (not _uniformArgs) {
            ID3D11Device *device = NULL;
            deviceContext->GetDevice(&device);
            assert(device);

            D3D11_BUFFER_DESC cbDesc;
            ZeroMemory(&cbDesc, sizeof(cbDesc));
            cbDesc.Usage = D3D11_USAGE_DYNAMIC;
            cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
            cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
            cbDesc.MiscFlags = 0;
            cbDesc.ByteWidth = sizeof(KernelUniformArgs);
            device->CreateBuffer(&cbDesc, NULL, &_uniformArgs);

            // balance the AddRef done by GetDevice()
            device->Release();
        }
        assert(_uniformArgs);

        D3D11_MAPPED_SUBRESOURCE mappedResource;
        HRESULT hr = deviceContext->Map(_uniformArgs, 0,
                                        D3D11_MAP_WRITE_DISCARD, 0,
                                        &mappedResource);
        if (FAILED(hr)) {
            // mappedResource.pData is not usable when Map() fails
            Far::Error(Far::FAR_RUNTIME_ERROR,
                       "Error mapping compute kernel constant buffer\n");
            return;
        }
        CopyMemory(mappedResource.pData, &args, sizeof(KernelUniformArgs));

        deviceContext->Unmap(_uniformArgs, 0);
        deviceContext->CSSetConstantBuffers(0, 1, &_uniformArgs); // b0

        deviceContext->CSSetShader(_computeShader, &_singleBufferKernel, 1);
        // one thread per stencil, rounded up to whole work groups
        deviceContext->Dispatch((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
    }

    // Predicate selecting the bundle compiled for a given length and stride.
    struct Match {

        Match(VertexBufferDescriptor const & d) : desc(d) { }

        bool operator() (KernelBundle const * kernel) {
            return (desc.length==kernel->_desc.length and
                    desc.stride==kernel->_desc.stride);
        }

        VertexBufferDescriptor desc;
    };

private:
    ID3D11ComputeShader * _computeShader;
    ID3D11ClassLinkage * _classLinkage;
    ID3D11ClassInstance * _singleBufferKernel;
    ID3D11ClassInstance * _separateBufferKernel;
    ID3D11Buffer * _uniformArgs;      // uniform parameters for kernels

    VertexBufferDescriptor _desc;     // primvar buffer descriptor

    int _workGroupSize;
};
// ----------------------------------------------------------------------------
void
D3D11ComputeController::Synchronize() {
if (not _query) {
ID3D11Device *device = NULL;
_deviceContext->GetDevice(&device);
assert(device);
D3D11_QUERY_DESC desc;
desc.Query = D3D11_QUERY_EVENT;
desc.MiscFlags = 0;
device->CreateQuery(&desc, &_query);
}
_deviceContext->Flush();
_deviceContext->End(_query);
while (S_OK != _deviceContext->GetData(_query, NULL, 0, 0));
}
// ----------------------------------------------------------------------------
// Returns the kernel bundle matching the length and stride of desc,
// compiling and registering a new one when none exists yet.
// NOTE(review): the result of Compile() is not checked; a bundle whose
// compilation failed is registered and returned as well.
D3D11ComputeController::KernelBundle const *
D3D11ComputeController::getKernel(VertexBufferDescriptor const &desc) {

    KernelRegistry::iterator cached =
        std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
                     KernelBundle::Match(desc));

    if (cached != _kernelRegistry.end()) {
        return *cached;
    }

    assert(_deviceContext);

    KernelBundle * compiled = new KernelBundle();
    compiled->Compile(desc, desc, _deviceContext);
    _kernelRegistry.push_back(compiled);
    return compiled;
}
void
D3D11ComputeController::bindBuffer() {
// Unbind the vertexBuffer from the input assembler
ID3D11Buffer *NULLBuffer = 0;
UINT voffset = 0, vstride = 0;
_deviceContext->IASetVertexBuffers(0, 1, &NULLBuffer, &voffset, &vstride);
// Unbind the vertexBuffer from the vertex shader
ID3D11ShaderResourceView *NULLSRV = 0;
_deviceContext->VSSetShaderResources(0, 1, &NULLSRV);
if (_currentBindState.buffer)
_deviceContext->CSSetUnorderedAccessViews(0, 1, &_currentBindState.buffer, 0); // u0
}
void
D3D11ComputeController::unbindBuffer() {
assert(_deviceContext);
ID3D11UnorderedAccessView *UAViews[] = { 0 };
_deviceContext->CSSetUnorderedAccessViews(0, 1, UAViews, 0); // u0
}
// ----------------------------------------------------------------------------
// Runs the currently bound kernel over the first numStencils stencils.
// Source and destination share the bound descriptor; the destination offset
// is moved forward by (number of control vertices * stride).
void
D3D11ComputeController::ApplyStencilTableKernel(
    D3D11ComputeContext const *context, int numStencils) const {

    assert(context);

    VertexBufferDescriptor srcDesc = _currentBindState.desc;

    VertexBufferDescriptor dstDesc(srcDesc);
    dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride;

    // XXXX manuelk messy const drop forced by D3D API - could use better solution
    D3D11ComputeController::KernelBundle * mutableBundle =
        const_cast<D3D11ComputeController::KernelBundle *>(
            _currentBindState.kernelBundle);

    mutableBundle->ApplyStencilTableKernel(
        srcDesc, dstDesc, 0, numStencils, _deviceContext);
}
// ----------------------------------------------------------------------------
// Stores the device context; the synchronization query is created on demand.
D3D11ComputeController::D3D11ComputeController(
    ID3D11DeviceContext *deviceContext) :
        _deviceContext(deviceContext),
        _query(0)
{
}
// Destroys every registered kernel bundle and releases the query.
D3D11ComputeController::~D3D11ComputeController() {

    KernelRegistry::iterator bundle = _kernelRegistry.begin();
    while (bundle != _kernelRegistry.end()) {
        delete *bundle;
        ++bundle;
    }
    SAFE_RELEASE(_query);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,213 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_D3D11_COMPUTE_CONTROLLER_H
#define OSD_D3D11_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/d3d11ComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include <vector>
struct ID3D11DeviceContext;
struct ID3D11Query;
struct ID3D11UnorderedAccessView;
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Compute controller for launching D3D11 Compute subdivision kernels.
///
/// D3D11ComputeController is a compute controller class to launch
/// D3D11Compute transfrom feedback subdivision kernels. It requires
/// GLVertexBufferInterface as arguments of Refine function.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class D3D11ComputeController {
public:
typedef D3D11ComputeContext ComputeContext;
/// Constructor.
///
/// @param deviceContext a valid instanciated D3D11 device context
///
D3D11ComputeController(ID3D11DeviceContext *deviceContext);
/// Destructor.
~D3D11ComputeController();
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// @param context The D3D11Context to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
/// @param vertexDesc The descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingBuffer Vertex-interpolated data buffer
///
/// @param varyingDesc The descriptor of varying elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Compute( D3D11ComputeContext const * context,
VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer,
VertexBufferDescriptor const * vertexDesc=NULL,
VertexBufferDescriptor const * varyingDesc=NULL ){
if (vertexBuffer) {
bind(vertexBuffer, vertexDesc);
context->BindVertexStencilTables(_deviceContext);
ApplyStencilTableKernel(
context, context->GetNumStencilsInVertexStencilTables());
}
if (varyingBuffer) {
bind(varyingBuffer, varyingDesc);
context->BindVaryingStencilTables(_deviceContext);
ApplyStencilTableKernel(
context, context->GetNumStencilsInVaryingStencilTables());
}
context->UnbindStencilTables(_deviceContext);
unbind();
}
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// @param context The D3D11Context to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
template<class VERTEX_BUFFER>
void Compute(D3D11ComputeContext const * context,
VERTEX_BUFFER *vertexBuffer) {
Compute<VERTEX_BUFFER>(context, vertexBuffer, (VERTEX_BUFFER*)0);
}
/// Waits until all running subdivision kernels finish.
void Synchronize();
protected:
void ApplyStencilTableKernel(ComputeContext const *context,
int numStencils) const;
template<class BUFFER>
void bind( BUFFER * buffer,
VertexBufferDescriptor const * desc ) {
assert(buffer);
// if the vertex buffer descriptor is specified, use it
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (desc) {
_currentBindState.desc = *desc;
} else {
int numElements = buffer ? buffer->GetNumElements() : 0;
_currentBindState.desc =
VertexBufferDescriptor(0, numElements, numElements);
}
_currentBindState.buffer = buffer->BindD3D11UAV(_deviceContext);
_currentBindState.kernelBundle = getKernel(_currentBindState.desc);
bindBuffer();
}
// Unbinds any previously bound vertex and varying data buffers.
void unbind() {
_currentBindState.Reset();
unbindBuffer();
}
// binds the primvar data buffer
void bindBuffer();
// unbinds the primvar data buffer
void unbindBuffer();
private:
ID3D11DeviceContext *_deviceContext;
ID3D11Query *_query;
class KernelBundle;
// Transient record of what is bound while a refinement is in flight.
// It only refers to the vertex buffer's view; it never owns the buffer.
struct BindState {
    BindState() : buffer(0), kernelBundle(0) { }

    // Puts the record back into its "nothing bound" state.
    void Reset() {
        kernelBundle = 0;
        desc.Reset();
        buffer = 0;
    }

    ID3D11UnorderedAccessView * buffer;   // UAV of the bound primvar buffer
    VertexBufferDescriptor desc;          // layout of the bound primvar data
    KernelBundle const * kernelBundle;    // kernels selected for 'desc'
};
// State of the buffer currently bound by bind(); cleared again by unbind().
BindState _currentBindState;
// Collection of kernel bundle pointers.
// NOTE(review): presumably the cache that getKernel() searches and fills --
// confirm against the implementation file.
typedef std::vector<KernelBundle *> KernelRegistry;
// Returns the kernel bundle to use for the given vertex buffer descriptor
// (called from bind()).
KernelBundle const * getKernel(VertexBufferDescriptor const &desc);
KernelRegistry _kernelRegistry;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_D3D11_COMPUTE_CONTROLLER_H

View File

@ -0,0 +1,375 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/d3d11ComputeEvaluator.h"
#include <cassert>
#include <sstream>
#include <string>
#include <vector>
#define INITGUID // for IID_ID3D11ShaderReflection
#include <D3D11.h>
#include <D3D11shader.h>
#include <D3Dcompiler.h>
#include "../far/error.h"
#include "../far/stencilTables.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Releases a COM-style interface pointer (if non-null) and resets it to NULL.
// The do { } while (0) wrapper makes the macro expand to exactly one
// statement, so "if (x) SAFE_RELEASE(p); else ..." parses as intended.
#define SAFE_RELEASE(p) do { if (p) { (p)->Release(); (p) = NULL; } } while (0)
static const char *shaderSource =
#include "../osd/hlslComputeKernel.gen.h"
;
// ----------------------------------------------------------------------------
// must match constant buffer declaration in hlslComputeKernel.hlsl
//
// CPU-side image of the kernel's constant buffer; EvalStencils() fills one of
// these per dispatch and copies it into the _uniformArgs buffer (slot b0).
// NOTE(review): the 16-byte alignment presumably mirrors the D3D11 rule that
// constant buffer sizes are multiples of 16 bytes -- confirm.
__declspec(align(16))
struct KernelUniformArgs {
int start; // batch
int end;       // set from the 'end' argument of EvalStencils()
int srcOffset; // set from srcDesc.offset
int dstOffset; // set from dstDesc.offset
};
// ----------------------------------------------------------------------------
// Creates an immutable D3D11 buffer, bindable as a shader resource, that
// holds a copy of the contents of 'src'.
//
// Returns the new buffer (the caller owns the reference), or NULL after
// reporting an error if 'src' is empty or the device fails to create the
// buffer.
template <typename T>
static ID3D11Buffer *createBuffer(std::vector<T> const &src,
                                  ID3D11Device *device) {

    // An empty table has no initial data to upload (and &src.at(0) would
    // throw), so report it through the regular error path instead.
    if (src.empty()) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "Error creating compute table buffer\n");
        return NULL;
    }

    D3D11_BUFFER_DESC bd;
    ZeroMemory(&bd, sizeof(bd));
    bd.ByteWidth = (unsigned int)(src.size()*sizeof(T));
    bd.Usage = D3D11_USAGE_IMMUTABLE;
    bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
    bd.CPUAccessFlags = 0;
    bd.MiscFlags = 0;
    bd.StructureByteStride = 0;

    // Zero the whole struct so that the pitch members are not left
    // uninitialized.
    D3D11_SUBRESOURCE_DATA initData;
    ZeroMemory(&initData, sizeof(initData));
    initData.pSysMem = &src[0];

    ID3D11Buffer *buffer = NULL;
    HRESULT hr = device->CreateBuffer(&bd, &initData, &buffer);
    if (FAILED(hr)) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "Error creating compute table buffer\n");
        return NULL;
    }
    return buffer;
}
// Creates a buffer-typed shader resource view over 'buffer'.
//
// format  element format of the view
// size    number of elements exposed by the view
//
// Returns the new view (the caller owns the reference), or NULL after
// reporting an error if the device rejects the request.
static ID3D11ShaderResourceView *createSRV(ID3D11Buffer *buffer,
                                           DXGI_FORMAT format,
                                           ID3D11Device *device,
                                           size_t size) {
    D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc;
    ZeroMemory(&viewDesc, sizeof(viewDesc));
    viewDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
    viewDesc.Format = format;
    viewDesc.Buffer.FirstElement = 0;
    viewDesc.Buffer.NumElements = (unsigned int)size;

    ID3D11ShaderResourceView *view = NULL;
    if (SUCCEEDED(device->CreateShaderResourceView(buffer, &viewDesc, &view))) {
        return view;
    }

    Far::Error(Far::FAR_RUNTIME_ERROR,
               "Error creating compute table shader resource view\n");
    return NULL;
}
// Uploads the sizes, offsets, control indices and weights of the given
// stencil tables into immutable D3D11 buffers and creates a shader resource
// view for each. When the table holds no stencils, every handle stays NULL.
D3D11StencilTables::D3D11StencilTables(Far::StencilTables const *stencilTables,
                                       ID3D11DeviceContext *deviceContext)
{
    _sizes = _offsets = _indices = _weights = NULL;
    _sizesBuffer = _offsetsBuffer = _indicesBuffer = _weightsBuffer = NULL;

    _numStencils = stencilTables->GetNumStencils();
    if (_numStencils <= 0) return;

    // GetDevice() hands back a counted reference; it is released below.
    ID3D11Device *device = NULL;
    deviceContext->GetDevice(&device);
    assert(device);

    // convert unsigned char sizes buffer to ints
    // (HLSL does not have uint8 type)
    std::vector<int> const sizes(stencilTables->GetSizes().begin(),
                                 stencilTables->GetSizes().end());

    _sizesBuffer   = createBuffer(sizes, device);
    _offsetsBuffer = createBuffer(stencilTables->GetOffsets(), device);
    _indicesBuffer = createBuffer(stencilTables->GetControlIndices(), device);
    _weightsBuffer = createBuffer(stencilTables->GetWeights(), device);

    _sizes   = createSRV(_sizesBuffer, DXGI_FORMAT_R32_SINT, device,
                         stencilTables->GetSizes().size());
    _offsets = createSRV(_offsetsBuffer, DXGI_FORMAT_R32_SINT, device,
                         stencilTables->GetOffsets().size());
    _indices = createSRV(_indicesBuffer, DXGI_FORMAT_R32_SINT, device,
                         stencilTables->GetControlIndices().size());
    _weights = createSRV(_weightsBuffer, DXGI_FORMAT_R32_FLOAT, device,
                         stencilTables->GetWeights().size());

    // Balance the reference added by GetDevice(); the buffers and views
    // keep the device alive on their own.
    device->Release();
}
// Releases every (view, buffer) pair created by the constructor.
// SAFE_RELEASE skips handles that are NULL.
D3D11StencilTables::~D3D11StencilTables() {
    // stencil sizes
    SAFE_RELEASE(_sizes);
    SAFE_RELEASE(_sizesBuffer);

    // stencil offsets
    SAFE_RELEASE(_offsets);
    SAFE_RELEASE(_offsetsBuffer);

    // control vertex indices
    SAFE_RELEASE(_indices);
    SAFE_RELEASE(_indicesBuffer);

    // stencil weights
    SAFE_RELEASE(_weights);
    SAFE_RELEASE(_weightsBuffer);
}
// ---------------------------------------------------------------------------
// Builds an evaluator with no compiled kernel: all D3D handles start out
// NULL (Compile() fills them in) and the work group size is 64.
D3D11ComputeEvaluator::D3D11ComputeEvaluator()
    : _computeShader(NULL)
    , _classLinkage(NULL)
    , _singleBufferKernel(NULL)
    , _separateBufferKernel(NULL)
    , _uniformArgs(NULL)
    , _workGroupSize(64) {
}
// Allocates an evaluator and compiles its kernel for the given source and
// destination layouts, using the device that owns 'deviceContext'.
//
// Returns the new instance (owned by the caller), or NULL if the kernel
// fails to compile.
D3D11ComputeEvaluator *
D3D11ComputeEvaluator::Create(VertexBufferDescriptor const &srcDesc,
                              VertexBufferDescriptor const &dstDesc,
                              ID3D11DeviceContext *deviceContext) {
    // Unlike the GL evaluator, the device context is required here:
    // Compile() uses it to reach the D3D11 device.
    D3D11ComputeEvaluator *instance = new D3D11ComputeEvaluator();
    if (!instance->Compile(srcDesc, dstDesc, deviceContext)) {
        delete instance;
        return NULL;
    }
    return instance;
}
// Releases everything Compile() created. SAFE_RELEASE skips NULL handles,
// so destroying an evaluator that was never compiled is fine.
D3D11ComputeEvaluator::~D3D11ComputeEvaluator() {
    // shader and its class linkage
    SAFE_RELEASE(_computeShader);
    SAFE_RELEASE(_classLinkage);

    // class instances selecting the single/separate buffer code path
    SAFE_RELEASE(_singleBufferKernel);
    SAFE_RELEASE(_separateBufferKernel);

    // constant buffer holding the kernel arguments
    SAFE_RELEASE(_uniformArgs);
}
// Compiles the HLSL stencil kernel for the given source/destination layouts
// and creates the D3D objects needed to dispatch it (class linkage, compute
// shader, the two class instances and the constant buffer).
//
// srcDesc.length, srcDesc.stride, dstDesc.stride and the work group size are
// baked into the shader as preprocessor defines.
//
// Returns false (after reporting an error) if srcDesc.length exceeds
// dstDesc.length, if the shader does not compile, or if any of the D3D
// objects cannot be created.
bool
D3D11ComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
                               VertexBufferDescriptor const &dstDesc,
                               ID3D11DeviceContext *deviceContext) {

    if (srcDesc.length > dstDesc.length) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "srcDesc length must be less than or equal to "
                   "dstDesc length.\n");
        return false;
    }

    // Drop whatever a previous Compile() created so that recompiling does
    // not leak (the GL evaluator deletes its old program the same way).
    SAFE_RELEASE(_singleBufferKernel);
    SAFE_RELEASE(_separateBufferKernel);
    SAFE_RELEASE(_computeShader);
    SAFE_RELEASE(_classLinkage);
    SAFE_RELEASE(_uniformArgs);

    DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS
                        | D3D10_SHADER_RESOURCES_MAY_ALIAS;
#ifdef _DEBUG
    dwShaderFlags |= D3DCOMPILE_DEBUG;
#endif

    std::ostringstream ss;
    ss << srcDesc.length;  std::string lengthValue(ss.str()); ss.str("");
    ss << srcDesc.stride;  std::string srcStrideValue(ss.str()); ss.str("");
    ss << dstDesc.stride;  std::string dstStrideValue(ss.str()); ss.str("");
    ss << _workGroupSize;  std::string workgroupSizeValue(ss.str()); ss.str("");

    D3D_SHADER_MACRO defines[] =
        { "LENGTH", lengthValue.c_str(),
          "SRC_STRIDE", srcStrideValue.c_str(),
          "DST_STRIDE", dstStrideValue.c_str(),
          "WORK_GROUP_SIZE", workgroupSizeValue.c_str(),
          0, 0 };

    ID3DBlob * computeShaderBuffer = NULL;
    ID3DBlob * errorBuffer = NULL;
    HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource),
                            NULL, &defines[0], NULL,
                            "cs_main", "cs_5_0",
                            dwShaderFlags, 0,
                            &computeShaderBuffer, &errorBuffer);
    if (FAILED(hr) || computeShaderBuffer == NULL) {
        // Fail whether or not the compiler produced a message blob; without
        // this the code below would dereference a NULL shader blob.
        if (errorBuffer != NULL) {
            Far::Error(Far::FAR_RUNTIME_ERROR,
                       "Error compiling HLSL shader: %s\n",
                       (CHAR*)errorBuffer->GetBufferPointer());
        } else {
            Far::Error(Far::FAR_RUNTIME_ERROR,
                       "Error compiling HLSL shader\n");
        }
        SAFE_RELEASE(errorBuffer);
        SAFE_RELEASE(computeShaderBuffer);
        return false;
    }
    // A successful compile may still return a blob holding warnings.
    SAFE_RELEASE(errorBuffer);

    // GetDevice() hands back a counted reference; it is released below.
    ID3D11Device *device = NULL;
    deviceContext->GetDevice(&device);
    assert(device);

    bool success = false;

    device->CreateClassLinkage(&_classLinkage);
    assert(_classLinkage);

    if (_classLinkage) {
        device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(),
                                    computeShaderBuffer->GetBufferSize(),
                                    _classLinkage,
                                    &_computeShader);
        assert(_computeShader);
    }

    if (_computeShader) {
        // Sanity check: the kernel is expected to expose exactly one
        // interface slot. Only the asserts use the result.
        ID3D11ShaderReflection *reflector = NULL;
        D3DReflect(computeShaderBuffer->GetBufferPointer(),
                   computeShaderBuffer->GetBufferSize(),
                   IID_ID3D11ShaderReflection, (void**) &reflector);
        assert(reflector);
        if (reflector) {
            assert(reflector->GetNumInterfaceSlots() == 1);
            reflector->Release();
        }

        _classLinkage->GetClassInstance("singleBufferCompute", 0, &_singleBufferKernel);
        assert(_singleBufferKernel);
        _classLinkage->GetClassInstance("separateBufferCompute", 0, &_separateBufferKernel);
        assert(_separateBufferKernel);

        // Dynamic constant buffer rewritten by every EvalStencils() call.
        D3D11_BUFFER_DESC cbDesc;
        ZeroMemory(&cbDesc, sizeof(cbDesc));
        cbDesc.Usage = D3D11_USAGE_DYNAMIC;
        cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
        cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
        cbDesc.MiscFlags = 0;
        cbDesc.ByteWidth = sizeof(KernelUniformArgs);
        device->CreateBuffer(&cbDesc, NULL, &_uniformArgs);

        success = (_singleBufferKernel != NULL) &&
                  (_separateBufferKernel != NULL) &&
                  (_uniformArgs != NULL);
    }

    computeShaderBuffer->Release();
    device->Release();

    if (!success) {
        // Partially created objects are released by the destructor or by
        // the next Compile() call.
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "Error creating HLSL compute kernel objects\n");
    }
    return success;
}
/* static */
// Blocks the calling thread until the GPU has finished the commands issued
// on 'deviceContext' so far, by flushing, ending an event query and polling
// it until it is signaled.
void
D3D11ComputeEvaluator::Synchronize(ID3D11DeviceContext *deviceContext) {
    // XXX: this is currently just for the performance measuring purpose.

    // XXXFIXME!
    ID3D11Device *device = NULL;
    deviceContext->GetDevice(&device);
    assert(device);

    D3D11_QUERY_DESC desc;
    desc.Query = D3D11_QUERY_EVENT;
    desc.MiscFlags = 0;

    ID3D11Query *query = NULL;
    HRESULT hr = device->CreateQuery(&desc, &query);

    // GetDevice() added a reference; drop it now that the query exists.
    device->Release();

    // Without a query there is nothing to wait on.
    if (FAILED(hr) || query == NULL) return;

    deviceContext->Flush();
    deviceContext->End(query);

    // S_FALSE means "not signaled yet". Stop on S_OK and also on any error
    // code (e.g. a removed device) so the loop cannot spin forever.
    while (deviceContext->GetData(query, NULL, 0, 0) == S_FALSE) {
    }

    query->Release();
}
// Dispatches the compiled stencil kernel for the stencils [start, end).
//
// srcUAV / dstUAV   views of the source and destination primvar buffers;
//                   when both are the same view the single-buffer kernel is
//                   used, otherwise the separate-buffer kernel
// srcDesc / dstDesc only their 'offset' is passed to the kernel here (length
//                   and strides were baked in by Compile())
// *SRV              stencil table views, bound to t1-t4
//
// Returns false if the kernel has not been compiled or if the constant
// buffer cannot be mapped; returns true otherwise, including when the range
// is empty. All resources bound here are unbound again before returning.
bool
D3D11ComputeEvaluator::EvalStencils(ID3D11UnorderedAccessView *srcUAV,
                                    VertexBufferDescriptor const &srcDesc,
                                    ID3D11UnorderedAccessView *dstUAV,
                                    VertexBufferDescriptor const &dstDesc,
                                    ID3D11ShaderResourceView *sizesSRV,
                                    ID3D11ShaderResourceView *offsetsSRV,
                                    ID3D11ShaderResourceView *indicesSRV,
                                    ID3D11ShaderResourceView *weightsSRV,
                                    int start,
                                    int end,
                                    ID3D11DeviceContext *deviceContext) const {
    assert(deviceContext);

    // Nothing can be dispatched before Compile() has succeeded.
    if (!_computeShader || !_uniformArgs ||
        !_singleBufferKernel || !_separateBufferKernel) {
        return false;
    }

    int count = end - start;
    if (count <= 0) return true;

    KernelUniformArgs args;
    args.start = start;
    args.end = end;
    args.srcOffset = srcDesc.offset;
    args.dstOffset = dstDesc.offset;

    D3D11_MAPPED_SUBRESOURCE mappedResource;
    HRESULT hr = deviceContext->Map(_uniformArgs, 0, D3D11_MAP_WRITE_DISCARD,
                                    0, &mappedResource);
    if (FAILED(hr)) {
        // mappedResource is not valid; writing through it would crash.
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "Error mapping compute kernel constant buffer\n");
        return false;
    }
    CopyMemory(mappedResource.pData, &args, sizeof(KernelUniformArgs));
    deviceContext->Unmap(_uniformArgs, 0);
    deviceContext->CSSetConstantBuffers(0, 1, &_uniformArgs); // b0

    // Unbind the vertexBuffer from the input assembler
    ID3D11Buffer *NULLBuffer = 0;
    UINT voffset = 0, vstride = 0;
    deviceContext->IASetVertexBuffers(0, 1, &NULLBuffer, &voffset, &vstride);
    ID3D11ShaderResourceView *NULLSRV = 0;
    deviceContext->VSSetShaderResources(0, 1, &NULLSRV);

    // bind UAV
    ID3D11UnorderedAccessView *UAViews[] = { srcUAV, dstUAV };
    ID3D11ShaderResourceView *SRViews[] = {
        sizesSRV, offsetsSRV, indicesSRV, weightsSRV };

    // bind stencil tables
    deviceContext->CSSetShaderResources(1, 4, SRViews); // t1-t4

    if (srcUAV == dstUAV) {
        deviceContext->CSSetUnorderedAccessViews(0, 1, UAViews, 0); // u0
        // Dispatch src == dst buffer
        deviceContext->CSSetShader(_computeShader, &_singleBufferKernel, 1);
        deviceContext->Dispatch((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
    } else {
        deviceContext->CSSetUnorderedAccessViews(0, 2, UAViews, 0); // u0, u1
        // Dispatch src != dst buffer
        deviceContext->CSSetShader(_computeShader, &_separateBufferKernel, 1);
        deviceContext->Dispatch((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
    }

    // unbind stencil tables and vertexbuffers
    SRViews[0] = SRViews[1] = SRViews[2] = SRViews[3] = NULL;
    deviceContext->CSSetShaderResources(1, 4, SRViews);

    UAViews[0] = UAViews[1] = NULL;
    deviceContext->CSSetUnorderedAccessViews(0, 2, UAViews, 0);

    return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,227 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H
#define OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H
#include "../version.h"
struct ID3D11DeviceContext;
struct ID3D11Buffer;
struct ID3D11ComputeShader;
struct ID3D11ClassLinkage;
struct ID3D11ClassInstance;
struct ID3D11ShaderResourceView;
struct ID3D11UnorderedAccessView;
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class StencilTables;
}
namespace Osd {
/// \brief D3D11 stencil tables
///
/// This class is a D3D11 Shader Resource View representation of
/// Far::StencilTables.
///
/// D3D11ComputeEvaluator consumes this table to apply stencils
///
/// Instances own their D3D11 buffers and views and release them on
/// destruction, so the class is not copyable.
///
class D3D11StencilTables {
public:
    /// Creates a table using the ID3D11DeviceContext obtained from
    /// context->GetDeviceContext(). The caller owns the returned object.
    template <typename DEVICE_CONTEXT>
    static D3D11StencilTables *Create(Far::StencilTables const *stencilTables,
                                      DEVICE_CONTEXT context) {
        return new D3D11StencilTables(stencilTables, context->GetDeviceContext());
    }

    /// Creates a table from a raw ID3D11DeviceContext. The caller owns the
    /// returned object.
    static D3D11StencilTables *Create(Far::StencilTables const *stencilTables,
                                      ID3D11DeviceContext *deviceContext) {
        return new D3D11StencilTables(stencilTables, deviceContext);
    }

    /// Constructor. Uploads the stencil tables to the device that owns
    /// \a deviceContext.
    D3D11StencilTables(Far::StencilTables const *stencilTables,
                       ID3D11DeviceContext *deviceContext);

    /// Destructor. Releases the buffers and views.
    ~D3D11StencilTables();

    // interfaces needed for D3D11ComputeEvaluator
    ID3D11ShaderResourceView *GetSizesSRV() const { return _sizes; }
    ID3D11ShaderResourceView *GetOffsetsSRV() const { return _offsets; }
    ID3D11ShaderResourceView *GetIndicesSRV() const { return _indices; }
    ID3D11ShaderResourceView *GetWeightsSRV() const { return _weights; }
    int GetNumStencils() const { return _numStencils; }

private:
    // Not copyable: a copy would release the same D3D11 objects twice.
    // (declared, intentionally not defined)
    D3D11StencilTables(D3D11StencilTables const &);
    D3D11StencilTables & operator=(D3D11StencilTables const &);

    ID3D11ShaderResourceView *_sizes;
    ID3D11ShaderResourceView *_offsets;
    ID3D11ShaderResourceView *_indices;
    ID3D11ShaderResourceView *_weights;
    ID3D11Buffer *_sizesBuffer;
    ID3D11Buffer *_offsetsBuffer;
    ID3D11Buffer *_indicesBuffer;
    ID3D11Buffer *_weightsBuffer;

    int _numStencils;
};
// ---------------------------------------------------------------------------
/// \brief D3D11 compute-shader evaluator
///
/// Applies stencil tables to primvar buffers with an HLSL compute kernel.
/// An instance owns the compiled kernel and its D3D11 objects and releases
/// them on destruction, so the class is not copyable.
///
class D3D11ComputeEvaluator {
public:
    typedef bool Instantiatable;

    /// Creates an evaluator and compiles its kernel for the given layouts.
    /// Returns NULL if compilation fails. The caller owns the result.
    static D3D11ComputeEvaluator * Create(VertexBufferDescriptor const &srcDesc,
                                          VertexBufferDescriptor const &dstDesc,
                                          ID3D11DeviceContext *deviceContext);

    /// Constructor.
    D3D11ComputeEvaluator();

    /// Destructor.
    ~D3D11ComputeEvaluator();

    /// \brief Generic static compute function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently from OsdMesh template interface.
    ///
    /// @param srcVertexBuffer  Input primvar buffer.
    ///                         must have a BindD3D11UAV(deviceContext) method
    ///                         returning an ID3D11UnorderedAccessView pointer
    ///
    /// @param srcDesc          vertex buffer descriptor for the input buffer
    ///
    /// @param dstVertexBuffer  Output primvar buffer.
    ///                         must have a BindD3D11UAV(deviceContext) method
    ///                         returning an ID3D11UnorderedAccessView pointer
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable     stencil table to be applied. The table must
    ///                         have the SRV interfaces (GetSizesSRV(),
    ///                         GetOffsetsSRV(), GetIndicesSRV(),
    ///                         GetWeightsSRV()) and GetNumStencils().
    ///
    /// @param instance         cached compiled instance. Clients are supposed
    ///                         to pre-compile an instance of this class and
    ///                         provide to this function. If it's null the
    ///                         kernel still compute by instantiating
    ///                         on-demand kernel although it may cause a
    ///                         performance problem.
    ///
    /// @param deviceContext    ID3D11DeviceContext.
    ///
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             VERTEX_BUFFER *dstVertexBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             D3D11ComputeEvaluator const *instance,
                             ID3D11DeviceContext * deviceContext) {
        if (instance) {
            return instance->EvalStencils(srcVertexBuffer, srcDesc,
                                          dstVertexBuffer, dstDesc,
                                          stencilTable,
                                          deviceContext);
        } else {
            // Create an instance on demand (slow)
            instance = Create(srcDesc, dstDesc, deviceContext);
            if (instance) {
                bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
                                                dstVertexBuffer, dstDesc,
                                                stencilTable,
                                                deviceContext);
                delete instance;
                return r;
            }
            return false;
        }
    }

    /// Dispatch the DX compute kernel on GPU asynchronously.
    /// returns false if the kernel hasn't been compiled yet.
    ///
    /// Binds both vertex buffers as UAVs and applies every stencil of
    /// \a stencilTable (range 0 .. GetNumStencils()).
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                      VertexBufferDescriptor const &srcDesc,
                      VERTEX_BUFFER *dstVertexBuffer,
                      VertexBufferDescriptor const &dstDesc,
                      STENCIL_TABLE const *stencilTable,
                      ID3D11DeviceContext *deviceContext) const {
        return EvalStencils(srcVertexBuffer->BindD3D11UAV(deviceContext),
                            srcDesc,
                            dstVertexBuffer->BindD3D11UAV(deviceContext),
                            dstDesc,
                            stencilTable->GetSizesSRV(),
                            stencilTable->GetOffsetsSRV(),
                            stencilTable->GetIndicesSRV(),
                            stencilTable->GetWeightsSRV(),
                            /* start = */ 0,
                            /* end   = */ stencilTable->GetNumStencils(),
                            deviceContext);
    }

    /// Dispatch the DX compute kernel on GPU asynchronously.
    /// returns false if the kernel hasn't been compiled yet.
    ///
    /// Applies the stencils in the range [start, end).
    bool EvalStencils(ID3D11UnorderedAccessView *srcUAV,
                      VertexBufferDescriptor const &srcDesc,
                      ID3D11UnorderedAccessView *dstUAV,
                      VertexBufferDescriptor const &dstDesc,
                      ID3D11ShaderResourceView *sizesSRV,
                      ID3D11ShaderResourceView *offsetsSRV,
                      ID3D11ShaderResourceView *indicesSRV,
                      ID3D11ShaderResourceView *weightsSRV,
                      int start,
                      int end,
                      ID3D11DeviceContext *deviceContext) const;

    /// Configure DX kernel. Returns false if it fails to compile the kernel.
    bool Compile(VertexBufferDescriptor const &srcDesc,
                 VertexBufferDescriptor const &dstDesc,
                 ID3D11DeviceContext *deviceContext);

    /// Wait until the dispatched kernel finishes.
    static void Synchronize(ID3D11DeviceContext *deviceContext);

private:
    // Not copyable: a copy would release the same D3D11 objects twice.
    // (declared, intentionally not defined)
    D3D11ComputeEvaluator(D3D11ComputeEvaluator const &);
    D3D11ComputeEvaluator & operator=(D3D11ComputeEvaluator const &);

    ID3D11ComputeShader * _computeShader;
    ID3D11ClassLinkage  * _classLinkage;
    ID3D11ClassInstance * _singleBufferKernel;
    ID3D11ClassInstance * _separateBufferKernel;
    ID3D11Buffer        * _uniformArgs; // uniform parameters for kernels

    int _workGroupSize;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H

View File

@ -41,7 +41,7 @@ namespace Osd {
/// \brief Concrete vertex buffer class for DirectX subdivision and DirectX drawing.
///
/// D3D11VertexBuffer implements D3D11VertexBufferInterface. An instance
/// of this buffer class can be passed to D3D11ComputeController.
/// of this buffer class can be passed to D3D11ComputeEvaluator.
///
class D3D11VertexBuffer {
public:

View File

@ -1,44 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/evalLimitContext.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Records whether the given patch tables are feature adaptive.
EvalLimitContext::EvalLimitContext(Far::PatchTables const & patchTables) :
    _adaptive(patchTables.IsFeatureAdaptive()) {
}

// Out-of-line definition of the virtual destructor; nothing to release.
EvalLimitContext::~EvalLimitContext() {
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,100 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_EVAL_LIMIT_CONTEXT_H
#define OSD_EVAL_LIMIT_CONTEXT_H
#include "../version.h"
#include "../far/patchTables.h"
#include "../osd/nonCopyable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Coordinates set on a limit surface
///
struct LimitLocation {

    /// \brief Default constructor
    ///
    /// Produces a well-defined "no location" value: ptex face index -1 (the
    /// same sentinel LimitLocationsArray uses) at parametric location (0, 0),
    /// instead of leaving the members uninitialized.
    ///
    LimitLocation() : ptexIndex(-1), s(0.0f), t(0.0f) { }

    /// \brief Constructor
    ///
    /// @param f Ptex face id
    ///
    /// @param x parametric location on face
    ///
    /// @param y parametric location on face
    ///
    LimitLocation(int f, float x, float y) : ptexIndex(f), s(x), t(y) { }

    int ptexIndex;  ///< ptex face index

    float s, t;     ///< parametric location on face
};
/// \brief A batch of parametric locations that all lie on one ptex face
///
class LimitLocationsArray {
public:
    /// \brief Constructor
    ///
    /// Starts out empty: face index -1, no locations, null coordinate arrays.
    LimitLocationsArray()
        : ptexIndex(-1)
        , numLocations(0)
        , s(0)
        , t(0) { }

    int ptexIndex;     ///< ptex face index
    int numLocations;  ///< number of (u,v) coordinates in the array

    float const * s;   ///< array of u coordinates
    float const * t;   ///< array of v coordinates
};
/// \brief LimitEval Context
///
/// Stub base class from which LimitEval context classes derive. It is
/// non-copyable and can only be constructed by a derived class.
///
class EvalLimitContext : private NonCopyable<EvalLimitContext> {

protected:
    /// \brief Constructor, for derived classes only.
    explicit EvalLimitContext(Far::PatchTables const & patchTables);

public:
    /// \brief Destructor.
    virtual ~EvalLimitContext();

private:
    bool _adaptive;  // set from patchTables.IsFeatureAdaptive()
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif /* OSD_EVAL_LIMIT_CONTEXT_H */

View File

@ -0,0 +1,224 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/glComputeEvaluator.h"
#include <cassert>
#include <sstream>
#include <string>
#include <vector>
#include "../far/error.h"
#include "../far/stencilTables.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
static const char *shaderSource =
#include "../osd/glslComputeKernel.gen.h"
;
// Creates a GL buffer object filled with a copy of 'src' (GL_STATIC_DRAW)
// and returns its name.
//
// When EXT_direct_state_access is compiled in and its entry point is
// available, the data is uploaded without touching any binding. Otherwise
// the buffer is uploaded through GL_SHADER_STORAGE_BUFFER and the previous
// binding of that target is restored afterwards.
template <class T> GLuint
createSSBO(std::vector<T> const & src) {
    GLuint ssbo = 0;
    glGenBuffers(1, &ssbo);

#if defined(GL_EXT_direct_state_access)
    if (glNamedBufferDataEXT) {
        glNamedBufferDataEXT(ssbo, src.size()*sizeof(T),
                             &src.at(0), GL_STATIC_DRAW);
        return ssbo;
    }
#endif

    // bind-to-edit fallback
    GLint previousBinding = 0;
    glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &previousBinding);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
    glBufferData(GL_SHADER_STORAGE_BUFFER, src.size()*sizeof(T),
                 &src.at(0), GL_STATIC_DRAW);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, previousBinding);

    return ssbo;
}
// Uploads the sizes, offsets, control indices and weights of the given
// stencil tables into four GL buffers. When the table holds no stencils,
// all buffer names stay 0.
GLStencilTablesSSBO::GLStencilTablesSSBO(
    Far::StencilTables const *stencilTables) {

    _sizes = _offsets = _indices = _weights = 0;

    _numStencils = stencilTables->GetNumStencils();
    if (_numStencils <= 0) {
        return;
    }

    _sizes   = createSSBO(stencilTables->GetSizes());
    _offsets = createSSBO(stencilTables->GetOffsets());
    _indices = createSSBO(stencilTables->GetControlIndices());
    _weights = createSSBO(stencilTables->GetWeights());
}
// Deletes every buffer the constructor created; names that are still 0 are
// skipped.
GLStencilTablesSSBO::~GLStencilTablesSSBO() {
    GLuint const * const owned[] = { &_sizes, &_offsets, &_weights, &_indices };
    for (int i = 0; i < 4; ++i) {
        if (*owned[i]) {
            glDeleteBuffers(1, owned[i]);
        }
    }
}
// ---------------------------------------------------------------------------
// Builds an evaluator with no compiled program (Compile() creates it) and a
// work group size of 64.
GLComputeEvaluator::GLComputeEvaluator()
    : _program(0)
    , _workGroupSize(64) {
}
// Deletes the compute program, if one was ever compiled.
GLComputeEvaluator::~GLComputeEvaluator() {
    if (_program != 0) glDeleteProgram(_program);
}
// Compiles and links the GLSL stencil kernel for the given source and
// destination layouts, replacing any previously compiled program.
//
// srcDesc.length, srcDesc.stride, dstDesc.stride and the work group size are
// baked into the shader as #defines.
//
// Returns false (after reporting the shader and program info logs) if
// srcDesc.length exceeds dstDesc.length or if the program fails to link.
bool
GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
                            VertexBufferDescriptor const &dstDesc) {

    if (srcDesc.length > dstDesc.length) {
        Far::Error(Far::FAR_RUNTIME_ERROR,
                   "srcDesc length must be less than or equal to "
                   "dstDesc length.\n");
        return false;
    }

    if (_program) {
        glDeleteProgram(_program);
        _program = 0;
    }
    _program = glCreateProgram();

    GLuint shader = glCreateShader(GL_COMPUTE_SHADER);

    std::ostringstream defines;
    defines << "#define LENGTH "     << srcDesc.length << "\n"
            << "#define SRC_STRIDE " << srcDesc.stride << "\n"
            << "#define DST_STRIDE " << dstDesc.stride << "\n"
            << "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
    std::string defineStr = defines.str();

    const char *shaderSources[3] = {"#version 430\n", 0, 0};
    shaderSources[1] = defineStr.c_str();
    shaderSources[2] = shaderSource;
    glShaderSource(shader, 3, shaderSources, NULL);
    glCompileShader(shader);
    glAttachShader(_program, shader);

    GLint linked = 0;
    glLinkProgram(_program);
    glGetProgramiv(_program, GL_LINK_STATUS, &linked);

    if (linked == GL_FALSE) {
        // The logs are driver-provided text: pass them as data, never as the
        // printf-style format string.
        char buffer[1024];
        glGetShaderInfoLog(shader, 1024, NULL, buffer);
        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);

        glGetProgramInfoLog(_program, 1024, NULL, buffer);
        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);

        // Release the shader object on the failure path too.
        glDeleteShader(shader);
        glDeleteProgram(_program);
        _program = 0;
        return false;
    }

    glDeleteShader(shader);

    // store uniform locations for the compute kernel program.
    _uniformSizes   = glGetUniformLocation(_program, "stencilSizes");
    _uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
    _uniformIndices = glGetUniformLocation(_program, "stencilIndices");
    // NOTE(review): "stencilIWeights" looks like a typo for "stencilWeights";
    // the GLSL source is not visible here, so the name is left untouched --
    // confirm against glslComputeKernel.glsl.
    _uniformWeights = glGetUniformLocation(_program, "stencilIWeights");

    _uniformStart     = glGetUniformLocation(_program, "batchStart");
    _uniformEnd       = glGetUniformLocation(_program, "batchEnd");
    _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
    _uniformDstOffset = glGetUniformLocation(_program, "dstOffset");

    return true;
}
/* static */
// Blocks until the GL server has completed all previously issued commands.
// The argument is ignored.
void
GLComputeEvaluator::Synchronize(void * /*kernel*/) {
    // XXX: only meant for performance measurements at the moment;
    //      should be reimplemented with fence and sync objects.
    glFinish();
}
// Dispatches the compiled stencil kernel for the stencils [start, end).
//
// srcBuffer / dstBuffer  GL buffer names of the source and destination
//                        primvar data (SSBO binding points 0 and 1)
// srcDesc / dstDesc      only their 'offset' is passed to the kernel here
//                        (length and strides were baked in by Compile())
// sizes/offsets/indices/weightsBuffer
//                        stencil table buffers (binding points 2 to 5)
//
// Returns false if the kernel has not been compiled, true otherwise
// (including when the range is empty). The program and all six SSBO binding
// points are unbound again before returning.
bool
GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
                                 VertexBufferDescriptor const &srcDesc,
                                 GLuint dstBuffer,
                                 VertexBufferDescriptor const &dstDesc,
                                 GLuint sizesBuffer,
                                 GLuint offsetsBuffer,
                                 GLuint indicesBuffer,
                                 GLuint weightsBuffer,
                                 int start,
                                 int end) const {

    if (!_program) return false;
    int count = end - start;
    if (count <= 0) {
        return true;
    }

    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, sizesBuffer);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, offsetsBuffer);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer);

    glUseProgram(_program);

    glUniform1i(_uniformStart,     start);
    glUniform1i(_uniformEnd,       end);
    glUniform1i(_uniformSrcOffset, srcDesc.offset);
    glUniform1i(_uniformDstOffset, dstDesc.offset);

    glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);

    glUseProgram(0);

    // The kernel writes its results through shader storage. Make them
    // visible to a following EvalStencils() pass (shader storage reads), to
    // drawing from the buffer as vertex attributes, and to texture-buffer
    // fetches (the only case the previous barrier covered).
    // NOTE(review): how clients consume the destination buffer is not visible
    // here; the extra bits are a conservative superset -- confirm.
    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT |
                    GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                    GL_TEXTURE_FETCH_BARRIER_BIT);

    for (GLuint slot = 0; slot < 6; ++slot) {
        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, slot, 0);
    }

    return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,215 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
#define OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
#include "../version.h"
#include "../osd/opengl.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class StencilTables;
}
namespace Osd {
/// \brief GL stencil tables (Shader Storage buffer)
///
/// This class is a GLSL SSBO representation of Far::StencilTables.
///
/// GLComputeEvaluator consumes this table to apply stencils
///
/// Instances own their GL buffers and delete them on destruction, so the
/// class is not copyable.
///
class GLStencilTablesSSBO {
public:
    /// Creates a table from the given stencil tables. \a deviceContext is
    /// not used by the GL implementation. The caller owns the returned
    /// object.
    static GLStencilTablesSSBO *Create(Far::StencilTables const *stencilTables,
                                       void *deviceContext = NULL) {
        (void)deviceContext;  // unused
        return new GLStencilTablesSSBO(stencilTables);
    }

    /// Constructor. Uploads the stencil tables into GL buffers.
    explicit GLStencilTablesSSBO(Far::StencilTables const *stencilTables);

    /// Destructor. Deletes the GL buffers.
    ~GLStencilTablesSSBO();

    // interfaces needed for GLComputeEvaluator
    GLuint GetSizesBuffer() const { return _sizes; }
    GLuint GetOffsetsBuffer() const { return _offsets; }
    GLuint GetIndicesBuffer() const { return _indices; }
    GLuint GetWeightsBuffer() const { return _weights; }
    int GetNumStencils() const { return _numStencils; }

private:
    // Not copyable: a copy would delete the same GL buffers twice.
    // (declared, intentionally not defined)
    GLStencilTablesSSBO(GLStencilTablesSSBO const &);
    GLStencilTablesSSBO & operator=(GLStencilTablesSSBO const &);

    GLuint _sizes;
    GLuint _offsets;
    GLuint _indices;
    GLuint _weights;
    int _numStencils;
};
// ---------------------------------------------------------------------------
class GLComputeEvaluator {
public:
    typedef bool Instantiatable;

    /// Allocates an evaluator and compiles its kernel for the given
    /// descriptors. Returns NULL (after deleting the instance) when
    /// Compile() fails. \p deviceContext is ignored.
    static GLComputeEvaluator * Create(VertexBufferDescriptor const &srcDesc,
                                       VertexBufferDescriptor const &dstDesc,
                                       void * deviceContext = NULL) {
        (void)deviceContext;  // not used
        GLComputeEvaluator *evaluator = new GLComputeEvaluator();
        if (!evaluator->Compile(srcDesc, dstDesc)) {
            delete evaluator;
            evaluator = NULL;
        }
        return evaluator;
    }

    /// Constructor.
    GLComputeEvaluator();

    /// Destructor. note that the GL context must be made current.
    ~GLComputeEvaluator();

    /// \brief Generic static compute function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently from OsdMesh template interface.
    ///
    /// @param srcVertexBuffer  Input primvar buffer.
    ///                         must have BindVBO() method; its result is
    ///                         passed on as the GL source buffer name
    ///
    /// @param srcDesc          vertex buffer descriptor for the input buffer
    ///
    /// @param dstVertexBuffer  Output primvar buffer
    ///                         must have BindVBO() method; its result is
    ///                         passed on as the GL destination buffer name
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable     stencil table to be applied. The table must
    ///                         have SSBO interfaces (Get*Buffer() and
    ///                         GetNumStencils()).
    ///
    /// @param instance         cached compiled instance. Clients are supposed
    ///                         to pre-compile an instance of this class and
    ///                         provide to this function. If it's null a
    ///                         temporary instance is created, used once and
    ///                         deleted, which may cause a performance problem.
    ///
    /// @param deviceContext    not used in the GLSL kernel
    ///
    /// @return result of the instance EvalStencils() call, or false when the
    ///         on-demand instance could not be created.
    ///
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             VERTEX_BUFFER *dstVertexBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             GLComputeEvaluator const *instance,
                             void * deviceContext = NULL) {
        (void)deviceContext;  // unused

        // Fast path: the client supplied a pre-compiled evaluator.
        if (instance) {
            return instance->EvalStencils(srcVertexBuffer, srcDesc,
                                          dstVertexBuffer, dstDesc,
                                          stencilTable);
        }

        // Create a kernel on demand (slow)
        GLComputeEvaluator const *temporary = Create(srcDesc, dstDesc);
        if (!temporary) {
            return false;
        }
        bool const result = temporary->EvalStencils(srcVertexBuffer, srcDesc,
                                                    dstVertexBuffer, dstDesc,
                                                    stencilTable);
        delete temporary;
        return result;
    }

    /// Dispatch the GLSL compute kernel on GPU asynchronously.
    /// returns false if the kernel hasn't been compiled yet.
    ///
    /// Convenience overload: pulls the GL buffer names out of the vertex
    /// buffers (BindVBO()) and the stencil table (Get*Buffer()) and evaluates
    /// the whole table, i.e. the range [0, GetNumStencils()).
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                      VertexBufferDescriptor const &srcDesc,
                      VERTEX_BUFFER *dstVertexBuffer,
                      VertexBufferDescriptor const &dstDesc,
                      STENCIL_TABLE const *stencilTable) const {
        return EvalStencils(srcVertexBuffer->BindVBO(), srcDesc,
                            dstVertexBuffer->BindVBO(), dstDesc,
                            stencilTable->GetSizesBuffer(),
                            stencilTable->GetOffsetsBuffer(),
                            stencilTable->GetIndicesBuffer(),
                            stencilTable->GetWeightsBuffer(),
                            /* start = */ 0,
                            /* end   = */ stencilTable->GetNumStencils());
    }

    /// Dispatch the GLSL compute kernel on GPU asynchronously.
    /// returns false if the kernel hasn't been compiled yet.
    ///
    /// Lowest-level overload: every buffer is given by its GL name and the
    /// stencil range by \p start and \p end.
    bool EvalStencils(GLuint srcBuffer,
                      VertexBufferDescriptor const &srcDesc,
                      GLuint dstBuffer,
                      VertexBufferDescriptor const &dstDesc,
                      GLuint sizesBuffer,
                      GLuint offsetsBuffer,
                      GLuint indicesBuffer,
                      GLuint weightsBuffer,
                      int start,
                      int end) const;

    /// Configure GLSL kernel. A valid GL context must be made current before
    /// calling this function. Returns false if it fails to compile the kernel.
    bool Compile(VertexBufferDescriptor const &srcDesc,
                 VertexBufferDescriptor const &dstDesc);

    /// Wait the dispatched kernel finishes.
    static void Synchronize(void *deviceContext);

private:
    GLuint _program;

    // stencil tables
    GLuint _uniformSizes;
    GLuint _uniformOffsets;
    GLuint _uniformIndices;
    GLuint _uniformWeights;

    // range
    GLuint _uniformStart;
    GLuint _uniformEnd;

    GLuint _uniformSrcOffset;  // src buffer offset (in elements)
    GLuint _uniformDstOffset;  // dst buffer offset (in elements)

    int _workGroupSize;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H

View File

@ -39,8 +39,7 @@ namespace Osd {
/// \brief Concrete vertex buffer class for GLSL subvision and OpenGL drawing.
///
/// GLVertexBuffer implements GLVertexBufferInterface. An instance
/// of this buffer class can be passed to OsdGLComputeController
/// and OsdGLDrawController
/// of this buffer class can be passed to OsdGLComputeEvaluator.
///
class GLVertexBuffer {
public:

View File

@ -0,0 +1,353 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/glXFBEvaluator.h"
#include <sstream>
#include <string>
#include <vector>
#include <cstdio>
#include "../far/error.h"
#include "../far/stencilTables.h"
// Visual Studio releases older than 2015 (_MSC_VER < 1900) only provide
// _snprintf. Newer releases ship a conforming snprintf, so the alias is
// restricted to the old compilers; unlike snprintf, _snprintf does not
// guarantee null termination when the output is truncated.
#if defined(_MSC_VER) && (_MSC_VER < 1900)
#define snprintf _snprintf
#endif
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Source text of the transform feedback kernel, passed to glShaderSource()
// in GLXFBEvaluator::Compile(). The included generated header is expected to
// expand to a string literal -- TODO confirm against the build step that
// produces glslXFBKernel.gen.h.
static const char *shaderSource =
#include "../osd/glslXFBKernel.gen.h"
;
// Uploads the contents of 'src' into a temporary GL buffer object, attaches
// that buffer to a newly generated buffer texture using the internal format
// 'type', and returns the texture name.
//
// 'src' must not be empty: src.at(0) throws std::out_of_range otherwise.
template <class T> GLuint
createGLTextureBuffer(std::vector<T> const & src, GLenum type) {

    GLint numBytes = static_cast<int>(src.size()*sizeof(T));
    void const * data = &src.at(0);

    GLuint vbo;
    glGenBuffers(1, &vbo);

    GLuint texture;
    glGenTextures(1, &texture);

#if defined(GL_EXT_direct_state_access)
    if (glNamedBufferDataEXT && glTextureBufferEXT) {
        // Direct state access entry points are available: fill the buffer
        // and attach it without going through any binding point.
        glNamedBufferDataEXT(vbo, numBytes, data, GL_STATIC_DRAW);
        glTextureBufferEXT(texture, GL_TEXTURE_BUFFER, type, vbo);
    } else {
#else
    {
#endif
        // Bind-to-edit path. The array buffer and buffer texture bindings
        // are queried first and put back afterwards.
        GLint saved = 0;

        glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &saved);
        glBindBuffer(GL_ARRAY_BUFFER, vbo);
        glBufferData(GL_ARRAY_BUFFER, numBytes, data, GL_STATIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, saved);

        glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, &saved);
        glBindTexture(GL_TEXTURE_BUFFER, texture);
        glTexBuffer(GL_TEXTURE_BUFFER, type, vbo);
        glBindTexture(GL_TEXTURE_BUFFER, saved);
    }

    // Only the texture name is handed back; the buffer name is released here.
    glDeleteBuffers(1, &vbo);

    return texture;
}
// Builds one buffer texture per stencil table array (sizes, offsets, control
// indices, weights). When the table holds no stencils, no GL object is
// created and every texture name is left at 0.
GLStencilTablesTBO::GLStencilTablesTBO(
    Far::StencilTables const *stencilTables) {

    _numStencils = stencilTables->GetNumStencils();

    if (_numStencils <= 0) {
        _sizes = _offsets = _indices = _weights = 0;
        return;
    }

    _sizes   = createGLTextureBuffer(stencilTables->GetSizes(), GL_R8UI);
    _offsets = createGLTextureBuffer(stencilTables->GetOffsets(), GL_R32I);
    _indices = createGLTextureBuffer(stencilTables->GetControlIndices(),
                                     GL_R32I);
    _weights = createGLTextureBuffer(stencilTables->GetWeights(), GL_R32F);
}
// Releases every buffer texture that was created; names equal to 0 are
// skipped.
GLStencilTablesTBO::~GLStencilTablesTBO() {
    // same deletion order as before: sizes, offsets, weights, indices
    GLuint const textures[4] = { _sizes, _offsets, _weights, _indices };
    for (int i = 0; i < 4; ++i) {
        if (textures[i]) {
            glDeleteTextures(1, &textures[i]);
        }
    }
}
// ---------------------------------------------------------------------------
// Zero-initializes every GL name and uniform location. No GL call is made
// here; the program is built later by Compile().
GLXFBEvaluator::GLXFBEvaluator()
    : _program(0),
      _srcBufferTexture(0),
      _uniformSrcBufferTexture(0),
      _uniformSizesTexture(0),
      _uniformOffsetsTexture(0),
      _uniformIndicesTexture(0),
      _uniformWeightsTexture(0),
      _uniformStart(0),
      _uniformEnd(0),
      _uniformSrcOffset(0) {
}
// Deletes the GL program and the source buffer texture, when they exist.
GLXFBEvaluator::~GLXFBEvaluator() {
    if (_program != 0) glDeleteProgram(_program);
    if (_srcBufferTexture != 0) glDeleteTextures(1, &_srcBufferTexture);
}
// Builds the transform feedback program for the given source/destination
// layouts and caches its uniform locations.
//
// srcDesc supplies the LENGTH and SRC_STRIDE defines of the kernel; dstDesc
// determines the list of transform feedback varyings (written components
// plus gl_SkipComponents1 padding). Any program compiled earlier by this
// instance is deleted first.
//
// Returns true on success. On link failure the shader and program info logs
// are reported through Far::Error, both GL objects are released, _program is
// reset to 0 and false is returned.
bool
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
                        VertexBufferDescriptor const &dstDesc) {
    // Drop the program left over from a previous Compile() call.
    if (_program) {
        glDeleteProgram(_program);
        _program = 0;
    }
    _program = glCreateProgram();

    GLuint shader = glCreateShader(GL_VERTEX_SHADER);

    // The kernel source is specialized through preprocessor defines that
    // are inserted between the #version line and the kernel text.
    std::ostringstream defines;
    defines << "#define LENGTH " << srcDesc.length << "\n"
            << "#define SRC_STRIDE " << srcDesc.stride << "\n";
    std::string defineStr = defines.str();

    const char *shaderSources[3] = {"#version 410\n", NULL, NULL};
    shaderSources[1] = defineStr.c_str();
    shaderSources[2] = shaderSource;
    glShaderSource(shader, 3, shaderSources, NULL);
    glCompileShader(shader);
    glAttachShader(_program, shader);

    std::vector<std::string> outputs;
    std::vector<const char *> pOutputs;
    {
        // vertex data (may include custom vertex data) and varying data
        // are stored into the same buffer, interleaved.
        //
        // (gl_SkipComponents1)
        // outVertexBuffer[0]
        // outVertexBuffer[1]
        // outVertexBuffer[2]
        // (gl_SkipComponents1)
        //
        // note that "primvarOffset" in shader is still needed to read
        // interleaved components even if gl_SkipComponents is used.
        //
        char attrName[32];
        int primvarOffset = (dstDesc.offset % dstDesc.stride);

        // components in front of the written primvar are skipped
        for (int i = 0; i < primvarOffset; ++i) {
            outputs.push_back("gl_SkipComponents1");
        }
        // the written components themselves
        for (int i = 0; i < dstDesc.length; ++i) {
            snprintf(attrName, sizeof(attrName), "outVertexBuffer[%d]", i);
            outputs.push_back(attrName);
        }
        // components up to the end of the stride are skipped as well
        for (int i = primvarOffset + dstDesc.length; i < dstDesc.stride; ++i) {
            outputs.push_back("gl_SkipComponents1");
        }

        // convert to char* array ('outputs' is not modified past this point,
        // so the pointers stay valid for the call below)
        for (size_t i = 0; i < outputs.size(); ++i) {
            pOutputs.push_back(outputs[i].c_str());
        }
    }

    glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(),
                                &pOutputs[0], GL_INTERLEAVED_ATTRIBS);

    GLint linked = 0;
    glLinkProgram(_program);
    glGetProgramiv(_program, GL_LINK_STATUS, &linked);

    if (linked == GL_FALSE) {
        // NOTE(review): the driver log is passed as the message argument of
        // Far::Error. If that argument is treated as a printf-style format,
        // a '%' in the log would be misinterpreted -- confirm against
        // far/error.h.
        char buffer[1024];
        glGetShaderInfoLog(shader, 1024, NULL, buffer);
        Far::Error(Far::FAR_RUNTIME_ERROR, buffer);

        glGetProgramInfoLog(_program, 1024, NULL, buffer);
        Far::Error(Far::FAR_RUNTIME_ERROR, buffer);

        // Flag the shader for deletion before the program goes away;
        // previously the shader object was leaked on this path.
        glDeleteShader(shader);
        glDeleteProgram(_program);
        _program = 0;
        return false;
    }

    // The linked program no longer needs the shader object.
    glDeleteShader(shader);

    // set uniform locations for compute kernels
    _uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer");

    _uniformSizesTexture   = glGetUniformLocation(_program, "sizes");
    _uniformOffsetsTexture = glGetUniformLocation(_program, "offsets");
    _uniformIndicesTexture = glGetUniformLocation(_program, "indices");
    _uniformWeightsTexture = glGetUniformLocation(_program, "weights");

    _uniformStart     = glGetUniformLocation(_program, "batchStart");
    _uniformEnd       = glGetUniformLocation(_program, "batchEnd");
    _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");

    // create a texture for input buffer (kept across recompiles)
    if (!_srcBufferTexture) {
        glGenTextures(1, &_srcBufferTexture);
    }
    return true;
}
/* static */
void
GLXFBEvaluator::Synchronize(void * /*kernel*/) {
    // Blocks with glFinish() until all previously issued GL commands have
    // completed. The argument is ignored.
    //
    // XXX: this is currently just for the test purpose.
    // need to be reimplemented by fence and sync.
    glFinish();
}
// Points the sampler uniform at texture unit 'unit' and binds 'texture' to
// GL_TEXTURE_BUFFER on that unit. Texture unit 0 is made active again before
// returning. Nothing happens when the sampler location is -1.
static void
bindTexture(GLint sampler, GLuint texture, int unit) {
    if (sampler != -1) {
        glUniform1i(sampler, unit);
        glActiveTexture(GL_TEXTURE0 + unit);
        glBindTexture(GL_TEXTURE_BUFFER, texture);
        glActiveTexture(GL_TEXTURE0);
    }
}
// Applies the stencils in the range [start, end) by drawing (end - start)
// points through the transform feedback program with rasterization
// discarded.
//
// srcBuffer is attached to the internal buffer texture and read by the
// kernel; dstBuffer receives the transform feedback output. The four stencil
// table textures are bound to texture units 1-4 (the source uses unit 0).
//
// Returns false when no program has been compiled. Returns true without
// issuing any GL command when the range is empty (end - start <= 0).
//
// On return the function has unbound GL_TEXTURE_BUFFER on units 0-4, made
// unit 0 active, unbound the program, the transform feedback buffer and the
// vertex array, and disabled GL_RASTERIZER_DISCARD. Previous values of that
// state are not restored.
bool
GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer,
VertexBufferDescriptor const &dstDesc,
GLuint sizesTexture,
GLuint offsetsTexture,
GLuint indicesTexture,
GLuint weightsTexture,
int start,
int end) const {
// Compile() has not succeeded yet.
if (!_program) return false;
int count = end - start;
if (count <= 0) {
return true;
}
// bind vertex array
// always create new one, to be safe with multiple contexts (slow though)
GLuint vao = 0;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
// Only the transform feedback output is wanted; nothing is rasterized.
glEnable(GL_RASTERIZER_DISCARD);
glUseProgram(_program);
// Set input VBO as a texture buffer.
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0);
// bind stencil tables textures.
bindTexture(_uniformSizesTexture, sizesTexture, 1);
bindTexture(_uniformOffsetsTexture, offsetsTexture, 2);
bindTexture(_uniformIndicesTexture, indicesTexture, 3);
bindTexture(_uniformWeightsTexture, weightsTexture, 4);
// set batch range
glUniform1i(_uniformStart, start);
glUniform1i(_uniformEnd, end);
glUniform1i(_uniformSrcOffset, srcDesc.offset);
// The destination buffer is bound at vertex boundary.
//
// Example: When we have a batched and interleaved vertex buffer
//
// Obj X | Obj Y |
// -----------+-------------------------------------------+-------
// | vtx 0 | vtx 1 | |
// -----------+---------------+---------------+-----------+-------
// | x y z r g b a | x y z r g b a | .... |
// -----------+---------------+---------------+-----------+-------
// ^
// srcDesc.offset for Obj Y color
//
// ^-------------------------------------------^
// XFB destination buffer range
// S S S * * * *
// k k k
// i i i
// p p p
//
// We use gl_SkipComponents to skip the first 3 XYZ so the
// buffer itself needs to be bound for entire section of ObjY.
//
// Note that for the source buffer (texture) we bind the whole
// buffer (all VBO range) and use srcOffset=srcDesc.offset for
// indexing.
//
// dstDesc.offset rounded down to a multiple of dstDesc.stride (in elements).
int dstBufferBindOffset =
dstDesc.offset - (dstDesc.offset % dstDesc.stride);
// bind destination buffer
// (offset and size are converted from float elements to bytes)
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
0, dstBuffer,
dstBufferBindOffset * sizeof(float),
count * dstDesc.stride * sizeof(float));
// One point is drawn per stencil in the range.
glBeginTransformFeedback(GL_POINTS);
glDrawArrays(GL_POINTS, 0, count);
glEndTransformFeedback();
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
// Unbind the buffer textures from the five units used above.
for (int i = 0; i < 5; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
glDisable(GL_RASTERIZER_DISCARD);
glUseProgram(0);
glActiveTexture(GL_TEXTURE0);
// revert vao
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,215 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H
#define OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H
#include "../version.h"
#include "../osd/opengl.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class StencilTables;
}
namespace Osd {
/// \brief GL TextureBuffer stencil tables
///
/// GL Texture Buffer representation of Far::StencilTables. The accessors
/// below expose the GL texture names (sizes, offsets, indices, weights) and
/// the stencil count that GLXFBEvaluator::EvalStencils() reads from a
/// stencil table.
///
class GLStencilTablesTBO {
public:
    /// Heap-allocates a table built from \p stencilTables and returns it.
    /// \p deviceContext is accepted but ignored.
    static GLStencilTablesTBO *Create(
        Far::StencilTables const *stencilTables, void *deviceContext = NULL) {
        (void)deviceContext;  // unused
        GLStencilTablesTBO *table = new GLStencilTablesTBO(stencilTables);
        return table;
    }

    /// Constructor (defined out of line).
    explicit GLStencilTablesTBO(Far::StencilTables const *stencilTables);

    /// Destructor (defined out of line).
    ~GLStencilTablesTBO();

    // Accessors required by GLXFBEvaluator::EvalStencils()

    /// GL name of the stencil sizes texture.
    GLuint GetSizesTexture() const { return _sizes; }

    /// GL name of the stencil offsets texture.
    GLuint GetOffsetsTexture() const { return _offsets; }

    /// GL name of the stencil control-index texture.
    GLuint GetIndicesTexture() const { return _indices; }

    /// GL name of the stencil weights texture.
    GLuint GetWeightsTexture() const { return _weights; }

    /// Number of stencils held by this table.
    int GetNumStencils() const { return _numStencils; }

private:
    GLuint _sizes;
    GLuint _offsets;
    GLuint _indices;
    GLuint _weights;
    int _numStencils;
};
// ---------------------------------------------------------------------------
class GLXFBEvaluator {
public:
    typedef bool Instantiatable;

    /// Allocates an evaluator and compiles its kernel for the given
    /// descriptors. Returns NULL (after deleting the instance) when
    /// Compile() fails. \p deviceContext is ignored.
    static GLXFBEvaluator * Create(VertexBufferDescriptor const &srcDesc,
                                   VertexBufferDescriptor const &dstDesc,
                                   void * deviceContext = NULL) {
        (void)deviceContext;  // not used
        GLXFBEvaluator *evaluator = new GLXFBEvaluator();
        if (!evaluator->Compile(srcDesc, dstDesc)) {
            delete evaluator;
            evaluator = NULL;
        }
        return evaluator;
    }

    /// Constructor.
    GLXFBEvaluator();

    /// Destructor. note that the GL context must be made current.
    ~GLXFBEvaluator();

    /// \brief Generic static stencil function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently from OsdMesh template interface.
    ///
    /// @param srcVertexBuffer  Input primvar buffer.
    ///                         must have BindVBO() method; its result is
    ///                         passed on as the GL source buffer name
    ///
    /// @param srcDesc          vertex buffer descriptor for the input buffer
    ///
    /// @param dstVertexBuffer  Output primvar buffer
    ///                         must have BindVBO() method; its result is
    ///                         passed on as the GL destination buffer name
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable     stencil table to be applied. The table must
    ///                         have Texture Buffer Object interfaces
    ///                         (Get*Texture() and GetNumStencils()).
    ///
    /// @param instance         cached compiled instance. Clients are supposed
    ///                         to pre-compile an instance of this class and
    ///                         provide to this function. If it's null a
    ///                         temporary instance is created, used once and
    ///                         deleted, which may cause a performance problem.
    ///
    /// @param deviceContext    not used in the GLSLTransformFeedback kernel
    ///
    /// @return result of the instance EvalStencils() call, or false when the
    ///         on-demand instance could not be created.
    ///
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             VERTEX_BUFFER *dstVertexBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             GLXFBEvaluator const *instance,
                             void * deviceContext = NULL) {
        (void)deviceContext;  // unused

        // Fast path: the client supplied a pre-compiled evaluator.
        if (instance) {
            return instance->EvalStencils(srcVertexBuffer, srcDesc,
                                          dstVertexBuffer, dstDesc,
                                          stencilTable);
        }

        // Create an instance on demand (slow)
        GLXFBEvaluator const *temporary = Create(srcDesc, dstDesc);
        if (!temporary) {
            return false;
        }
        bool const result = temporary->EvalStencils(srcVertexBuffer, srcDesc,
                                                    dstVertexBuffer, dstDesc,
                                                    stencilTable);
        delete temporary;
        return result;
    }

    /// Runs the transform feedback kernel over the whole stencil table.
    /// returns false if the kernel hasn't been compiled yet.
    ///
    /// Convenience overload: pulls the GL names out of the vertex buffers
    /// (BindVBO()) and the stencil table (Get*Texture()) and evaluates the
    /// range [0, GetNumStencils()).
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                      VertexBufferDescriptor const &srcDesc,
                      VERTEX_BUFFER *dstVertexBuffer,
                      VertexBufferDescriptor const &dstDesc,
                      STENCIL_TABLE const *stencilTable) const {
        return EvalStencils(srcVertexBuffer->BindVBO(), srcDesc,
                            dstVertexBuffer->BindVBO(), dstDesc,
                            stencilTable->GetSizesTexture(),
                            stencilTable->GetOffsetsTexture(),
                            stencilTable->GetIndicesTexture(),
                            stencilTable->GetWeightsTexture(),
                            /* start = */ 0,
                            /* end   = */ stencilTable->GetNumStencils());
    }

    /// Runs the transform feedback kernel for the stencils in [start, end).
    /// returns false if the kernel hasn't been compiled yet.
    ///
    /// Lowest-level overload: the vertex buffers are given by GL buffer name.
    /// The four stencil table arguments are forwarded from the table's
    /// Get*Texture() accessors by the overload above, i.e. they carry GL
    /// texture names despite the "Buffer" suffix of the parameter names.
    bool EvalStencils(GLuint srcBuffer,
                      VertexBufferDescriptor const &srcDesc,
                      GLuint dstBuffer,
                      VertexBufferDescriptor const &dstDesc,
                      GLuint sizesBuffer,
                      GLuint offsetsBuffer,
                      GLuint indicesBuffer,
                      GLuint weightsBuffer,
                      int start,
                      int end) const;

    /// Configure GLSL kernel. A valid GL context must be made current before
    /// calling this function. Returns false if it fails to compile the kernel.
    bool Compile(VertexBufferDescriptor const &srcDesc,
                 VertexBufferDescriptor const &dstDesc);

    /// Wait the dispatched kernel finishes.
    static void Synchronize(void *kernel);

private:
    GLuint _program;

    // buffer texture used to read the source vertex buffer
    GLuint _srcBufferTexture;

    // sampler uniform locations
    GLuint _uniformSrcBufferTexture;
    GLuint _uniformSizesTexture;
    GLuint _uniformOffsetsTexture;
    GLuint _uniformIndicesTexture;
    GLuint _uniformWeightsTexture;

    // range
    GLuint _uniformStart;
    GLuint _uniformEnd;

    GLuint _uniformSrcOffset;  // src buffer offset (in elements)
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H

View File

@ -1,229 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../far/stencilTables.h"
//#include "../osd/debug.h"
#include "../osd/glslComputeContext.h"
#include "../osd/opengl.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// -----------------------------------------------------------------------------
// Creates a GL buffer object holding a copy of 'src' and returns its name.
//
// 'src' must not be empty: src.at(0) throws std::out_of_range otherwise.
template <class T> GLuint
createGLSLBuffer(std::vector<T> const & src) {

    GLuint buffer = 0;
    glGenBuffers(1, &buffer);

#if defined(GL_EXT_direct_state_access)
    if (glNamedBufferDataEXT) {
        // direct state access: no binding point is touched
        glNamedBufferDataEXT(buffer, src.size()*sizeof(T), &src.at(0),
                             GL_STATIC_DRAW);
    } else {
#else
    {
#endif
        // bind-to-edit path; the shader storage binding is saved and restored
        GLint savedBinding = 0;
        glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &savedBinding);
        glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
        glBufferData(GL_SHADER_STORAGE_BUFFER, src.size()*sizeof(T),
                     &src.at(0), GL_STATIC_DRAW);
        glBindBuffer(GL_SHADER_STORAGE_BUFFER, savedBinding);
    }

    //OSD_DEBUG_CHECK_GL_ERROR("createGLSLBuffer size %ld", src.size());
    return buffer;
}
// -----------------------------------------------------------------------------
// GL buffer copies of the four arrays of a Far::StencilTables (sizes,
// offsets, control indices, weights), plus helpers that bind them to the
// shader storage binding points 2-5.
class GLSLComputeContext::GLSLStencilTables {

public:

    // Uploads the stencil arrays. When the table holds no stencils, no GL
    // buffer is created and all names are left at 0.
    GLSLStencilTables(Far::StencilTables const & stencilTables) {
        _numStencils = stencilTables.GetNumStencils();
        if (_numStencils <= 0) {
            _sizes = _offsets = _indices = _weights = 0;
            return;
        }
        _sizes   = createGLSLBuffer(stencilTables.GetSizes());
        _offsets = createGLSLBuffer(stencilTables.GetOffsets());
        _indices = createGLSLBuffer(stencilTables.GetControlIndices());
        _weights = createGLSLBuffer(stencilTables.GetWeights());
    }

    // Deletes every buffer that was created; names equal to 0 are skipped.
    ~GLSLStencilTables() {
        // same deletion order as before: sizes, offsets, weights, indices
        GLuint const buffers[4] = { _sizes, _offsets, _weights, _indices };
        for (int i = 0; i < 4; ++i) {
            if (buffers[i]) {
                glDeleteBuffers(1, &buffers[i]);
            }
        }
    }

    // True when all four buffer names are non-zero.
    bool IsValid() const {
        return (_sizes != 0) && (_offsets != 0) &&
               (_indices != 0) && (_weights != 0);
    }

    GLuint GetSizes() const { return _sizes; }

    GLuint GetOffsets() const { return _offsets; }

    GLuint GetIndices() const { return _indices; }

    GLuint GetWeights() const { return _weights; }

    int GetNumStencils() const { return _numStencils; }

    // Binds sizes, offsets, indices and weights to the shader storage
    // binding points 2, 3, 4 and 5 respectively.
    void Bind() const {
        GLuint const buffers[4] = { _sizes, _offsets, _indices, _weights };
        for (int i = 0; i < 4; ++i) {
            glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2 + i, buffers[i]);
        }
    }

    // Clears the shader storage binding points 2-5, then unbinds the
    // current program.
    static void Unbind() {
        for (int bindingPoint = 2; bindingPoint <= 5; ++bindingPoint) {
            glBindBufferBase(GL_SHADER_STORAGE_BUFFER, bindingPoint, 0);
        }
        glUseProgram(0);
    }

private:

    GLuint _sizes;
    GLuint _offsets;
    GLuint _indices;
    GLuint _weights;

    int _numStencils;
};
// -----------------------------------------------------------------------------
// Creates the GL copies of whichever stencil tables are supplied (either
// pointer may be null) and records the number of control vertices. When both
// tables are given and the vertex table reports a non-zero count, the
// varying table is asserted to report the same count.
GLSLComputeContext::GLSLComputeContext(
    Far::StencilTables const * vertexStencilTables,
    Far::StencilTables const * varyingStencilTables)
    : _vertexStencilTables(0),
      _varyingStencilTables(0),
      _numControlVertices(0),
      _numStencils(0) {

    if (vertexStencilTables) {
        _vertexStencilTables = new GLSLStencilTables(*vertexStencilTables);
        _numControlVertices = vertexStencilTables->GetNumControlVertices();
    }

    if (varyingStencilTables) {
        _varyingStencilTables = new GLSLStencilTables(*varyingStencilTables);

        if (!_numControlVertices) {
            // no count known so far: take it from the varying table
            _numControlVertices = varyingStencilTables->GetNumControlVertices();
        } else {
            assert(_numControlVertices==varyingStencilTables->GetNumControlVertices());
        }
    }
}
// Frees the stencil table wrappers allocated by the constructor
// (deleting a null pointer is a no-op).
GLSLComputeContext::~GLSLComputeContext()
{
    delete _vertexStencilTables;
    delete _varyingStencilTables;
}
// ----------------------------------------------------------------------------
// True when a vertex stencil table exists and all of its GL buffers are valid.
bool
GLSLComputeContext::HasVertexStencilTables() const {
    return _vertexStencilTables && _vertexStencilTables->IsValid();
}
// True when a varying stencil table exists and all of its GL buffers are valid.
bool
GLSLComputeContext::HasVaryingStencilTables() const {
    return _varyingStencilTables && _varyingStencilTables->IsValid();
}
// Returns the number of stencils in the vertex stencil table, or 0 when the
// context has no vertex stencil table.
int
GLSLComputeContext::GetNumStencilsInVertexStencilTables() const {
    // The fallback used to be the bool literal 'false'; the function returns
    // an int, so spell the value as 0.
    return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0;
}
// Returns the number of stencils in the varying stencil table, or 0 when the
// context has no varying stencil table.
int
GLSLComputeContext::GetNumStencilsInVaryingStencilTables() const {
    // The fallback used to be the bool literal 'false'; the function returns
    // an int, so spell the value as 0.
    return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0;
}
// ----------------------------------------------------------------------------
// Binds the GL buffers of the vertex stencil table; no-op without a table.
void
GLSLComputeContext::BindVertexStencilTables() const {
    if (!_vertexStencilTables) {
        return;
    }
    _vertexStencilTables->Bind();
}
// Binds the GL buffers of the varying stencil table; no-op without a table.
void
GLSLComputeContext::BindVaryingStencilTables() const {
    if (!_varyingStencilTables) {
        return;
    }
    _varyingStencilTables->Bind();
}
// Forwards to the static GLSLStencilTables::Unbind() helper; it does not
// depend on which table (if any) was bound before.
void
GLSLComputeContext::UnbindStencilTables() const
{
    GLSLStencilTables::Unbind();
}
// -----------------------------------------------------------------------------
// Factory: heap-allocates a context for the given stencil tables and returns
// it. The device context argument is ignored.
GLSLComputeContext *
GLSLComputeContext::Create(Far::StencilTables const * vertexStencilTables,
                           Far::StencilTables const * varyingStencilTables,
                           void * /*deviceContext*/) {
    return new GLSLComputeContext(vertexStencilTables, varyingStencilTables);
}
// -----------------------------------------------------------------------------
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,126 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_GLSL_COMPUTE_CONTEXT_H
#define OSD_GLSL_COMPUTE_CONTEXT_H
#include "../version.h"
#include <cstddef>
#include "../osd/nonCopyable.h"
#include "../osd/opengl.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{ class StencilTables; }
namespace Osd {
///
/// \brief GLSL-Compute Refine Context
///
/// The GLSL-Compute implementation of the Refine module contextual functionality.
///
/// Contexts interface the serialized topological data pertaining to the
/// geometric primitives with the capabilities of the selected discrete
/// compute device.
///
/// NOTE(review): the class stores raw pointers to its stencil table wrappers
/// and declares no copy constructor or copy assignment. nonCopyable.h is
/// included by this header but not used by this class -- confirm whether
/// copying was meant to be disabled.
///
class GLSLComputeContext {
public:
/// Creates a GLSLComputeContext instance
///
/// @param vertexStencilTables The Far::StencilTables used for vertex
/// interpolation
///
/// @param varyingStencilTables The Far::StencilTables used for varying
/// interpolation
///
/// @param deviceContext (not used)
///
static GLSLComputeContext * Create(
Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
void *deviceContext = NULL);
/// Destructor
virtual ~GLSLComputeContext();
/// Returns true if the Context has a 'vertex' interpolation stencil table
bool HasVertexStencilTables() const;
/// Returns true if the Context has a 'varying' interpolation stencil table
bool HasVaryingStencilTables() const;
/// Returns the number of control vertices
int GetNumControlVertices() const {
return _numControlVertices;
}
/// Returns the number of stencils in vertex stencil table
int GetNumStencilsInVertexStencilTables() const;
/// Returns the number of stencils in varying stencil table
int GetNumStencilsInVaryingStencilTables() const;
/// Returns the GL buffer containing vertex-stencil stencil sizes
///
/// NOTE(review): no definition of this method appears in
/// glslComputeContext.cpp -- confirm it is defined somewhere before use.
GLuint GetVertexStencilTablesSizes() const;
/// Returns the GL buffer containing vertex-stencil stencil offsets
///
/// NOTE(review): no definition of this method appears in
/// glslComputeContext.cpp -- confirm it is defined somewhere before use.
GLuint GetVertexStencilTablesOffsets() const;
/// Binds GL buffers containing stencils for 'vertex' interpolation
void BindVertexStencilTables() const;
/// Binds GL buffers containing stencils for 'varying' interpolation
void BindVaryingStencilTables() const;
/// Unbinds GL stencil buffers
void UnbindStencilTables() const;
protected:
/// Constructor; instances are meant to be obtained through Create().
explicit GLSLComputeContext(
Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables);
private:
// GL-side stencil table wrapper, defined in the implementation file.
class GLSLStencilTables;
GLSLStencilTables * _vertexStencilTables,
* _varyingStencilTables;
int _numControlVertices;
// NOTE(review): only ever initialized to 0 by the constructor in the
// visible implementation -- possibly unused; confirm.
int _numStencils;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_GLSL_COMPUTE_CONTEXT_H

View File

@ -1,292 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/glslComputeController.h"
#include "../osd/vertexDescriptor.h"
#include "../osd/opengl.h"
#include "../far/error.h"
#include <algorithm>
#include <cassert>
#include <iostream>
#include <sstream>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Source text of the GLSL compute kernel, passed to glShaderSource() when
// the kernel is compiled. The included generated header is expected to
// expand to a string literal -- TODO confirm against the build step that
// produces glslComputeKernel.gen.h.
static const char *shaderSource =
#include "../osd/glslComputeKernel.gen.h"
;
// ----------------------------------------------------------------------------
/// \brief One compiled GLSL compute program plus the state needed to run it.
///
/// A bundle is specialized at compile time for a primvar length and stride
/// (passed to the shader as preprocessor defines) and caches the uniform
/// locations used when dispatching the stencil kernel.
class GLSLComputeController::KernelBundle :
    NonCopyable<GLSLComputeController::KernelBundle> {

public:

    /// Creates an empty bundle; no GL object exists until Compile() succeeds.
    KernelBundle() :
        _program(0),
        _uniformSizes(0),
        _uniformOffsets(0),
        _uniformIndices(0),
        _uniformWeights(0),
        _uniformStart(0),
        _uniformEnd(0),
        _uniformSrcOffset(0),
        _uniformDstOffset(0),
        _workGroupSize(64) { }

    /// Releases the GL program, if one is held.
    ~KernelBundle() {
        if (_program) {
            glDeleteProgram(_program);
        }
    }

    /// Makes this bundle's program the current GL program.
    void UseProgram() const {
        glUseProgram(_program);
    }

    /// Compiles and links the compute kernel for the given descriptors.
    ///
    /// @param srcDesc  supplies LENGTH and SRC_STRIDE for the shader
    ///
    /// @param dstDesc  supplies DST_STRIDE for the shader
    ///
    /// @return true when the program linked; false otherwise, in which case
    ///         the error logs are reported through Far::Error and the bundle
    ///         holds no program.
    ///
    bool Compile(VertexBufferDescriptor const &srcDesc,
                 VertexBufferDescriptor const &dstDesc) {

        // XXX: only store srcDesc.
        // this is ok since currently this kernel doesn't get called with
        // different strides for src and dst. This function will be
        // refactored soon.
        _desc = VertexBufferDescriptor(0, srcDesc.length, srcDesc.stride);

        // drop any program left over from a previous Compile() call
        if (_program) {
            glDeleteProgram(_program);
            _program=0;
        }
        _program = glCreateProgram();

        GLuint shader = glCreateShader(GL_COMPUTE_SHADER);

        std::ostringstream defines;
        defines << "#define LENGTH " << srcDesc.length << "\n"
                << "#define SRC_STRIDE " << srcDesc.stride << "\n"
                << "#define DST_STRIDE " << dstDesc.stride << "\n"
                << "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
        std::string defineStr = defines.str();

        // source = version directive + generated defines + kernel body
        const char *shaderSources[3] = {"#version 430\n", 0, 0};
        shaderSources[1] = defineStr.c_str();
        shaderSources[2] = shaderSource;
        glShaderSource(shader, 3, shaderSources, NULL);
        glCompileShader(shader);
        glAttachShader(_program, shader);

        GLint linked = 0;
        glLinkProgram(_program);
        glGetProgramiv(_program, GL_LINK_STATUS, &linked);

        if (linked == GL_FALSE) {
            char buffer[1024];
            glGetShaderInfoLog(shader, 1024, NULL, buffer);
            Far::Error(Far::FAR_RUNTIME_ERROR, buffer);

            glGetProgramInfoLog(_program, 1024, NULL, buffer);
            Far::Error(Far::FAR_RUNTIME_ERROR, buffer);

            // the shader object has to be released on the failure path as
            // well, otherwise every failed compile leaks one shader.
            glDeleteShader(shader);
            glDeleteProgram(_program);
            _program = 0;
            return false;
        }

        glDeleteShader(shader);

        // set uniform locations for compute kernels
        // NOTE(review): "stencilIWeights" looks like a misspelling of
        // "stencilWeights" -- confirm against the kernel source.
        _uniformSizes = glGetUniformLocation(_program, "stencilSizes");
        _uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
        _uniformIndices = glGetUniformLocation(_program, "stencilIndices");
        _uniformWeights = glGetUniformLocation(_program, "stencilIWeights");

        _uniformStart = glGetUniformLocation(_program, "batchStart");
        _uniformEnd = glGetUniformLocation(_program, "batchEnd");

        _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
        _uniformDstOffset = glGetUniformLocation(_program, "dstOffset");

        return true;
    }

    /// Runs the kernel over the stencils in [start, end).
    ///
    /// @param srcOffset  value for the "srcOffset" uniform
    /// @param dstOffset  value for the "dstOffset" uniform
    /// @param start      value for the "batchStart" uniform
    /// @param end        value for the "batchEnd" uniform
    ///
    void ApplyStencilTableKernel(int srcOffset, int dstOffset,
                                 int start, int end) const {
        dispatchCompute(srcOffset, dstOffset, start, end);
    }

    /// Predicate for std::find_if: matches a bundle compiled for the same
    /// primvar length and stride (the offset is not compared).
    struct Match {

        Match(VertexBufferDescriptor const & d) : desc(d) { }

        bool operator() (KernelBundle const * kernel) {
            return (desc.length==kernel->_desc.length and
                    desc.stride==kernel->_desc.stride);
        }

        VertexBufferDescriptor desc;
    };

protected:

    /// Uploads the batch uniforms and dispatches enough work groups to cover
    /// (end - start) items; does nothing for an empty or inverted range.
    void dispatchCompute(int srcOffset, int dstOffset, int start, int end) const {

        int count = end - start;
        if (count<=0) {
            return;
        }

        glUniform1i(_uniformStart, start);
        glUniform1i(_uniformEnd, end);
        glUniform1i(_uniformSrcOffset, srcOffset);
        glUniform1i(_uniformDstOffset, dstOffset);

        // round up so that a partially filled last group is still launched
        glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);

        // sync for later reading.
        // XXX: in theory, just SHADER_STORAGE_BARRIER is needed here. However
        // we found a problem (issue #295) with nvidia driver 331.49 / Quadro4000
        // resulting in invalid vertices.
        // Apparently adding TEXTURE_FETCH_BARRIER after a kernel fixes it.
        // The workaround is commented out, since it looks fixed as of driver 334.xx.
        glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

        //OSD_DEBUG_CHECK_GL_ERROR("dispatchCompute");
    }

private:

    GLuint _program;

    GLuint _uniformSizes, // uniform parameters for kernels
           _uniformOffsets,
           _uniformIndices,
           _uniformWeights,

           _uniformStart, // batch
           _uniformEnd,

           _uniformSrcOffset, // src buffer offset (in elements)
           _uniformDstOffset; // dst buffer offset (in elements)

    VertexBufferDescriptor _desc; // primvar buffer descriptor

    int _workGroupSize;
};
// ----------------------------------------------------------------------------
// Runs the currently bound kernel bundle over the stencils [0, numStencils).
// The source range starts at the bound descriptor's offset; the destination
// starts GetNumControlVertices() strides further on.
void
GLSLComputeController::ApplyStencilTableKernel(
    ComputeContext const *context, int numStencils) const {

    assert(context);

    // Note: GLSLComputeContext has a state, knowing whether vertex or
    // varying stencil tables are being bound. GetNumStencils() reflects it.
    // This structure will likely be revisited.

    VertexBufferDescriptor const & bound = _currentBindState.desc;

    _currentBindState.kernelBundle->ApplyStencilTableKernel(
        bound.offset,
        bound.offset + context->GetNumControlVertices() * bound.stride,
        /*start=*/0,
        /*end=*/numStencils);
}
// ----------------------------------------------------------------------------
// Nothing to set up here: kernel bundles are created on demand by getKernel().
GLSLComputeController::GLSLComputeController() { }
// Destroys every kernel bundle that getKernel() added to the registry.
GLSLComputeController::~GLSLComputeController() {

    KernelRegistry::iterator cursor = _kernelRegistry.begin();
    while (cursor != _kernelRegistry.end()) {
        delete *cursor;
        ++cursor;
    }
}
// ----------------------------------------------------------------------------
// Waits for previously issued GL commands by calling glFinish().
void
GLSLComputeController::Synchronize() {
glFinish();
}
// ----------------------------------------------------------------------------
// Returns the kernel bundle matching `desc` (see KernelBundle::Match),
// compiling and registering a new one when none is cached yet.
// The result of Compile() is not checked: the new bundle is registered and
// returned whether or not compilation succeeded.
GLSLComputeController::KernelBundle const *
GLSLComputeController::getKernel(VertexBufferDescriptor const &desc) {

    KernelRegistry::iterator cached =
        std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
                     KernelBundle::Match(desc));

    if (cached != _kernelRegistry.end()) {
        return *cached;
    }

    // cache miss: build a bundle for this descriptor and remember it
    KernelBundle * fresh = new KernelBundle();
    fresh->Compile(desc, desc);
    _kernelRegistry.push_back(fresh);
    return fresh;
}
void
GLSLComputeController::bindBufferAndProgram() {
if (_currentBindState.buffer) {
// src
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _currentBindState.buffer);
// dst
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _currentBindState.buffer);
}
_currentBindState.kernelBundle->UseProgram();
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
}
// Reverses bindBufferAndProgram(): issues a texture-fetch barrier, clears
// shader-storage binding points 0 and 1 and deactivates the program.
void
GLSLComputeController::unbindBufferAndProgram() {
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
glUseProgram(0);
}
// ----------------------------------------------------------------------------
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,210 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_GLSL_COMPUTE_CONTROLLER_H
#define OSD_GLSL_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/glslComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include <vector>
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Compute controller for launching GLSL Compute subdivision kernels.
///
/// GLSLComputeController is a compute controller class to launch
/// GLSL compute shader subdivision kernels. The buffers passed to Compute()
/// must provide BindVBO() and GetNumElements() (both are called from bind()).
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class GLSLComputeController {
public:
typedef GLSLComputeContext ComputeContext;
/// Constructor.
GLSLComputeController();
/// Destructor.
~GLSLComputeController();
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// @param context The GLSLComputeContext to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer (skipped if null)
///
/// @param varyingBuffer Varying-interpolated data buffer (skipped if null)
///
/// @param vertexDesc The descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingDesc The descriptor of varying elements to be refined.
/// if it's null, all primvars in the varying buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Compute( GLSLComputeContext const * context,
VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer,
VertexBufferDescriptor const * vertexDesc=NULL,
VertexBufferDescriptor const * varyingDesc=NULL ){
if (vertexBuffer) {
bind(vertexBuffer, vertexDesc);
context->BindVertexStencilTables();
ApplyStencilTableKernel(
context, context->GetNumStencilsInVertexStencilTables());
}
if (varyingBuffer) {
bind(varyingBuffer, varyingDesc);
context->BindVaryingStencilTables();
ApplyStencilTableKernel(
context, context->GetNumStencilsInVaryingStencilTables());
}
// stencil tables and GL bindings are released even when both buffers were null
context->UnbindStencilTables();
unbind();
}
/// Execute subdivision kernels and apply to given vertex buffers.
/// Convenience overload that forwards to the two-buffer version with a
/// null varying buffer and default descriptors.
///
/// @param context The GLSLComputeContext to apply refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
template<class VERTEX_BUFFER>
void Compute(GLSLComputeContext const * context,
VERTEX_BUFFER *vertexBuffer) {
Compute<VERTEX_BUFFER>(context, vertexBuffer, (VERTEX_BUFFER*)0);
}
/// Waits until all running subdivision kernels finish.
void Synchronize();
protected:
// Dispatches the currently bound kernel for numStencils stencils.
void ApplyStencilTableKernel(ComputeContext const *context,
int numStencils) const;
// Records buffer/descriptor/kernel in the bind state and binds them to GL.
template<class BUFFER>
void bind( BUFFER * buffer,
VertexBufferDescriptor const * desc ) {
assert(buffer);
// if the vertex buffer descriptor is specified, use it
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (desc) {
_currentBindState.desc = *desc;
} else {
// NOTE(review): the null test below is redundant with the assert above,
// and buffer is dereferenced unconditionally a few lines further down.
int numElements = buffer ? buffer->GetNumElements() : 0;
_currentBindState.desc =
VertexBufferDescriptor(0, numElements, numElements);
}
_currentBindState.buffer = buffer->BindVBO();
_currentBindState.kernelBundle = getKernel(_currentBindState.desc);
bindBufferAndProgram();
}
// Unbinds any previously bound vertex and varying data buffers.
void unbind() {
_currentBindState.Reset();
unbindBufferAndProgram();
}
// binds the primvar data buffer and compute program
void bindBufferAndProgram();
// unbinds the primvar data buffer and compute program
void unbindBufferAndProgram();
private:
class KernelBundle;
// Bind state is a transitional state during refinement.
// It doesn't take an ownership of the vertex buffers.
struct BindState {
BindState() : buffer(0), kernelBundle(0) { }
// Returns the state to "nothing bound".
void Reset() {
buffer = 0;
desc.Reset();
kernelBundle = 0;
}
GLuint buffer; // GL buffer name returned by BindVBO()
VertexBufferDescriptor desc; // layout of the primvar data being refined
KernelBundle const * kernelBundle; // borrowed from _kernelRegistry
};
BindState _currentBindState;
typedef std::vector<KernelBundle *> KernelRegistry;
// Looks up (or compiles and registers) the kernel bundle for desc.
KernelBundle const * getKernel(VertexBufferDescriptor const &desc);
// Owning list of compiled bundles; entries are deleted in the destructor.
KernelRegistry _kernelRegistry;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_GLSL_COMPUTE_CONTROLLER_H

View File

@ -1,253 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../far/stencilTables.h"
//#define OSD_DEBUG_BUILD
//#include "../osd/debug.h"
#include "../osd/glslTransformFeedbackComputeContext.h"
#include "../osd/opengl.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// -----------------------------------------------------------------------------
// Copies the contents of `src` into a GL buffer object and attaches that
// buffer to a newly generated buffer texture with internal format `type`.
// Returns the texture name. The buffer name is deleted before returning.
// `src` must not be empty: src.at(0) throws std::out_of_range otherwise.
template <class T> GLuint
createGLTextureBuffer(std::vector<T> const & src, GLenum type) {

    int size = (int)src.size()*sizeof(T);
    void const * data = &src.at(0);

    GLuint vbo;
    glGenBuffers(1, &vbo);

    GLuint texture;
    glGenTextures(1, &texture);

    // Prefer the direct-state-access entry points when they are available;
    // otherwise fall back to bind-to-edit and restore the previous bindings.
    bool uploaded = false;

#if defined(GL_EXT_direct_state_access)
    if (glNamedBufferDataEXT and glTextureBufferEXT) {
        glNamedBufferDataEXT(vbo, size, data, GL_STATIC_DRAW);
        glTextureBufferEXT(texture, GL_TEXTURE_BUFFER, type, vbo);
        uploaded = true;
    }
#endif

    if (not uploaded) {
        GLint saved = 0;

        glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &saved);
        glBindBuffer(GL_ARRAY_BUFFER, vbo);
        glBufferData(GL_ARRAY_BUFFER, size, data, GL_STATIC_DRAW);
        glBindBuffer(GL_ARRAY_BUFFER, saved);

        glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, &saved);
        glBindTexture(GL_TEXTURE_BUFFER, texture);
        glTexBuffer(GL_TEXTURE_BUFFER, type, vbo);
        glBindTexture(GL_TEXTURE_BUFFER, saved);
    }

    glDeleteBuffers(1, &vbo);

    //OSD_DEBUG_CHECK_GL_ERROR("createGLTextureBuffer end\n");
    return texture;
}
// -----------------------------------------------------------------------------
// GL-side copy of one Far::StencilTables: four buffer textures (sizes,
// offsets, control indices, weights) plus the stencil count.
class GLSLTransformFeedbackComputeContext::GLStencilTables {

public:

    // Uploads the four tables when there is at least one stencil; otherwise
    // all texture names stay 0.
    GLStencilTables(Far::StencilTables const & stencilTables) :
        _sizes(0),
        _offsets(0),
        _indices(0),
        _weights(0),
        _numStencils(stencilTables.GetNumStencils()) {

        if (_numStencils <= 0) {
            return;
        }
        _sizes = createGLTextureBuffer(stencilTables.GetSizes(), GL_R8UI);
        _offsets = createGLTextureBuffer(stencilTables.GetOffsets(), GL_R32I);
        _indices = createGLTextureBuffer(stencilTables.GetControlIndices(), GL_R32I);
        _weights = createGLTextureBuffer(stencilTables.GetWeights(), GL_R32F);
    }

    // Deletes whichever textures were created.
    ~GLStencilTables() {
        if (_sizes) {
            glDeleteTextures(1, &_sizes);
        }
        if (_offsets) {
            glDeleteTextures(1, &_offsets);
        }
        if (_weights) {
            glDeleteTextures(1, &_weights);
        }
        if (_indices) {
            glDeleteTextures(1, &_indices);
        }
    }

    // True only when all four textures exist.
    bool IsValid() const {
        if (not _sizes) return false;
        if (not _offsets) return false;
        if (not _indices) return false;
        if (not _weights) return false;
        return true;
    }

    GLuint GetSizes() const { return _sizes; }

    GLuint GetOffsets() const { return _offsets; }

    GLuint GetIndices() const { return _indices; }

    GLuint GetWeights() const { return _weights; }

    int GetNumStencils() const { return _numStencils; }

private:

    GLuint _sizes,
           _offsets,
           _indices,
           _weights;

    int _numStencils;
};
// -----------------------------------------------------------------------------
// Builds GL copies of whichever stencil tables are supplied (either may be
// null) and records the control-vertex count. The vertex table's count is
// used when it is non-zero; otherwise the varying table's count is used.
GLSLTransformFeedbackComputeContext::GLSLTransformFeedbackComputeContext(
    Far::StencilTables const * vertexStencilTables,
    Far::StencilTables const * varyingStencilTables) :
        _vertexStencilTables(0), _varyingStencilTables(0),
        _numControlVertices(0) {

    if (vertexStencilTables) {
        _vertexStencilTables = new GLStencilTables(*vertexStencilTables);
        _numControlVertices = vertexStencilTables->GetNumControlVertices();
    }

    if (not varyingStencilTables) {
        return;
    }

    _varyingStencilTables = new GLStencilTables(*varyingStencilTables);

    if (_numControlVertices == 0) {
        _numControlVertices = varyingStencilTables->GetNumControlVertices();
    } else {
        // both tables must describe the same control cage
        assert(_numControlVertices==varyingStencilTables->GetNumControlVertices());
    }
}
// Frees the GL stencil-table wrappers allocated in the constructor
// (deleting a null pointer is a no-op, so absent tables need no check).
GLSLTransformFeedbackComputeContext::~GLSLTransformFeedbackComputeContext() {
delete _vertexStencilTables;
delete _varyingStencilTables;
}
// ----------------------------------------------------------------------------
// True when a vertex stencil table exists and all of its textures are valid.
bool
GLSLTransformFeedbackComputeContext::HasVertexStencilTables() const {
    if (not _vertexStencilTables) {
        return false;
    }
    return _vertexStencilTables->IsValid();
}
// True when a varying stencil table exists and all of its textures are valid.
bool
GLSLTransformFeedbackComputeContext::HasVaryingStencilTables() const {
    if (not _varyingStencilTables) {
        return false;
    }
    return _varyingStencilTables->IsValid();
}
// Stencil count of the vertex table, or 0 when there is none.
int
GLSLTransformFeedbackComputeContext::GetNumStencilsInVertexStencilTables() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetNumStencils();
}
// Stencil count of the varying table, or 0 when there is none.
int
GLSLTransformFeedbackComputeContext::GetNumStencilsInVaryingStencilTables() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetNumStencils();
}
// ----------------------------------------------------------------------------
// Texture holding the vertex stencil sizes, or 0 without a vertex table.
GLuint
GLSLTransformFeedbackComputeContext::GetVertexStencilTablesSizes() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetSizes();
}
// Texture holding the vertex stencil offsets, or 0 without a vertex table.
GLuint
GLSLTransformFeedbackComputeContext::GetVertexStencilTablesOffsets() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetOffsets();
}
// Texture holding the vertex stencil indices, or 0 without a vertex table.
GLuint
GLSLTransformFeedbackComputeContext::GetVertexStencilTablesIndices() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetIndices();
}
// Texture holding the vertex stencil weights, or 0 without a vertex table.
GLuint
GLSLTransformFeedbackComputeContext::GetVertexStencilTablesWeights() const {
    if (not _vertexStencilTables) {
        return 0;
    }
    return _vertexStencilTables->GetWeights();
}
// ----------------------------------------------------------------------------
// Texture holding the varying stencil sizes, or 0 without a varying table.
GLuint
GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesSizes() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetSizes();
}
// Texture holding the varying stencil offsets, or 0 without a varying table.
GLuint
GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesOffsets() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetOffsets();
}
// Texture holding the varying stencil indices, or 0 without a varying table.
GLuint
GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesIndices() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetIndices();
}
// Texture holding the varying stencil weights, or 0 without a varying table.
GLuint
GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesWeights() const {
    if (not _varyingStencilTables) {
        return 0;
    }
    return _varyingStencilTables->GetWeights();
}
// -----------------------------------------------------------------------------
// Factory: heap-allocates a context for the given stencil tables and returns
// it. The device-context argument is accepted but not used.
GLSLTransformFeedbackComputeContext *
GLSLTransformFeedbackComputeContext::Create(
    Far::StencilTables const * vertexStencilTables,
    Far::StencilTables const * varyingStencilTables,
    void * /*deviceContext*/) {

    return new GLSLTransformFeedbackComputeContext(
        vertexStencilTables, varyingStencilTables);
}
// -----------------------------------------------------------------------------
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,134 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H
#define OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H
#include "../version.h"
#include <cstddef>
#include "../osd/nonCopyable.h"
#include "../osd/opengl.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{ class StencilTables; }
namespace Osd {
///
/// \brief GLSL-Compute(transform-feedback) Refine Context
///
/// The GLSL (transform-feedback) implementation of the Refine module contextual functionality.
///
/// Contexts interface the serialized topological data pertaining to the
/// geometric primitives with the capabilities of the selected discrete
/// compute device.
///
/// Every accessor below returns 0 (or false) when the corresponding stencil
/// table was not supplied at creation.
///
class GLSLTransformFeedbackComputeContext {
public:
/// Creates a GLSLTransformFeedbackComputeContext instance
///
/// @param vertexStencilTables The Far::StencilTables used for vertex
/// interpolation
///
/// @param varyingStencilTables The Far::StencilTables used for varying
/// interpolation
///
/// @param deviceContext Not used by this implementation
///
/// @return A heap-allocated context (created with new)
///
static GLSLTransformFeedbackComputeContext * Create(
Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables,
void *deviceContext = NULL);
/// Destructor
virtual ~GLSLTransformFeedbackComputeContext();
/// Returns true if the Context has a 'vertex' interpolation stencil table
bool HasVertexStencilTables() const;
/// Returns true if the Context has a 'varying' interpolation stencil table
bool HasVaryingStencilTables() const;
/// Returns the number of control vertices
int GetNumControlVertices() const {
return _numControlVertices;
}
/// Returns the number of stencils in vertex stencil table
int GetNumStencilsInVertexStencilTables() const;
/// Returns the number of stencils in varying stencil table
int GetNumStencilsInVaryingStencilTables() const;
/// Returns the GL texture buffer containing vertex-stencil stencil sizes
GLuint GetVertexStencilTablesSizes() const;
/// Returns the GL texture buffer containing vertex-stencil stencil offsets
GLuint GetVertexStencilTablesOffsets() const;
/// Returns the GL texture buffer containing vertex-stencil stencil indices
GLuint GetVertexStencilTablesIndices() const;
/// Returns the GL texture buffer containing vertex-stencil stencil weights
GLuint GetVertexStencilTablesWeights() const;
/// Returns the GL texture buffer containing Varying-stencil stencil sizes
GLuint GetVaryingStencilTablesSizes() const;
/// Returns the GL texture buffer containing Varying-stencil stencil offsets
GLuint GetVaryingStencilTablesOffsets() const;
/// Returns the GL texture buffer containing Varying-stencil stencil indices
GLuint GetVaryingStencilTablesIndices() const;
/// Returns the GL texture buffer containing Varying-stencil stencil weights
GLuint GetVaryingStencilTablesWeights() const;
protected:
/// Builds the GL stencil tables; instances are obtained through Create().
explicit GLSLTransformFeedbackComputeContext(Far::StencilTables const * vertexStencilTables,
Far::StencilTables const * varyingStencilTables);
private:
class GLStencilTables;
// GL copies of the stencil tables; owned, deleted in the destructor.
GLStencilTables * _vertexStencilTables,
* _varyingStencilTables;
int _numControlVertices;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H

View File

@ -1,456 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
//#define OSD_DEBUG_BUILD
#include "../osd/debug.h"
#include "../osd/glslTransformFeedbackComputeController.h"
#include "../osd/glslTransformFeedbackComputeContext.h"
#include "../osd/opengl.h"
#include "../far/error.h"
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <sstream>
#if _MSC_VER
#define snprintf _snprintf
#endif
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// GLSL source text of the transform-feedback kernel. The included header is
// expected to expand to a string literal; it is handed to glShaderSource()
// as the last source fragment in KernelBundle::Compile().
static const char *shaderSource =
#include "../osd/glslTransformFeedbackKernel.gen.h"
;
// ----------------------------------------------------------------------------
// Points the sampler uniform at texture unit `unit` and binds `texture` as
// that unit's buffer texture, leaving unit 0 active afterwards.
// A sampler location of -1 is ignored.
static void
bindTexture(GLint sampler, GLuint texture, int unit) {

    if (sampler != -1) {
        glUniform1i(sampler, unit);
        glActiveTexture(GL_TEXTURE0 + unit);
        glBindTexture(GL_TEXTURE_BUFFER, texture);
        glActiveTexture(GL_TEXTURE0);
    }
}
// ----------------------------------------------------------------------------
/// \brief One compiled transform-feedback program plus its uniform locations.
///
/// A bundle is specialized at compile time for a primvar length and stride
/// (passed to the shader as preprocessor defines) and for the interleaved
/// layout of the destination buffer (expressed as transform-feedback
/// varyings).
class GLSLTransformFeedbackComputeController::KernelBundle :
    NonCopyable<GLSLTransformFeedbackComputeController::KernelBundle> {

public:

    /// Creates an empty bundle; no GL object exists until Compile() succeeds.
    ///
    /// _primvarBuffer is given a defined value here because
    /// GetPrimvarBufferLocation() may be read even when Compile() has failed.
    /// -1 is the location bindTexture() skips.
    KernelBundle() :
        _program(0),
        _primvarBuffer(-1),
        _uniformSizes(0),
        _uniformOffsets(0),
        _uniformIndices(0),
        _uniformWeights(0),
        _uniformStart(0),
        _uniformEnd(0),
        _uniformSrcOffset(0) { }

    /// Releases the GL program, if one is held.
    ~KernelBundle() {
        if (_program) {
            glDeleteProgram(_program);
        }
    }

    /// Makes this bundle's program the current GL program.
    void UseProgram() const {
        glUseProgram(_program);
    }

    /// Compiles and links the transform-feedback kernel.
    ///
    /// @param srcDesc  supplies LENGTH and SRC_STRIDE for the shader
    ///
    /// @param dstDesc  supplies the layout of the captured outputs
    ///                 (offset within a vertex, length, stride)
    ///
    /// @return true when the program linked; false otherwise, in which case
    ///         the error logs are reported through Far::Error and the bundle
    ///         holds no program.
    ///
    bool Compile(VertexBufferDescriptor const & srcDesc,
                 VertexBufferDescriptor const & dstDesc) {

        // XXX: only store srcDesc.
        // this is ok since currently this kernel doesn't get called with
        // different strides for src and dst. This function will be
        // refactored soon.
        _desc = VertexBufferDescriptor(0, srcDesc.length, dstDesc.stride);

        // drop any program left over from a previous Compile() call
        if (_program) {
            glDeleteProgram(_program);
            _program=0;
        }
        _program = glCreateProgram();

        GLuint shader = glCreateShader(GL_VERTEX_SHADER);

        std::ostringstream defines;
        defines << "#define LENGTH " << srcDesc.length << "\n"
                << "#define SRC_STRIDE " << srcDesc.stride << "\n";
        std::string defineStr = defines.str();

        // source = version directive + generated defines + kernel body
        const char *shaderSources[3] = {"#version 410\n", 0, 0};
        shaderSources[1] = defineStr.c_str();
        shaderSources[2] = shaderSource;
        glShaderSource(shader, 3, shaderSources, NULL);
        glCompileShader(shader);
        glAttachShader(_program, shader);

        std::vector<std::string> outputs;
        std::vector<const char *> pOutputs;
        {
            // vertex data (may include custom vertex data) and varying data
            // are stored into the same buffer, interleaved.
            //
            // (gl_SkipComponents1)
            // outVertexData[0]
            // outVertexData[1]
            // outVertexData[2]
            // (gl_SkipComponents1)
            //
            // note that "primvarOffset" in shader is still needed to read
            // interleaved components even if gl_SkipComponents is used.
            //
            char attrName[32];
            int primvarOffset = (dstDesc.offset % dstDesc.stride);

            // components of the vertex that precede the primvar
            for (int i = 0; i < primvarOffset; ++i) {
                outputs.push_back("gl_SkipComponents1");
            }
            // the primvar itself
            for (int i = 0; i < dstDesc.length; ++i) {
                snprintf(attrName, 32, "outVertexBuffer[%d]", i);
                outputs.push_back(attrName);
            }
            // components of the vertex that follow the primvar
            for (int i = primvarOffset + dstDesc.length; i < dstDesc.stride; ++i) {
                outputs.push_back("gl_SkipComponents1");
            }

            // convert to char* array
            for (size_t i = 0; i < outputs.size(); ++i) {
                pOutputs.push_back(&outputs[i][0]);
            }
        }

        glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(),
                                    &pOutputs[0], GL_INTERLEAVED_ATTRIBS);

        GLint linked = 0;
        glLinkProgram(_program);
        glGetProgramiv(_program, GL_LINK_STATUS, &linked);

        if (linked == GL_FALSE) {
            char buffer[1024];
            glGetShaderInfoLog(shader, 1024, NULL, buffer);
            Far::Error(Far::FAR_RUNTIME_ERROR, buffer);

            glGetProgramInfoLog(_program, 1024, NULL, buffer);
            Far::Error(Far::FAR_RUNTIME_ERROR, buffer);

            // the shader object has to be released on the failure path as
            // well, otherwise every failed compile leaks one shader.
            glDeleteShader(shader);
            glDeleteProgram(_program);
            _program = 0;
            return false;
        }

        glDeleteShader(shader);

        // set uniform locations for compute kernels
        _primvarBuffer = glGetUniformLocation(_program, "vertexBuffer");

        _uniformSizes = glGetUniformLocation(_program, "sizes");
        _uniformOffsets = glGetUniformLocation(_program, "offsets");
        _uniformIndices = glGetUniformLocation(_program, "indices");
        _uniformWeights = glGetUniformLocation(_program, "weights");

        _uniformStart = glGetUniformLocation(_program, "batchStart");
        _uniformEnd = glGetUniformLocation(_program, "batchEnd");

        _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");

        OSD_DEBUG_CHECK_GL_ERROR("KernelBundle::Compile");

        return true;
    }

    /// Location of the "vertexBuffer" sampler uniform.
    GLint GetPrimvarBufferLocation() const {
        return _primvarBuffer;
    }

    /// Location of the "sizes" sampler uniform.
    GLint GetSizesLocation() const {
        return _uniformSizes;
    }

    /// Location of the "offsets" sampler uniform.
    GLint GetOffsetsLocation() const {
        return _uniformOffsets;
    }

    /// Location of the "indices" sampler uniform.
    GLint GetIndicesLocation() const {
        return _uniformIndices;
    }

    /// Location of the "weights" sampler uniform.
    GLint GetWeightsLocation() const {
        return _uniformWeights;
    }

    /// Runs the kernel for the stencils in [start, end), capturing the
    /// results into dstBuffer through transform feedback.
    ///
    /// @param srcBuffer  unused here (already bound in bindBufferAndProgram())
    /// @param srcDesc    its offset is uploaded as the "srcOffset" uniform
    /// @param dstBuffer  GL buffer receiving the captured output
    /// @param dstDesc    layout of the destination; used to compute the
    ///                   bound range
    /// @param start      value for the "batchStart" uniform
    /// @param end        value for the "batchEnd" uniform; must be >= start
    ///
    void ApplyStencilTableKernel(GLuint srcBuffer,
                                 VertexBufferDescriptor const &srcDesc,
                                 GLuint dstBuffer,
                                 VertexBufferDescriptor const &dstDesc,
                                 int start, int end) const {

        assert(end >= start);

        (void)srcBuffer; // already bound in bindBufferAndProgram().

        // set batch range
        glUniform1i(_uniformStart, start);
        glUniform1i(_uniformEnd, end);
        glUniform1i(_uniformSrcOffset, srcDesc.offset);

        int count = end - start;

        // The destination buffer is bound at vertex boundary.
        //
        // Example: When we have a batched and interleaved vertex buffer
        //
        //    Obj X   |    Obj Y                                  |
        // -----------+-------------------------------------------+-------
        //            |    vtx 0      |    vtx 1      |           |
        // -----------+---------------+---------------+-----------+-------
        //            | x y z r g b a | x y z r g b a |   ....    |
        // -----------+---------------+---------------+-----------+-------
        //                    ^
        //                    srcDesc.offset for Obj Y color
        //
        //            ^-------------------------------------------^
        //                    XFB destination buffer range
        //              S S S * * * *
        //              k k k
        //              i i i
        //              p p p
        //
        // We use gl_SkipComponents to skip the first 3 XYZ so the
        // buffer itself needs to be bound for entire section of ObjY.
        //
        // Note that for the source buffer (texture) we bind the whole
        // buffer (all VBO range) and use srcOffset=srcDesc.offset for
        // indexing.
        //
        int dstBufferBindOffset =
            dstDesc.offset - (dstDesc.offset % dstDesc.stride);

        // bind destination buffer
        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
                          0, dstBuffer,
                          dstBufferBindOffset * sizeof(float),
                          count * dstDesc.stride * sizeof(float));

        // one point per stencil; the vertex shader output is captured
        glBeginTransformFeedback(GL_POINTS);
        glDrawArrays(GL_POINTS, 0, count);
        glEndTransformFeedback();

        glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);

        //OSD_DEBUG_CHECK_GL_ERROR("TransformPrimvarBuffer\n");
    }

    /// Predicate for std::find_if: matches a bundle compiled for the same
    /// primvar length and stride (the offset is not compared).
    struct Match {

        Match(VertexBufferDescriptor const & d) : desc(d) { }

        bool operator() (KernelBundle const * kernel) {
            return (desc.length==kernel->_desc.length and
                    desc.stride==kernel->_desc.stride);
        }

        VertexBufferDescriptor desc;
    };

private:

    GLuint _program;

    GLint _primvarBuffer; // sampler for the source primvar buffer texture

    GLint _uniformSizes, // uniform parameters for kernels
          _uniformOffsets,
          _uniformIndices,
          _uniformWeights,

          _uniformStart, // batch
          _uniformEnd,

          _uniformSrcOffset;

    VertexBufferDescriptor _desc; // primvar buffer descriptor
};
// ----------------------------------------------------------------------------
// Prepares GL for a transform-feedback pass: disables rasterization,
// activates the current kernel program, makes sure `feedbackTexture` exists
// as an R32F buffer texture on the current primvar buffer, binds it to the
// primvar sampler on unit 0, and binds a freshly generated vertex array.
void
GLSLTransformFeedbackComputeController::bindBufferAndProgram(
    GLuint & feedbackTexture) {

    glEnable(GL_RASTERIZER_DISCARD);
    _currentBindState.kernelBundle->UseProgram();

    if (feedbackTexture == 0) {
        glGenTextures(1, &feedbackTexture);

        // Prefer the direct-state-access entry point when it is available.
        bool attached = false;

#if defined(GL_EXT_direct_state_access)
        if (glTextureBufferEXT) {
            glTextureBufferEXT(feedbackTexture, GL_TEXTURE_BUFFER, GL_R32F,
                               _currentBindState.buffer);
            attached = true;
        }
#endif

        if (not attached) {
            glBindTexture(GL_TEXTURE_BUFFER, feedbackTexture);
            glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.buffer);
            glBindTexture(GL_TEXTURE_BUFFER, 0);
        }
    }

    GLint const primvarSampler =
        _currentBindState.kernelBundle->GetPrimvarBufferLocation();
    bindTexture(primvarSampler, feedbackTexture, 0);

    // bind vertex array
    // always create new one, to be safe with multiple contexts.
    glGenVertexArrays(1, &_vao);
    glBindVertexArray(_vao);
}
// ----------------------------------------------------------------------------
// Binds the four stencil table textures of 'context' (sizes, offsets,
// indices, weights) to texture units 1 through 4, using the sampler
// locations of the currently bound kernel. 'varying' selects the varying
// tables; otherwise the vertex tables are bound.
void
GLSLTransformFeedbackComputeController::bindContextStencilTables(
    ComputeContext const *context, bool varying) {

    KernelBundle const * bundle = _currentBindState.kernelBundle;

    GLint sizesLocation   = bundle->GetSizesLocation();
    GLint offsetsLocation = bundle->GetOffsetsLocation();
    GLint indicesLocation = bundle->GetIndicesLocation();
    GLint weightsLocation = bundle->GetWeightsLocation();

    if (varying) {
        bindTexture(sizesLocation,   context->GetVaryingStencilTablesSizes(),   1);
        bindTexture(offsetsLocation, context->GetVaryingStencilTablesOffsets(), 2);
        bindTexture(indicesLocation, context->GetVaryingStencilTablesIndices(), 3);
        bindTexture(weightsLocation, context->GetVaryingStencilTablesWeights(), 4);
        return;
    }

    bindTexture(sizesLocation,   context->GetVertexStencilTablesSizes(),   1);
    bindTexture(offsetsLocation, context->GetVertexStencilTablesOffsets(), 2);
    bindTexture(indicesLocation, context->GetVertexStencilTablesIndices(), 3);
    bindTexture(weightsLocation, context->GetVertexStencilTablesWeights(), 4);
}
// ----------------------------------------------------------------------------
// Restores the GL state touched by a transform feedback pass: unbinds the
// texture buffer on unit 0, disables rasterizer discard, deactivates the
// program and destroys the vertex array object created for the pass.
// Texture unit 0 is left as the active unit.
void
GLSLTransformFeedbackComputeController::unbindResources() {

    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_BUFFER, 0);

    glDisable(GL_RASTERIZER_DISCARD);
    glUseProgram(0);

    // (a second glActiveTexture(GL_TEXTURE0) used to be issued here; nothing
    // above changes the active unit, so it was redundant)

    glBindVertexArray(0);
    glDeleteVertexArrays(1, &_vao);

    // Forget the deleted name so that a later call cannot delete it again
    // after the GL has handed the same name out to another object.
    _vao = 0;
}
// ----------------------------------------------------------------------------
// Returns the kernel bundle matching 'desc' (see KernelBundle::Match).
// When the registry holds no such bundle, a new one is compiled for 'desc',
// appended to the registry (which keeps ownership) and returned.
GLSLTransformFeedbackComputeController::KernelBundle const *
GLSLTransformFeedbackComputeController::getKernel(
    VertexBufferDescriptor const &desc) {

    KernelRegistry::iterator found =
        std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
                     KernelBundle::Match(desc));

    if (found != _kernelRegistry.end()) {
        return *found;
    }

    // cache miss : build a bundle using 'desc' for both source and destination
    KernelBundle * created = new KernelBundle();
    created->Compile(desc, desc);
    _kernelRegistry.push_back(created);
    return created;
}
// ----------------------------------------------------------------------------
// Launches the bound kernel over stencils [0, numStencils).
//
// Source and destination are the same VBO: the destination descriptor is a
// copy of the bound descriptor whose offset is advanced past the control
// vertices (GetNumControlVertices() * stride elements).
void
GLSLTransformFeedbackComputeController::ApplyStencilTableKernel(
    GLSLTransformFeedbackComputeContext const *context, int numStencils) const {

    assert(context);

    VertexBufferDescriptor srcDesc = _currentBindState.desc;

    VertexBufferDescriptor dstDesc(srcDesc);
    dstDesc.offset += context->GetNumControlVertices() * dstDesc.stride;

    int firstStencil = 0;
    int lastStencil = numStencils;

    _currentBindState.kernelBundle->ApplyStencilTableKernel(
        _currentBindState.buffer, srcDesc,
        _currentBindState.buffer, dstDesc,
        firstStencil, lastStencil);
}
// ----------------------------------------------------------------------------
// No GL object is created at construction: both texture names and the
// vertex array name start out as 0.
GLSLTransformFeedbackComputeController::GLSLTransformFeedbackComputeController()
    : _vertexTexture(0),
      _varyingTexture(0),
      _vao(0) {
}
// Destroys every kernel bundle held by the registry, then deletes the
// vertex / varying texture names if they are non-zero.
GLSLTransformFeedbackComputeController::~GLSLTransformFeedbackComputeController() {

    for (KernelRegistry::size_type i = 0; i < _kernelRegistry.size(); ++i) {
        delete _kernelRegistry[i];
    }

    if (_vertexTexture != 0) {
        glDeleteTextures(1, &_vertexTexture);
    }

    if (_varyingTexture != 0) {
        glDeleteTextures(1, &_varyingTexture);
    }
}
// ----------------------------------------------------------------------------
// Waits for the kernels by calling glFinish(), which does not return until
// all previously issued GL commands have completed.
void GLSLTransformFeedbackComputeController::Synchronize() {

    glFinish();
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,214 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H
#define OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/glslTransformFeedbackComputeContext.h"
#include "../osd/vertexDescriptor.h"
#include <vector>
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
class GLSLTransformFeedbackKernelBundle;
/// \brief Compute controller for launching GLSLTransformFeedback transform feedback
/// subdivision kernels.
///
/// GLSLTransformFeedbackComputeController is a compute controller class to launch
/// GLSLTransformFeedback transfrom feedback subdivision kernels. It requires
/// GLVertexBufferInterface as arguments of Refine function.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class GLSLTransformFeedbackComputeController {
public:
typedef GLSLTransformFeedbackComputeContext ComputeContext;
/// Constructor.
GLSLTransformFeedbackComputeController();
/// Destructor.
~GLSLTransformFeedbackComputeController();
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// @param context The GLSLTransformFeedbackComputeContext to apply
/// refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
/// @param vertexDesc The descriptor of vertex elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
/// @param varyingBuffer Vertex-interpolated data buffer
///
/// @param varyingDesc The descriptor of varying elements to be refined.
/// if it's null, all primvars in the vertex buffer
/// will be refined.
///
template<class VERTEX_BUFFER, class VARYING_BUFFER>
void Compute( GLSLTransformFeedbackComputeContext const * context,
VERTEX_BUFFER * vertexBuffer,
VARYING_BUFFER * varyingBuffer,
VertexBufferDescriptor const * vertexDesc=NULL,
VertexBufferDescriptor const * varyingDesc=NULL ){
if (vertexBuffer) {
bind(vertexBuffer, vertexDesc, _vertexTexture);
bindContextStencilTables(context, false);
ApplyStencilTableKernel(
context, context->GetNumStencilsInVertexStencilTables());
}
if (varyingBuffer) {
bind(varyingBuffer, varyingDesc, _varyingTexture);
bindContextStencilTables(context, true);
ApplyStencilTableKernel(
context, context->GetNumStencilsInVaryingStencilTables());
}
unbind();
}
/// Execute subdivision kernels and apply to given vertex buffers.
///
/// @param context The GLSLTransformFeedbackComputeContext to apply
/// refinement operations to
///
/// @param vertexBuffer Vertex-interpolated data buffer
///
template<class VERTEX_BUFFER>
void Compute(GLSLTransformFeedbackComputeContext const * context,
VERTEX_BUFFER *vertexBuffer) {
Compute<VERTEX_BUFFER>(context, vertexBuffer, (VERTEX_BUFFER*)0);
}
/// Waits until all running subdivision kernels finish.
void Synchronize();
protected:
void ApplyStencilTableKernel(ComputeContext const *context,
int numStencils) const;
template<class BUFFER>
void bind( BUFFER * buffer, VertexBufferDescriptor const * desc,
GLuint feedbackTexture ) {
assert(buffer);
// if the vertex buffer descriptor is specified, use it
// otherwise, assumes the data is tightly packed in the vertex buffer.
if (desc) {
_currentBindState.desc = *desc;
} else {
int numElements = buffer ? buffer->GetNumElements() : 0;
_currentBindState.desc =
VertexBufferDescriptor(0, numElements, numElements);
}
_currentBindState.buffer = buffer->BindVBO();
_currentBindState.kernelBundle = getKernel(_currentBindState.desc);
bindBufferAndProgram(feedbackTexture);
}
// Unbinds any previously bound vertex and varying data buffers.
void unbind() {
_currentBindState.Reset();
unbindResources();
}
// binds the primvar data buffer and compute program
void bindBufferAndProgram(GLuint & texture);
// binds the stencil tables for 'vertex' interpolation
void bindContextStencilTables(ComputeContext const *context, bool varying=false);
// unbinds the primvar data buffer and compute program
void unbindResources();
private:
class KernelBundle;
// Bind state is a transitional state during refinement.
// It doesn't take an ownership of the vertex buffers.
struct BindState {
BindState() : buffer(0), kernelBundle(0) { }
void Reset() {
buffer = 0;
desc.Reset();
kernelBundle = 0;
}
GLuint buffer;
VertexBufferDescriptor desc;
KernelBundle const * kernelBundle;
};
BindState _currentBindState;
typedef std::vector<KernelBundle *> KernelRegistry;
KernelBundle const * getKernel(VertexBufferDescriptor const &desc);
KernelRegistry _kernelRegistry;
GLuint _vertexTexture,
_varyingTexture,
_vao;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H

View File

@ -39,6 +39,8 @@
#include "../osd/vertexDescriptor.h"
struct ID3D11DeviceContext;
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -80,13 +82,6 @@ public:
virtual void Refine() = 0;
virtual void Refine(VertexBufferDescriptor const *vertexDesc,
VertexBufferDescriptor const *varyingDesc) = 0;
virtual void Refine(VertexBufferDescriptor const *vertexDesc,
VertexBufferDescriptor const *varyingDesc,
bool interleaved) = 0;
virtual void Synchronize() = 0;
virtual DrawContext * GetDrawContext() = 0;
@ -119,25 +114,143 @@ protected:
// ---------------------------------------------------------------------------
template <class VERTEX_BUFFER,
class COMPUTE_CONTROLLER,
class DRAW_CONTEXT,
class DEVICE_CONTEXT = void>
// Builds the STENCIL_TABLES representation of a Far::StencilTables through
// STENCIL_TABLES::Create(table, context). A NULL table yields NULL and
// Create() is not called.
template <typename STENCIL_TABLES, typename DEVICE_CONTEXT>
STENCIL_TABLES const *
convertToCompatibleStencilTables(
    Far::StencilTables const *table, DEVICE_CONTEXT *context) {

    if (table) {
        return STENCIL_TABLES::Create(table, context);
    }
    return NULL;
}
// Specialization for plain Far::StencilTables without a device context:
// no conversion is needed, the result is a heap-allocated copy of 'table'
// (or NULL when 'table' is NULL).
//
// An explicit (full) specialization of a function template is an ordinary
// function, not a template: unless it is declared inline, every translation
// unit that includes this definition emits its own copy and the link fails
// with duplicate symbols. Hence the 'inline'.
template <>
inline Far::StencilTables const *
convertToCompatibleStencilTables<Far::StencilTables, void>(
    Far::StencilTables const *table, void * /*context*/) {

    // no need for conversion
    // XXX: We don't want to even copy.
    if (not table) return NULL;
    return new Far::StencilTables(*table);
}
// Specialization for plain Far::StencilTables with an ID3D11DeviceContext:
// no conversion is needed, the result is a heap-allocated copy of 'table'
// (or NULL when 'table' is NULL). The device context is unused.
//
// An explicit (full) specialization of a function template is an ordinary
// function, not a template: unless it is declared inline, every translation
// unit that includes this definition emits its own copy and the link fails
// with duplicate symbols. Hence the 'inline'.
template <>
inline Far::StencilTables const *
convertToCompatibleStencilTables<Far::StencilTables, ID3D11DeviceContext>(
    Far::StencilTables const *table, ID3D11DeviceContext * /*context*/) {

    // no need for conversion
    // XXX: We don't want to even copy.
    if (not table) return NULL;
    return new Far::StencilTables(*table);
}
// ---------------------------------------------------------------------------
// Cache of EVALUATOR instances keyed by the length and stride of a
// (source, destination) descriptor pair. The cache owns the evaluators it
// creates and deletes them in its destructor.
template <typename EVALUATOR>
class EvaluatorCacheT {
public:
    // One cached evaluator together with the descriptors it was created for.
    struct Entry {
        Entry(VertexBufferDescriptor const &sd,
              VertexBufferDescriptor const &dd,
              EVALUATOR *e) : srcDesc(sd), dstDesc(dd), evaluator(e) {}
        VertexBufferDescriptor srcDesc, dstDesc;
        EVALUATOR *evaluator;
    };
    typedef std::vector<Entry> Evaluators;

    EvaluatorCacheT() { }

    ~EvaluatorCacheT() {
        for(typename Evaluators::iterator it = _evaluators.begin();
            it != _evaluators.end(); ++it) {
            delete it->evaluator;
        }
    }

    // Returns the cached evaluator whose source and destination descriptors
    // have the same length and stride as the given ones (offsets are not
    // compared). On a miss, EVALUATOR::Create(srcDesc, dstDesc, deviceContext)
    // is called and its result is cached and returned.
    //
    // XXX: FIXME, linear search
    template <typename DEVICE_CONTEXT>
    EVALUATOR *GetEvaluator(VertexBufferDescriptor const &srcDesc,
                            VertexBufferDescriptor const &dstDesc,
                            DEVICE_CONTEXT *deviceContext) {

        for(typename Evaluators::iterator it = _evaluators.begin();
            it != _evaluators.end(); ++it) {
            if (it->srcDesc.length == srcDesc.length and
                it->srcDesc.stride == srcDesc.stride and
                it->dstDesc.length == dstDesc.length and
                it->dstDesc.stride == dstDesc.stride) {
                return it->evaluator;
            }
        }
        EVALUATOR *e = EVALUATOR::Create(srcDesc, dstDesc, deviceContext);
        _evaluators.push_back(Entry(srcDesc, dstDesc, e));
        return e;
    }

private:
    // The destructor deletes every cached evaluator, so a copy of the cache
    // would delete the same pointers a second time. Copying is disabled
    // (declared, never defined).
    EvaluatorCacheT(EvaluatorCacheT const &);
    EvaluatorCacheT & operator=(EvaluatorCacheT const &);

    Evaluators _evaluators;
};
// Compile-time trait: 'value' is true when EVALUATOR declares a nested type
// named 'Instantiatable', false otherwise. It is used to tell evaluators
// that can be instantiated from the ones that cannot.
template <typename EVALUATOR>
struct instantiatable
{
    // Two result types of different sizes, so that sizeof() can reveal
    // which overload of chk() has been selected.
    typedef char yes[1];
    typedef char no[2];

    // Only viable when C::Instantiatable names a type; otherwise the
    // substitution fails and this overload is silently discarded.
    template <typename C> static yes &chk(typename C::Instantiatable *t=0);

    // Catch-all: an ellipsis is the worst possible match, so this overload
    // is only chosen when the one above has been discarded.
    template <typename C> static no &chk(...);

    static bool const value = (sizeof(yes) == sizeof(chk<EVALUATOR>(0)));
};
// Minimal enable_if: the primary template exposes the nested typedef 'type'
// (T, which defaults to void) ...
template <bool CONDITION, typename T=void>
struct enable_if {
    typedef T type;
};

// ... and the partial specialization for a false condition has no 'type'
// member, which removes from overload resolution any declaration naming it.
template <typename T>
struct enable_if<false, T> {
};
// Overload selected when instantiatable<EVALUATOR>::value is true:
// forwards the lookup to the cache, or returns NULL when no cache is given.
// The trailing pointer parameter only exists to enable / disable the
// overload and is never read.
template <typename EVALUATOR, typename DEVICE_CONTEXT>
static EVALUATOR *GetEvaluator(
    EvaluatorCacheT<EVALUATOR> *cache,
    VertexBufferDescriptor const &srcDesc,
    VertexBufferDescriptor const &dstDesc,
    DEVICE_CONTEXT deviceContext,
    typename enable_if<instantiatable<EVALUATOR>::value, void>::type*t=0) {

    (void)t;

    if (cache != NULL) {
        return cache->GetEvaluator(srcDesc, dstDesc, deviceContext);
    }
    return NULL;
}
// Fallback overload, selected when instantiatable<EVALUATOR>::value is
// false: there is nothing to look up, so every argument is ignored and the
// result is always NULL.
template <typename EVALUATOR, typename DEVICE_CONTEXT>
static EVALUATOR *GetEvaluator(
    EvaluatorCacheT<EVALUATOR> *,
    VertexBufferDescriptor const &,
    VertexBufferDescriptor const &,
    DEVICE_CONTEXT,
    typename enable_if<!instantiatable<EVALUATOR>::value, void>::type*t=0) {

    (void)t;

    return NULL;
}
// ---------------------------------------------------------------------------
template <typename VERTEX_BUFFER,
typename STENCIL_TABLES,
typename EVALUATOR,
typename DRAW_CONTEXT,
typename DEVICE_CONTEXT = void>
class Mesh : public MeshInterface<DRAW_CONTEXT> {
public:
typedef VERTEX_BUFFER VertexBuffer;
typedef COMPUTE_CONTROLLER ComputeController;
typedef EVALUATOR Evaluator;
typedef STENCIL_TABLES StencilTables;
typedef DRAW_CONTEXT DrawContext;
typedef DEVICE_CONTEXT DeviceContext;
typedef typename ComputeController::ComputeContext ComputeContext;
typedef EvaluatorCacheT<Evaluator> EvaluatorCache;
typedef typename DrawContext::VertexBufferBinding VertexBufferBinding;
Mesh(ComputeController * computeController,
Far::TopologyRefiner * refiner,
Mesh(Far::TopologyRefiner * refiner,
int numVertexElements,
int numVaryingElements,
int level,
MeshBitset bits = MeshBitset(),
EvaluatorCache * evaluatorCache = NULL,
DeviceContext * deviceContext = NULL) :
_refiner(refiner),
@ -145,8 +258,9 @@ public:
_numVertices(0),
_vertexBuffer(NULL),
_varyingBuffer(NULL),
_computeContext(NULL),
_computeController(computeController),
_vertexStencilTables(NULL),
_varyingStencilTables(NULL),
_evaluatorCache(evaluatorCache),
_drawContext(NULL),
_deviceContext(deviceContext) {
@ -157,18 +271,34 @@ public:
bits.test(MeshAdaptive),
bits.test(MeshUseSingleCreasePatch));
int numVertexElementsInterleaved = numVertexElements +
int vertexBufferStride = numVertexElements +
(bits.test(MeshInterleaveVarying) ? numVaryingElements : 0);
int numVaryingElementsNonInterleaved =
int varyingBufferStride =
(bits.test(MeshInterleaveVarying) ? 0 : numVaryingElements);
initializeContext(numVertexElements,
numVaryingElements,
numVertexElementsInterleaved, level, bits);
vertexBufferStride, level, bits);
initializeVertexBuffers(_numVertices,
numVertexElementsInterleaved,
numVaryingElementsNonInterleaved);
vertexBufferStride,
varyingBufferStride);
// configure vertex buffer descriptor
_vertexDesc = VertexBufferDescriptor(0,
numVertexElements,
vertexBufferStride);
if (bits.test(MeshInterleaveVarying)) {
_varyingDesc = VertexBufferDescriptor(numVertexElements,
numVaryingElements,
vertexBufferStride);
} else {
_varyingDesc = VertexBufferDescriptor(0,
numVaryingElements,
varyingBufferStride);
}
// will retire
_drawContext->UpdateVertexTexture(_vertexBuffer, _deviceContext);
@ -179,9 +309,10 @@ public:
delete _patchTables;
delete _vertexBuffer;
delete _varyingBuffer;
delete _computeContext;
delete _vertexStencilTables;
delete _varyingStencilTables;
delete _drawContext;
// devicecontext and computecontroller are not owned by this class.
// deviceContext and evaluatorCache are not owned by this class.
}
virtual void UpdateVertexBuffer(float const *vertexData,
@ -197,29 +328,50 @@ public:
}
virtual void Refine() {
_computeController->Compute(_computeContext,
_vertexBuffer, _varyingBuffer);
}
virtual void Refine(VertexBufferDescriptor const *vertexDesc,
VertexBufferDescriptor const *varyingDesc) {
_computeController->Compute(_computeContext,
_vertexBuffer, _varyingBuffer,
vertexDesc, varyingDesc);
}
int numControlVertices = _refiner->GetNumVertices(0);
virtual void Refine(VertexBufferDescriptor const *vertexDesc,
VertexBufferDescriptor const *varyingDesc,
bool interleaved) {
_computeController->Compute(_computeContext,
_vertexBuffer,
(interleaved ?
_vertexBuffer : _varyingBuffer),
vertexDesc, varyingDesc);
VertexBufferDescriptor srcDesc = _vertexDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += numControlVertices * dstDesc.stride;
// note that the _evaluatorCache can be NULL and thus
// the evaluatorInstance can be NULL
// (for uninstantiatable kernels CPU,TBB etc)
Evaluator const *instance = GetEvaluator<Evaluator>(
_evaluatorCache, srcDesc, dstDesc, _deviceContext);
Evaluator::EvalStencils(_vertexBuffer, srcDesc,
_vertexBuffer, dstDesc,
_vertexStencilTables,
instance, _deviceContext);
if (_varyingDesc.length > 0) {
VertexBufferDescriptor srcDesc = _varyingDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += numControlVertices * dstDesc.stride;
instance = GetEvaluator<Evaluator>(
_evaluatorCache, srcDesc, dstDesc, _deviceContext);
if (_varyingBuffer) {
// non-interleaved
Evaluator::EvalStencils(_varyingBuffer, srcDesc,
_varyingBuffer, dstDesc,
_varyingStencilTables,
instance, _deviceContext);
} else {
// interleaved
Evaluator::EvalStencils(_vertexBuffer, srcDesc,
_vertexBuffer, dstDesc,
_varyingStencilTables,
instance, _deviceContext);
}
}
}
virtual void Synchronize() {
_computeController->Synchronize();
Evaluator::Synchronize(_deviceContext);
}
virtual DrawContext * GetDrawContext() {
@ -333,14 +485,20 @@ private:
_drawContext = DrawContext::Create(_patchTables, numElements,
_deviceContext);
_computeContext = ComputeContext::Create(vertexStencils,
varyingStencils,
_deviceContext);
// numvertices = coarse verts + refined verts + gregory basis verts
_numVertices = vertexStencils->GetNumControlVertices()
+ vertexStencils->GetNumStencils();
// convert to device stenciltables if necessary.
_vertexStencilTables =
convertToCompatibleStencilTables<StencilTables>(
vertexStencils, _deviceContext);
_varyingStencilTables =
convertToCompatibleStencilTables<StencilTables>(
varyingStencils, _deviceContext);
// FIXME: we do extra copyings for Far::Stencils.
delete vertexStencils;
delete varyingStencils;
}
@ -365,14 +523,17 @@ private:
int _numVertices;
VertexBuffer * _vertexBuffer,
* _varyingBuffer;
VertexBuffer * _vertexBuffer;
VertexBuffer * _varyingBuffer;
ComputeContext * _computeContext;
ComputeController * _computeController;
VertexBufferDescriptor _vertexDesc;
VertexBufferDescriptor _varyingDesc;
StencilTables const * _vertexStencilTables;
StencilTables const * _varyingStencilTables;
EvaluatorCache * _evaluatorCache;
DrawContext *_drawContext;
DeviceContext *_deviceContext;
};

View File

@ -1,104 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../far/stencilTables.h"
#include "../osd/ompComputeController.h"
#include "../osd/ompKernel.h"
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// A thread count of -1 selects omp_get_max_threads(); any other value is
// stored as given.
OmpComputeController::OmpComputeController(int numThreads) :
    _numThreads(numThreads == -1 ? omp_get_max_threads() : numThreads) {
}
void
OmpComputeController::ApplyStencilTableKernel(
ComputeContext const *context) const {
assert(context);
Far::StencilTables const * vertexStencils = context->GetVertexStencilTables();
if (vertexStencils and _currentBindState.vertexBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += vertexStencils->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = vertexStencils->GetNumStencils();
if (end > start) {
OmpComputeStencils(_currentBindState.vertexBuffer,
srcDesc,
_currentBindState.vertexBuffer,
dstDesc,
&vertexStencils->GetSizes().at(0),
&vertexStencils->GetOffsets().at(0),
&vertexStencils->GetControlIndices().at(0),
&vertexStencils->GetWeights().at(0),
start,
end);
}
}
Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables();
if (varyingStencils and _currentBindState.varyingBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += varyingStencils->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = varyingStencils->GetNumStencils();
if (end > start) {
OmpComputeStencils(_currentBindState.varyingBuffer,
srcDesc,
_currentBindState.varyingBuffer,
dstDesc,
&varyingStencils->GetSizes().at(0),
&varyingStencils->GetOffsets().at(0),
&varyingStencils->GetControlIndices().at(0),
&varyingStencils->GetWeights().at(0),
start,
end);
}
}
}
// Intentionally empty: no synchronization is performed here.
void OmpComputeController::Synchronize() {
    // XXX:
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,184 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_OMP_COMPUTE_CONTROLLER_H
#define OSD_OMP_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/vertexDescriptor.h"
#ifdef OPENSUBDIV_HAS_OPENMP
#include <omp.h>
#endif
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Compute controller for launching OpenMP subdivision kernels.
///
/// OmpComputeController is a compute controller class to launch OpenMP
/// threaded subdivision kernels. It requires CpuVertexBufferInterface
/// as arguments of Refine function.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class OmpComputeController {
public:
    typedef CpuComputeContext ComputeContext;

    /// Constructor.
    ///
    /// @param numThreads specifies how many openmp parallel threads to use.
    ///                   -1 attempts to use all available processors.
    ///
    explicit OmpComputeController(int numThreads=-1);

    /// Execute subdivision kernels and apply to given vertex buffers.
    ///
    /// @param  context       The CpuContext to apply refinement operations to
    ///
    /// @param  vertexBuffer  Vertex-interpolated data buffer
    ///
    /// @param  varyingBuffer Varying-interpolated data buffer
    ///
    /// @param  vertexDesc    The descriptor of vertex elements to be refined.
    ///                       if it's null, all primvars in the vertex buffer
    ///                       will be refined.
    ///
    /// @param  varyingDesc   The descriptor of varying elements to be refined.
    ///                       if it's null, all primvars in the varying buffer
    ///                       will be refined.
    ///
    template<class VERTEX_BUFFER, class VARYING_BUFFER>
    void Compute( CpuComputeContext const * context,
                  VERTEX_BUFFER * vertexBuffer,
                  VARYING_BUFFER * varyingBuffer,
                  VertexBufferDescriptor const * vertexDesc=NULL,
                  VertexBufferDescriptor const * varyingDesc=NULL ){

        // the thread count chosen at construction applies to this launch
        omp_set_num_threads(_numThreads);

        bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);

        ApplyStencilTableKernel(context);

        unbind();
    }

    /// Execute subdivision kernels and apply to given vertex buffers.
    ///
    /// @param  context       The CpuContext to apply refinement operations to
    ///
    /// @param  vertexBuffer  Vertex-interpolated data buffer
    ///
    template<class VERTEX_BUFFER>
    void Compute(CpuComputeContext const * context,
                 VERTEX_BUFFER *vertexBuffer) {
        Compute<VERTEX_BUFFER>(context, vertexBuffer, (VERTEX_BUFFER*)0);
    }

    /// Waits until all running subdivision kernels finish.
    void Synchronize();

protected:

    void ApplyStencilTableKernel(ComputeContext const *context) const;

    // Records the descriptors and the CPU pointers of both buffers in the
    // bind state. Either buffer may be null, in which case its pointer is
    // recorded as 0.
    template<class VERTEX_BUFFER, class VARYING_BUFFER>
    void bind( VERTEX_BUFFER * vertexBuffer,
               VARYING_BUFFER * varyingBuffer,
               VertexBufferDescriptor const * vertexDesc,
               VertexBufferDescriptor const * varyingDesc ) {

        // An explicit descriptor always wins; without one the data is
        // assumed to be tightly packed (offset 0, length == stride).
        if (not vertexDesc) {
            int numElements = 0;
            if (vertexBuffer) {
                numElements = vertexBuffer->GetNumElements();
            }
            _currentBindState.vertexDesc =
                VertexBufferDescriptor(0, numElements, numElements);
        } else {
            _currentBindState.vertexDesc = *vertexDesc;
        }

        if (not varyingDesc) {
            int numElements = 0;
            if (varyingBuffer) {
                numElements = varyingBuffer->GetNumElements();
            }
            _currentBindState.varyingDesc =
                VertexBufferDescriptor(0, numElements, numElements);
        } else {
            _currentBindState.varyingDesc = *varyingDesc;
        }

        _currentBindState.vertexBuffer = 0;
        if (vertexBuffer) {
            _currentBindState.vertexBuffer = vertexBuffer->BindCpuBuffer();
        }

        _currentBindState.varyingBuffer = 0;
        if (varyingBuffer) {
            _currentBindState.varyingBuffer = varyingBuffer->BindCpuBuffer();
        }
    }

    // Clears the bind state recorded by bind().
    void unbind() {
        _currentBindState.Reset();
    }

private:

    // Bind state is a transitional state during refinement.
    // It doesn't take an ownership of the vertex buffers.
    struct BindState {

        BindState() : vertexBuffer(0), varyingBuffer(0) { }

        void Reset() {
            vertexBuffer = varyingBuffer = 0;
            vertexDesc.Reset();
            varyingDesc.Reset();
        }

        float * vertexBuffer,
              * varyingBuffer;

        VertexBufferDescriptor vertexDesc,
                               varyingDesc;
    };

    BindState _currentBindState;

    // thread count handed to omp_set_num_threads() by Compute()
    int _numThreads;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_OMP_COMPUTE_CONTROLLER_H

View File

@ -1,155 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/ompEvalStencilsController.h"
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// A thread count of -1 selects omp_get_num_procs(); any other value is
// stored as given.
OmpEvalStencilsController::OmpEvalStencilsController(int numThreads) {

    if (numThreads == -1) {
        _numThreads = omp_get_num_procs();
    } else {
        _numThreads = numThreads;
    }
}
// Nothing to release.
OmpEvalStencilsController::~OmpEvalStencilsController() { }
// Evaluates every limit stencil of 'context' against the bound control data
// and writes one output element per stencil.
//
// Returns the number of stencils evaluated, or 0 when there is nothing to do
// (no stencil, incompatible descriptors, or unbound control / output data).
int
OmpEvalStencilsController::_UpdateValues( CpuEvalStencilsContext * context ) {

    Far::LimitStencilTables const * stencils = context->GetStencilTables();

    int nstencils = stencils->GetNumStencils();
    if (not nstencils)
        return 0;

    VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc,
                           outDesc = _currentBindState.outputDataDesc;

    // make sure that we have control data to work with
    if (not ctrlDesc.CanEval(outDesc))
        return 0;

    // The base pointers have to be tested before any offset is applied:
    // once a descriptor offset has been added, a null base is no longer null
    // and the check can never fire.
    if (not _currentBindState.controlData or not _currentBindState.outputData)
        return 0;

    float const * ctrl = _currentBindState.controlData + ctrlDesc.offset;

#pragma omp parallel for
    for (int i=0; i<nstencils; ++i) {

        int size = stencils->GetSizes()[i];
        Far::Index offset = stencils->GetOffsets()[i];
        Far::Index const * index = &stencils->GetControlIndices().at(offset);

        float const * weight = &stencils->GetWeights().at(offset);

        float * out = _currentBindState.outputData + i * outDesc.stride + outDesc.offset;

        // the output element is an accumulator : clear it first
        memset(out, 0, outDesc.length*sizeof(float));

        for (int j=0; j<size; ++j, ++index, ++weight) {

            float const * cv = ctrl + (*index)*ctrlDesc.stride;

            for (int k=0; k<outDesc.length; ++k) {
                out[k] += cv[k] * (*weight);
            }
        }
    }

    return nstencils;
}
// Evaluates the u and v derivative weights of every limit stencil of
// 'context' against the bound control data and writes one du and one dv
// element per stencil.
//
// Returns the number of stencils evaluated, or 0 when there is nothing to do
// (no stencil, incompatible descriptors, or unbound control / output data).
int
OmpEvalStencilsController::_UpdateDerivs( CpuEvalStencilsContext * context ) {

    Far::LimitStencilTables const * stencils = context->GetStencilTables();

    int nstencils = stencils->GetNumStencils();
    if (not nstencils)
        return 0;

    VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc,
                           duDesc = _currentBindState.outputDuDesc,
                           dvDesc = _currentBindState.outputDvDesc;

    // make sure that we have control data to work with
    if (not (ctrlDesc.CanEval(duDesc) and ctrlDesc.CanEval(dvDesc)))
        return 0;

    // The base pointers have to be tested before any offset is applied:
    // once a descriptor offset has been added, a null base is no longer null
    // and the check can never fire.
    if (not _currentBindState.controlData or
        not _currentBindState.outputUDeriv or
        not _currentBindState.outputVDeriv)
        return 0;

    float const * ctrl = _currentBindState.controlData + ctrlDesc.offset;

#pragma omp parallel for
    for (int i=0; i<nstencils; ++i) {

        int size = stencils->GetSizes()[i];
        Far::Index offset = stencils->GetOffsets()[i];
        Far::Index const * index = &stencils->GetControlIndices().at(offset);

        float const * duweight = &stencils->GetDuWeights().at(offset),
                    * dvweight = &stencils->GetDvWeights().at(offset);

        float * du = _currentBindState.outputUDeriv + i * duDesc.stride + duDesc.offset,
              * dv = _currentBindState.outputVDeriv + i * dvDesc.stride + dvDesc.offset;

        // both output elements are accumulators : clear them first
        memset(du, 0, duDesc.length*sizeof(float));
        memset(dv, 0, dvDesc.length*sizeof(float));

        for (int j=0; j<size; ++j, ++index, ++duweight, ++dvweight) {

            float const * cv = ctrl + (*index)*ctrlDesc.stride;

            // NOTE(review): dv is accumulated over duDesc.length elements
            // although only dvDesc.length of them are cleared above; this
            // presumably relies on both lengths being equal -- confirm
            // against VertexBufferDescriptor::CanEval().
            for (int k=0; k<duDesc.length; ++k) {
                du[k] += cv[k] * (*duweight);
                dv[k] += cv[k] * (*dvweight);
            }
        }
    }

    return nstencils;
}
// Intentionally empty: no synchronization is performed here.
void OmpEvalStencilsController::Synchronize() { }
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,221 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef FAR_OMP_EVALSTENCILS_CONTROLLER_H
#define FAR_OMP_EVALSTENCILS_CONTROLLER_H
#include "../version.h"
#include "../osd/cpuEvalStencilsContext.h"
#ifdef OPENSUBDIV_HAS_OPENMP
#include <omp.h>
#endif
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
///
/// \brief CPU stencils evaluation controller
///
/// CpuStencilsController is a compute controller class to launch
/// single threaded CPU stencil evalution kernels.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class OmpEvalStencilsController {
public:
/// \brief Constructor.
///
/// @param numThreads specifies how many openmp parallel threads to use.
/// -1 attempts to use all available processors.
///
OmpEvalStencilsController(int numThreads=-1);
/// \brief Destructor.
~OmpEvalStencilsController();
/// \brief Applies stencil weights to the control vertex data
///
/// Applies the stencil weights to the control vertex data to evaluate the
/// interpolated limit positions at the parametric locations of the stencils
///
/// @param context the CpuEvalStencilsContext with the stencil weights
///
/// @param controlDataDesc vertex buffer descriptor for the control vertex data
///
/// @param controlVertices vertex buffer with the control vertices data
///
/// @param outputDataDesc vertex buffer descriptor for the output vertex data
///
/// @param outputData output vertex buffer for the interpolated data
///
template<class CONTROL_BUFFER, class OUTPUT_BUFFER>
int UpdateValues( CpuEvalStencilsContext * context,
VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices,
VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) {
if (not context->GetStencilTables()->GetNumStencils())
return 0;
omp_set_num_threads(_numThreads);
bindControlData( controlDataDesc, controlVertices );
bindOutputData( outputDataDesc, outputData );
int n = _UpdateValues( context );
unbind();
return n;
}
/// \brief Applies derivative stencil weights to the control vertex data
///
/// Computes the U and V derivative stencils to the control vertex data at
/// the parametric locations contained in each stencil
///
/// @param context the CpuEvalStencilsContext with the stencil weights
///
/// @param controlDataDesc vertex buffer descriptor for the control vertex data
///
/// @param controlVertices vertex buffer with the control vertices data
///
/// @param outputDuDesc vertex buffer descriptor for the U derivative output data
///
/// @param outputDuData output vertex buffer for the U derivative data
///
/// @param outputDvDesc vertex buffer descriptor for the V deriv output data
///
/// @param outputDvData output vertex buffer for the V derivative data
///
template<class CONTROL_BUFFER, class OUTPUT_BUFFER>
int UpdateDerivs( CpuEvalStencilsContext * context,
VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices,
VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData,
VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) {
if (not context->GetStencilTables()->GetNumStencils())
return 0;
bindControlData( controlDataDesc, controlVertices );
bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData );
int n = _UpdateDerivs( context );
unbind();
return n;
}
/// Waits until all running subdivision kernels finish.
void Synchronize();
protected:
/// \brief Binds control vertex data buffer
template<class VERTEX_BUFFER>
void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) {
_currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0;
_currentBindState.controlDataDesc = controlDataDesc;
}
/// \brief Binds output vertex data buffer
template<class VERTEX_BUFFER>
void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) {
_currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0;
_currentBindState.outputDataDesc = outputDataDesc;
}
/// \brief Binds output derivative vertex data buffer
template<class VERTEX_BUFFER>
void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu,
VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) {
_currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0;
_currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0;
_currentBindState.outputDuDesc = outputDuDesc;
_currentBindState.outputDvDesc = outputDvDesc;
}
/// \brief Unbinds any previously bound vertex and varying data buffers.
void unbind() {
_currentBindState.Reset();
}
private:
int _UpdateValues( CpuEvalStencilsContext * context );
int _UpdateDerivs( CpuEvalStencilsContext * context );
int _numThreads;
// Bind state is a transitional state during refinement.
// It doesn't take an ownership of vertex buffers.
struct BindState {
BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { }
void Reset() {
controlData = outputData = outputUDeriv = outputVDeriv = NULL;
controlDataDesc.Reset();
outputDataDesc.Reset();
outputDuDesc.Reset();
outputDvDesc.Reset();
}
// transient mesh data
VertexBufferDescriptor controlDataDesc,
outputDataDesc,
outputDuDesc,
outputDvDesc;
float * controlData,
* outputData,
* outputUDeriv,
* outputVDeriv;
};
BindState _currentBindState;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // FAR_OMP_EVALSTENCILS_CONTROLLER_H

View File

@ -1,5 +1,5 @@
//
// Copyright 2013 Pixar
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
@ -22,23 +22,48 @@
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cpuEvalStencilsContext.h"
#include "../osd/ompEvaluator.h"
#include "../osd/ompKernel.h"
#include <omp.h>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
CpuEvalStencilsContext::CpuEvalStencilsContext(Far::LimitStencilTables const *stencils) :
_stencils(stencils) {
/* static */
// Raw-pointer stencil evaluation: forwards the stencil range [start, end) to
// the OmpEvalStencils() kernel. An empty or inverted range is treated as a
// successful no-op. Always returns true.
bool
OmpEvaluator::EvalStencils(const float *src,
                           VertexBufferDescriptor const &srcDesc,
                           float *dst,
                           VertexBufferDescriptor const &dstDesc,
                           const unsigned char * sizes,
                           const int * offsets,
                           const int * indices,
                           const float * weights,
                           int start, int end) {

    bool const hasWork = (start < end);

    if (hasWork) {
        // we can probably expand cpuKernel.cpp to here.
        OmpEvalStencils(src, srcDesc, dst, dstDesc,
                        sizes, offsets, indices, weights, start, end);
    }
    return true;
}
CpuEvalStencilsContext *
CpuEvalStencilsContext::Create(Far::LimitStencilTables const *stencils) {
return new CpuEvalStencilsContext(stencils);
/* static */
// No-op: the device context argument is ignored and the body does no work.
void
OmpEvaluator::Synchronize(void * /*deviceContext*/) {
// Nothing to wait for: the kernels are launched with "omp parallel for",
// which synchronizes by itself.
}
} // end namespace Osd
/* static */
void
OmpEvaluator::SetNumThreads(int numThreads) {
omp_set_num_threads(numThreads);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,114 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_OMP_EVALUATOR_H
#define OPENSUBDIV_OSD_OMP_EVALUATOR_H
#include "../version.h"
#include <cstddef>
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Stateless OpenMP stencil evaluator; every member is static.
class OmpEvaluator {
public:
    /// \brief Generic static compute function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently from OsdMesh template interface.
    ///
    /// @param srcVertexBuffer  Input primvar buffer.
    ///                         must have BindCpuBuffer() method returning a
    ///                         const float pointer for read
    ///
    /// @param srcDesc          vertex buffer descriptor for the input buffer
    ///
    /// @param dstVertexBuffer  Output primvar buffer
    ///                         must have BindCpuBuffer() method returning a
    ///                         float pointer for write
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable     stencil table to be applied.
    ///
    /// @param instance         not used in the omp kernel
    ///                         (declared as a typed pointer to prevent
    ///                          undesirable template resolution)
    ///
    /// @param deviceContext    not used in the omp kernel
    ///
    /// @return the result of the raw-pointer overload, or true when the
    ///         stencil table holds no stencils
    ///
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             VERTEX_BUFFER *dstVertexBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             OmpEvaluator const * instance = NULL,
                             void * deviceContext = NULL) {
        (void)instance;       // unused;
        (void)deviceContext;  // unused;

        const float *src = srcVertexBuffer->BindCpuBuffer();
        float *dst = dstVertexBuffer->BindCpuBuffer();

        // An empty table has nothing to evaluate; return before indexing
        // element 0 of its (possibly empty) arrays.
        int const numStencils = stencilTable->GetNumStencils();
        if (numStencils <= 0) return true;

        return EvalStencils(src,
                            srcDesc,
                            dst,
                            dstDesc,
                            &stencilTable->GetSizes()[0],
                            &stencilTable->GetOffsets()[0],
                            &stencilTable->GetControlIndices()[0],
                            &stencilTable->GetWeights()[0],
                            /*start = */ 0,
                            /*end   = */ numStencils);
    }

    /// \brief Stencil compute function operating on raw pointers.
    ///
    /// @param src      input primvar data
    ///
    /// @param srcDesc  vertex buffer descriptor for the input data
    ///
    /// @param dst      output primvar data
    ///
    /// @param dstDesc  vertex buffer descriptor for the output data
    ///
    /// @param sizes    stencil sizes array of the table to apply
    ///
    /// @param offsets  stencil offsets array of the table to apply
    ///
    /// @param indices  control indices array of the table to apply
    ///
    /// @param weights  weights array of the table to apply
    ///
    /// @param start    index of the first stencil to evaluate
    ///
    /// @param end      index one past the last stencil to evaluate
    ///
    static bool EvalStencils(const float *src,
                             VertexBufferDescriptor const &srcDesc,
                             float *dst,
                             VertexBufferDescriptor const &dstDesc,
                             const unsigned char * sizes,
                             const int * offsets,
                             const int * indices,
                             const float * weights,
                             int start,
                             int end);

    /// \brief Synchronization entry point.
    ///
    /// @param deviceContext  optional device context (defaults to NULL)
    ///
    static void Synchronize(void *deviceContext = NULL);

    /// \brief Sets the number of threads to use.
    ///
    /// @param numThreads  requested number of threads
    ///
    static void SetNumThreads(int numThreads);
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_OMP_EVALUATOR_H

View File

@ -73,16 +73,15 @@ copy(float *dst, int dstIndex, const float *src,
// XXXX manuelk this should be optimized further by using SIMD - considering
// OMP is somewhat obsolete - this is probably not worth it.
void
OmpComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end) {
OmpEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end) {
if (start > 0) {
sizes += start;
indices += offsets[start];

View File

@ -22,8 +22,8 @@
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_OMP_KERNEL_H
#define OSD_OMP_KERNEL_H
#ifndef OPENSUBDIV_OSD_OMP_KERNEL_H
#define OPENSUBDIV_OSD_OMP_KERNEL_H
#include "../version.h"
@ -35,15 +35,15 @@ namespace Osd {
struct VertexBufferDescriptor;
void
OmpComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end);
OmpEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end);
} // end namespace Osd
@ -52,4 +52,4 @@ using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_OMP_KERNEL_H
#endif // OPENSUBDIV_OSD_OMP_KERNEL_H

View File

@ -1,185 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/ompSmoothNormalController.h"
#ifdef OPENSUBDIV_HAS_OPENMP
#include <omp.h>
#endif
#include <math.h>
#include <string.h>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Computes the normal of the triangle (p0, p1, p2) as the cross product
// (p1-p0) x (p2-p0), normalized to unit length, and stores it in n.
//
// @param n   output, 3 floats
// @param p0  first triangle corner, 3 floats
// @param p1  second triangle corner, 3 floats
// @param p2  third triangle corner, 3 floats
//
// A degenerate triangle (zero-length cross product) yields the zero vector
// rather than NaN, so it contributes nothing when normals are accumulated.
inline void
cross(float *n, const float *p0, const float *p1, const float *p2) {

    float a[3] = { p1[0]-p0[0], p1[1]-p0[1], p1[2]-p0[2] };
    float b[3] = { p2[0]-p0[0], p2[1]-p0[1], p2[2]-p0[2] };
    n[0] = a[1]*b[2]-a[2]*b[1];
    n[1] = a[2]*b[0]-a[0]*b[2];
    n[2] = a[0]*b[1]-a[1]*b[0];

    float lengthSq = n[0]*n[0] + n[1]*n[1] + n[2]*n[2];

    // Dividing by a zero length would produce inf, and 0*inf is NaN.
    if (lengthSq > 0.0f) {
        float rn = 1.0f/sqrtf(lengthSq);
        n[0] *= rn;
        n[1] *= rn;
        n[2] *= rn;
    }
}
// Accumulates, for every vertex, the normals of the quad faces that reference
// it. Input positions and output normals are read and written through the
// buffers currently bound on 'context'; both descriptors must have length 3.
//
// @param context  smooth-normal context providing descriptors, bound buffers
//                 and the face-vertex indices (must be non-null)
void OmpSmoothNormalController::_smootheNormals(
    CpuSmoothNormalContext * context) {

    VertexBufferDescriptor const & iDesc = context->GetInputVertexDescriptor(),
                                 & oDesc = context->GetOutputVertexDescriptor();

    assert(iDesc.length==3 and oDesc.length==3);

    float * oBuffer = context->GetCurrentOutputVertexBuffer() + oDesc.offset;

    if (context->GetResetMemory()) {

        int const nverts = context->GetNumVertices();

        #pragma omp parallel for
        for (int j=0; j<nverts; ++j) {
            float * ptr = oBuffer + j * oDesc.stride;
            memset(ptr, 0, oDesc.length*sizeof(float));
        }
    }

    { // note: quads only !
        float const * iBuffer = context->GetCurrentInputVertexBuffer() + iDesc.offset;

        Far::Index const * fverts = context->GetFaceVertices();

        int nfaces = context->GetNumFaces();

        #pragma omp parallel for
        for (int i=0; i<nfaces; ++i) {

            int idx = i*4;

            float const * p0 = iBuffer + fverts[idx+0]*iDesc.stride,
                        * p1 = iBuffer + fverts[idx+1]*iDesc.stride,
                        * p2 = iBuffer + fverts[idx+2]*iDesc.stride;

            // compute face normal
            float n[3];
            cross( n, p0, p1, p2 );

            // add normal to all vertices of the face
            //
            // Faces handled by different threads can reference the same
            // vertex, so each accumulation has to be atomic to avoid lost
            // updates.
            for (int j=0; j<4; ++j) {
                float * dst = oBuffer + fverts[idx+j]*oDesc.stride;
                #pragma omp atomic
                dst[0] += n[0];
                #pragma omp atomic
                dst[1] += n[1];
                #pragma omp atomic
                dst[2] += n[2];
            }
        }
    }
}
/*
void OmpSmoothNormalController::_smootheNormals(
CpuSmoothNormalContext * context) {
VertexBufferDescriptor const & iDesc = context->GetInputVertexDescriptor(),
& oDesc = context->GetOutputVertexDescriptor();
assert(iDesc.length==3 and oDesc.length==3);
float const * iBuffer = context->GetCurrentInputVertexBuffer() + iDesc.offset;
float * oBuffer = context->GetCurrentOutputVertexBuffer() + oDesc.offset;
Far::PatchTables::PTable const & verts = context->GetControlVertices();
Far::PatchTables::PatchArrayVector const & parrays = context->GetPatchArrayVector();
if (verts.empty() or parrays.empty() or (not iBuffer) or (not oBuffer)) {
return;
}
for (int i=0; i<(int)parrays.size(); ++i) {
Far::PatchTables::PatchArray const & pa = parrays[i];
Far::PatchTables::Type type = pa.GetDescriptor().GetType();
if (type==Far::PatchTables::QUADS or type==Far::PatchTables::TRIANGLES) {
int nv = Far::PatchTables::Descriptor::GetNumControlVertices(type);
// if necessary, reset all normal values to 0
if (context->GetResetMemory()) {
#pragma omp parallel for
for (int j=0; j<context->GetNumVertices(); ++j) {
float * ptr = oBuffer + j * oDesc.stride;
memset(ptr, 0, oDesc.length*sizeof(float));
}
}
#pragma omp parallel for
for (int j=0; j<(int)pa.GetNumPatches(); ++j) {
int idx = pa.GetVertIndex() + j*nv;
float const * p0 = iBuffer + verts[idx+0]*iDesc.stride,
* p1 = iBuffer + verts[idx+1]*iDesc.stride,
* p2 = iBuffer + verts[idx+2]*iDesc.stride;
// compute face normal
float n[3];
cross( n, p0, p1, p2 );
// add normal to all vertices of the face
for (int k=0; k<nv; ++k) {
float * dst = oBuffer + verts[idx+k]*oDesc.stride;
dst[0] += n[0];
dst[1] += n[1];
dst[2] += n[2];
}
}
}
}
}
*/
// Constructor: the body is empty, no work is done here.
OmpSmoothNormalController::OmpSmoothNormalController() {
}
// Destructor: the body is empty, nothing is released here.
OmpSmoothNormalController::~OmpSmoothNormalController() {
}
// No-op: the body is empty, so the call returns immediately.
void
OmpSmoothNormalController::Synchronize() {
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,78 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_OMP_SMOOTHNORMAL_CONTROLLER_H
#define OSD_OMP_SMOOTHNORMAL_CONTROLLER_H
#include "../version.h"
#include "../osd/nonCopyable.h"
#include "../osd/cpuSmoothNormalContext.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Controller that computes smooth vertex normals for a
///        CpuSmoothNormalContext.
class OmpSmoothNormalController {

public:

    /// Constructor
    OmpSmoothNormalController();

    /// Destructor
    ~OmpSmoothNormalController();

    /// Computes smooth vertex normals
    ///
    /// Binds the input and output buffers on the context, runs the normal
    /// computation and unbinds again. A null context makes the call a no-op.
    ///
    /// @param context  the smooth-normal context to operate on
    ///
    /// @param iBuffer  input vertex buffer
    ///
    /// @param iOfs     offset forwarded to the context together with iBuffer
    ///
    /// @param oBuffer  output vertex buffer
    ///
    /// @param oOfs     offset forwarded to the context together with oBuffer
    ///
    template<class VERTEX_BUFFER>
    void SmootheNormals( CpuSmoothNormalContext * context,
                         VERTEX_BUFFER * iBuffer, int iOfs,
                         VERTEX_BUFFER * oBuffer, int oOfs ) {

        if (context) {
            context->Bind(iBuffer, iOfs, oBuffer, oOfs);
            _smootheNormals(context);
            context->Unbind();
        }
    }

    /// Waits until all running subdivision kernels finish.
    void Synchronize();

private:

    // Kernel implementation, operating on the buffers bound on the context.
    void _smootheNormals(CpuSmoothNormalContext * context);
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_OMP_SMOOTHNORMAL_CONTROLLER_H

View File

@ -1,114 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include <cassert>
#include "../far/stencilTables.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/tbbComputeController.h"
#include "../osd/tbbKernel.h"
#ifdef OPENSUBDIV_HAS_TBB
#include <tbb/task_scheduler_init.h>
#endif
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Stores the requested thread count and constructs a TBB scheduler-init
// object with it; -1 selects the scheduler's automatic thread count.
//
// NOTE(review): 'init' is a local object, so it is destroyed when the
// constructor returns - confirm that this is the intended scheduler lifetime.
TbbComputeController::TbbComputeController(int numThreads)
    : _numThreads(numThreads) {

    int requested = numThreads;
    if (_numThreads == -1) {
        // same value the default-constructed task_scheduler_init uses
        requested = tbb::task_scheduler_init::automatic;
    }
    tbb::task_scheduler_init init(requested);
}
void
TbbComputeController::ApplyStencilTableKernel(
ComputeContext const *context) const {
assert(context);
Far::StencilTables const * vertexStencils = context->GetVertexStencilTables();
if (vertexStencils and _currentBindState.vertexBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.vertexDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += vertexStencils->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = vertexStencils->GetNumStencils();
if (end > start) {
TbbComputeStencils(_currentBindState.vertexBuffer,
srcDesc,
_currentBindState.vertexBuffer,
dstDesc,
&vertexStencils->GetSizes().at(0),
&vertexStencils->GetOffsets().at(0),
&vertexStencils->GetControlIndices().at(0),
&vertexStencils->GetWeights().at(0),
start,
end);
}
}
Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables();
if (varyingStencils and _currentBindState.varyingBuffer) {
VertexBufferDescriptor srcDesc = _currentBindState.varyingDesc;
VertexBufferDescriptor dstDesc(srcDesc);
dstDesc.offset += varyingStencils->GetNumControlVertices() * dstDesc.stride;
int start = 0;
int end = varyingStencils->GetNumStencils();
if (end > start) {
TbbComputeStencils(_currentBindState.varyingBuffer,
srcDesc,
_currentBindState.varyingBuffer,
dstDesc,
&varyingStencils->GetSizes().at(0),
&varyingStencils->GetOffsets().at(0),
&varyingStencils->GetControlIndices().at(0),
&varyingStencils->GetWeights().at(0),
start,
end);
}
}
}
// No-op: the body contains no statements, so the call returns immediately.
void
TbbComputeController::Synchronize() {
// XXX: placeholder left by the original author; nothing is implemented here.
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,177 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_TBB_COMPUTE_CONTROLLER_H
#define OSD_TBB_COMPUTE_CONTROLLER_H
#include "../version.h"
#include "../osd/cpuComputeContext.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Compute controller for launching TBB subdivision kernels.
///
/// TbbComputeController is a compute controller class to launch TBB
/// threaded subdivision kernels. It requires CpuVertexBufferInterface
/// as arguments of Refine function.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class TbbComputeController {

public:

    typedef CpuComputeContext ComputeContext;

    /// Constructor.
    ///
    /// @param numThreads specifies how many TBB threads to request.
    ///                   -1 lets the TBB scheduler choose.
    ///
    explicit TbbComputeController(int numThreads=-1);

    /// Execute subdivision kernels and apply to given vertex buffers.
    ///
    /// @param  context       The CpuContext to apply refinement operations to
    ///
    /// @param  vertexBuffer  Vertex-interpolated data buffer
    ///
    /// @param  varyingBuffer Varying-interpolated data buffer
    ///
    /// @param  vertexDesc    The descriptor of vertex elements to be refined.
    ///                       if it's null, all primvars in the vertex buffer
    ///                       will be refined.
    ///
    /// @param  varyingDesc   The descriptor of varying elements to be refined.
    ///                       if it's null, all primvars in the varying buffer
    ///                       will be refined.
    ///
    template<class VERTEX_BUFFER, class VARYING_BUFFER>
    void Compute( CpuComputeContext const * context,
                  VERTEX_BUFFER * vertexBuffer,
                  VARYING_BUFFER * varyingBuffer,
                  VertexBufferDescriptor const * vertexDesc=NULL,
                  VertexBufferDescriptor const * varyingDesc=NULL ){

        // bind -> run -> unbind: the bind state only lives for this call
        bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc);

        ApplyStencilTableKernel(context);

        unbind();
    }

    /// Execute subdivision kernels and apply to given vertex buffers.
    ///
    /// Convenience overload without varying data: forwards to the general
    /// overload with a null varying buffer.
    ///
    /// @param  context       The CpuContext to apply refinement operations to
    ///
    /// @param  vertexBuffer  Vertex-interpolated data buffer
    ///
    template<class VERTEX_BUFFER>
    void Compute(CpuComputeContext const * context,
                 VERTEX_BUFFER *vertexBuffer) {

        VERTEX_BUFFER * const noVaryingBuffer = 0;

        Compute(context, vertexBuffer, noVaryingBuffer);
    }

    /// Waits until all running subdivision kernels finish.
    void Synchronize();

protected:

    void ApplyStencilTableKernel(ComputeContext const *context) const;

    /// Records the buffers and descriptors to be used by the next kernel run.
    template<class VERTEX_BUFFER, class VARYING_BUFFER>
    void bind( VERTEX_BUFFER * vertexBuffer,
               VARYING_BUFFER * varyingBuffer,
               VertexBufferDescriptor const * vertexDesc,
               VertexBufferDescriptor const * varyingDesc ) {

        // if the vertex buffer descriptor is specified, use it.
        // otherwise, assumes the data is tightly packed in the vertex buffer.
        if (not vertexDesc) {
            _currentBindState.vertexDesc = tightlyPackedDesc(vertexBuffer);
        } else {
            _currentBindState.vertexDesc = *vertexDesc;
        }

        // same rule for the varying data
        if (not varyingDesc) {
            _currentBindState.varyingDesc = tightlyPackedDesc(varyingBuffer);
        } else {
            _currentBindState.varyingDesc = *varyingDesc;
        }

        // a missing buffer is recorded as a null pointer
        _currentBindState.vertexBuffer = 0;
        if (vertexBuffer) {
            _currentBindState.vertexBuffer = vertexBuffer->BindCpuBuffer();
        }

        _currentBindState.varyingBuffer = 0;
        if (varyingBuffer) {
            _currentBindState.varyingBuffer = varyingBuffer->BindCpuBuffer();
        }
    }

    /// Clears the bind state recorded by bind().
    void unbind() {
        _currentBindState.Reset();
    }

private:

    // Descriptor covering every element of 'buffer' (offset 0, length and
    // stride equal to the element count); all zero for a null buffer.
    template<class BUFFER>
    static VertexBufferDescriptor tightlyPackedDesc(BUFFER * buffer) {

        int numElements = 0;
        if (buffer) {
            numElements = buffer->GetNumElements();
        }
        return VertexBufferDescriptor(0, numElements, numElements);
    }

    // Bind state is a transitional state during refinement.
    // It doesn't take an ownership of the vertex buffers.
    struct BindState {

        BindState() : vertexBuffer(0), varyingBuffer(0) { }

        void Reset() {
            vertexBuffer = 0;
            varyingBuffer = 0;
            vertexDesc.Reset();
            varyingDesc.Reset();
        }

        float * vertexBuffer,
              * varyingBuffer;

        VertexBufferDescriptor vertexDesc,
                               varyingDesc;
    };

    BindState _currentBindState;
    int _numThreads;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_TBB_COMPUTE_CONTROLLER_H

View File

@ -1,200 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/tbbEvalStencilsController.h"
#include <tbb/parallel_for.h>
#include <tbb/task_scheduler_init.h>
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// Grain size passed to the tbb::blocked_range objects built in this file.
// A typed constant is used instead of a lowercase macro so that the name
// obeys scoping rules and cannot silently rewrite unrelated identifiers.
static int const grain_size = 200;
// Stores the thread count to use and constructs a TBB scheduler-init object
// with it.
//
// @param numThreads  requested number of threads; any value <= 0 selects
//                    tbb::task_scheduler_init::automatic
//
// NOTE(review): 'init' is a local object, so it is destroyed when the
// constructor returns - confirm that this is the intended scheduler lifetime.
TbbEvalStencilsController::TbbEvalStencilsController(int numThreads) {

    _numThreads = numThreads > 0 ? numThreads : tbb::task_scheduler_init::automatic;

    // Use the normalized value: the raw argument may be 0 or negative.
    tbb::task_scheduler_init init(_numThreads);
}
// Destructor: the body is empty, nothing is released here.
TbbEvalStencilsController::~TbbEvalStencilsController() {
}
class StencilKernel {
public:
enum Mode { UNDEFINED, POINT, U_DERIV, V_DERIV };
StencilKernel( Far::LimitStencilTables const * stencils,
VertexBufferDescriptor ctrlDesc,
float const * ctrlData ) :
_stencils(stencils),
_mode(UNDEFINED),
_ctrlDesc(ctrlDesc),
_length(0),
_outStride(0),
_outData(0) {
_ctrlData = ctrlData + ctrlDesc.offset;
}
bool SetOutput(Mode mode, VertexBufferDescriptor outDesc, float * outData) {
if (_ctrlDesc.CanEval(outDesc)) {
_mode = mode;
_length = outDesc.length;
_outStride = outDesc.stride;
_outData = outData + outDesc.offset;
return true;
}
return false;
}
void operator() (tbb::blocked_range<int> const &r) const {
assert(_stencils and _ctrlData and _length and _outStride and _outData);
Far::Index offset = _stencils->GetOffsets()[r.begin()];
unsigned char const * sizes = &_stencils->GetSizes()[r.begin()];
Far::Index const * index = &_stencils->GetControlIndices()[offset];
float const * weight;
switch (_mode) {
case POINT : weight = &_stencils->GetWeights()[offset]; break;
case U_DERIV : weight = &_stencils->GetDuWeights()[offset]; break;
case V_DERIV : weight = &_stencils->GetDvWeights()[offset]; break;
default:
return;
}
assert( weight);
float * out = _outData + r.begin() * _outStride;
for (int i=r.begin(); i<r.end(); ++i, ++sizes) {
memset( out, 0, _length * sizeof(float) );
for (int j=0; j<(*sizes); ++j, ++index, ++weight) {
float const * cv = _ctrlData + (*index)*_ctrlDesc.stride;
for (int k=0; k<_length; ++k) {
out[k] += cv[k] * (*weight);
}
}
out+=_outStride;
}
}
private:
Far::LimitStencilTables const * _stencils;
Mode _mode;
VertexBufferDescriptor _ctrlDesc;
float const * _ctrlData;
int _length,
_outStride;
float * _outData;
};
// Evaluates the point stencils of 'context' into the currently bound output
// buffer, using tbb::parallel_for over all stencils.
//
// @param context  context holding the limit stencil tables (must be non-null)
//
// @return the number of stencils evaluated, or 0 when there are no stencils,
//         a buffer is not bound, or the descriptors are incompatible
int
TbbEvalStencilsController::_UpdateValues( CpuEvalStencilsContext * context ) {

    Far::LimitStencilTables const * stencils = context->GetStencilTables();
    if (not stencils)
        return 0;

    int nstencils = stencils->GetNumStencils();
    if (not nstencils)
        return 0;

    // The kernel only asserts on its pointers; reject unbound buffers here so
    // that builds without asserts do not dereference them.
    if (not _currentBindState.controlData or not _currentBindState.outputData)
        return 0;

    StencilKernel kernel( stencils, _currentBindState.controlDataDesc,
                                    _currentBindState.controlData );

    if (not kernel.SetOutput( StencilKernel::POINT,
                              _currentBindState.outputDataDesc,
                              _currentBindState.outputData ))
        return 0;

    tbb::blocked_range<int> range(0, nstencils, grain_size);

    tbb::parallel_for(range, kernel);

    return nstencils;
}
// Evaluates the U derivative weights, then the V derivative weights, of the
// context's stencils from the currently bound control data into the bound
// derivative outputs.
// Returns the number of stencils processed, or 0 when there are no stencil
// tables, no stencils, or one of the outputs cannot be bound. Note that a
// failure to bind the V output returns 0 after the U pass has already run.
int
TbbEvalStencilsController::_UpdateDerivs( CpuEvalStencilsContext * context ) {

    Far::LimitStencilTables const * tables = context->GetStencilTables();
    if (tables == 0) {
        return 0;
    }

    int const count = tables->GetNumStencils();
    if (count == 0) {
        return 0;
    }

    // The same index range is used for both derivative passes.
    tbb::blocked_range<int> const all(0, count, grain_size);

    StencilKernel kernel( tables,
                          _currentBindState.controlDataDesc,
                          _currentBindState.controlData );

    // U derivative pass
    bool const uBound = kernel.SetOutput( StencilKernel::U_DERIV,
                                          _currentBindState.outputDuDesc,
                                          _currentBindState.outputUDeriv );
    if (not uBound) {
        return 0;
    }
    tbb::parallel_for(all, kernel);

    // V derivative pass
    bool const vBound = kernel.SetOutput( StencilKernel::V_DERIV,
                                          _currentBindState.outputDvDesc,
                                          _currentBindState.outputVDeriv );
    if (not vBound) {
        return 0;
    }
    tbb::parallel_for(all, kernel);

    return count;
}
// No-op: the body is empty.
// NOTE(review): presumably nothing has to be waited on because the
// tbb::parallel_for calls above return only once their loops have
// completed -- confirm.
void
TbbEvalStencilsController::Synchronize() {
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -1,216 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef FAR_TBB_EVALSTENCILS_CONTROLLER_H
#define FAR_TBB_EVALSTENCILS_CONTROLLER_H
#include "../version.h"
#include "../osd/cpuEvalStencilsContext.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
///
/// \brief TBB stencils evaluation controller
///
/// TbbEvalStencilsController is a compute controller class to launch
/// multi-threaded (TBB) CPU stencil evaluation kernels.
///
/// Controller entities execute requests from Context instances that they share
/// common interfaces with. Controllers are attached to discrete compute devices
/// and share the devices resources with Context entities.
///
class TbbEvalStencilsController {
public:

    /// \brief Constructor.
    ///
    /// @param numThreads  specifies how many TBB threads to use.
    ///                    -1 attempts to use all available processors.
    ///
    TbbEvalStencilsController(int numThreads=-1);

    /// \brief Destructor.
    ~TbbEvalStencilsController();

    /// \brief Applies stencil weights to the control vertex data
    ///
    /// Applies the stencil weights to the control vertex data to evaluate the
    /// interpolated limit positions at the parametric locations of the stencils
    ///
    /// @param context          the CpuEvalStencilsContext with the stencil weights
    ///
    /// @param controlDataDesc  vertex buffer descriptor for the control vertex data
    ///
    /// @param controlVertices  vertex buffer with the control vertices data
    ///
    /// @param outputDataDesc   vertex buffer descriptor for the output vertex data
    ///
    /// @param outputData       output vertex buffer for the interpolated data
    ///
    /// @return  the value returned by the evaluation, or 0 when the context,
    ///          its stencil tables or its stencils are missing (in which
    ///          case no buffer is bound)
    ///
    template<class CONTROL_BUFFER, class OUTPUT_BUFFER>
    int UpdateValues( CpuEvalStencilsContext * context,
        VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices,
        VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) {

        // Check the context and its tables before dereferencing them : a
        // context without stencil tables simply has nothing to evaluate.
        if (not context or not context->GetStencilTables() or
            not context->GetStencilTables()->GetNumStencils())
            return 0;

        bindControlData( controlDataDesc, controlVertices );

        bindOutputData( outputDataDesc, outputData );

        int n = _UpdateValues( context );

        unbind();

        return n;
    }

    /// \brief Applies derivative stencil weights to the control vertex data
    ///
    /// Computes the U and V derivative stencils to the control vertex data at
    /// the parametric locations contained in each stencil
    ///
    /// @param context          the CpuEvalStencilsContext with the stencil weights
    ///
    /// @param controlDataDesc  vertex buffer descriptor for the control vertex data
    ///
    /// @param controlVertices  vertex buffer with the control vertices data
    ///
    /// @param outputDuDesc     vertex buffer descriptor for the U derivative output data
    ///
    /// @param outputDuData     output vertex buffer for the U derivative data
    ///
    /// @param outputDvDesc     vertex buffer descriptor for the V deriv output data
    ///
    /// @param outputDvData     output vertex buffer for the V derivative data
    ///
    /// @return  the value returned by the evaluation, or 0 when the context,
    ///          its stencil tables or its stencils are missing (in which
    ///          case no buffer is bound)
    ///
    template<class CONTROL_BUFFER, class OUTPUT_BUFFER>
    int UpdateDerivs( CpuEvalStencilsContext * context,
        VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices,
        VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData,
        VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) {

        // Check the context and its tables before dereferencing them : a
        // context without stencil tables simply has nothing to evaluate.
        if (not context or not context->GetStencilTables() or
            not context->GetStencilTables()->GetNumStencils())
            return 0;

        bindControlData( controlDataDesc, controlVertices );

        bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData );

        int n = _UpdateDerivs( context );

        unbind();

        return n;
    }

    /// Waits until all running subdivision kernels finish.
    void Synchronize();

protected:

    /// \brief Binds control vertex data buffer
    ///
    /// A null buffer is recorded as a null data pointer.
    template<class VERTEX_BUFFER>
    void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) {

        _currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0;
        _currentBindState.controlDataDesc = controlDataDesc;
    }

    /// \brief Binds output vertex data buffer
    ///
    /// A null buffer is recorded as a null data pointer.
    template<class VERTEX_BUFFER>
    void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) {

        _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0;
        _currentBindState.outputDataDesc = outputDataDesc;
    }

    /// \brief Binds output derivative vertex data buffer
    ///
    /// Null buffers are recorded as null data pointers.
    template<class VERTEX_BUFFER>
    void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu,
                              VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) {

        _currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0;
        _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0;
        _currentBindState.outputDuDesc = outputDuDesc;
        _currentBindState.outputDvDesc = outputDvDesc;
    }

    /// \brief Unbinds any previously bound vertex and varying data buffers.
    void unbind() {
        _currentBindState.Reset();
    }

private:

    // Evaluation entry points working on the current bind state.
    int _UpdateValues( CpuEvalStencilsContext * context );
    int _UpdateDerivs( CpuEvalStencilsContext * context );

    int _numThreads;

    // Bind state is a transitional state during refinement.
    // It doesn't take an ownership of vertex buffers.
    struct BindState {

        BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { }

        // Clears every data pointer and resets every descriptor.
        void Reset() {
            controlData = outputData = outputUDeriv = outputVDeriv = NULL;
            controlDataDesc.Reset();
            outputDataDesc.Reset();
            outputDuDesc.Reset();
            outputDvDesc.Reset();
        }

        // transient mesh data
        VertexBufferDescriptor controlDataDesc,
                               outputDataDesc,
                               outputDuDesc,
                               outputDvDesc;

        float * controlData,
              * outputData,
              * outputUDeriv,
              * outputVDeriv;
    };

    BindState _currentBindState;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // FAR_TBB_EVALSTENCILS_CONTROLLER_H

View File

@ -1,5 +1,5 @@
//
// Copyright 2013 Pixar
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
@ -22,46 +22,51 @@
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_COMPUTE_CONTROLLER_H
#define OSD_COMPUTE_CONTROLLER_H
#include "../osd/tbbEvaluator.h"
#include "../osd/tbbKernel.h"
#include "../version.h"
#include <tbb/task_scheduler_init.h>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/*!
\page sequence_page API sequence diagrams
// Raw-pointer overload of EvalStencils.
// Returns true right away when the requested range is empty (end <= start);
// otherwise forwards every argument unchanged to TbbEvalStencils() and then
// returns true.
/* static */
bool
TbbEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const unsigned char * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
if (end <= start) return true;
This section describes the typical sequence of initialization and drawing
animated prims using OpenSubdiv.
TbbEvalStencils(src, srcDesc, dst, dstDesc,
sizes, offsets, indices, weights, start, end);
\section init_sec Initialize
return true;
}
\image html OsdCreateSequence.png
// Empty body: the call performs no work and ignores its (unnamed) argument.
/* static */
void
TbbEvaluator::Synchronize(void *) {
}
\section draw_sec Refine and Draw
\image html OsdRefineDrawSequence.png
*/
// XXX: do we really need this base class?
// Stateless base class: it declares only a public virtual destructor and a
// protected default constructor, so it cannot be instantiated directly,
// only as the base of a derived class.
class ComputeController {
public:
virtual ~ComputeController() {}
protected:
ComputeController() {}
};
/* static */
void
TbbEvaluator::SetNumThreads(int numThreads) {
if (numThreads == -1) {
tbb::task_scheduler_init init;
} else {
tbb::task_scheduler_init init(numThreads);
}
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_COMPUTE_CONTROLLER_H

View File

@ -0,0 +1,112 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV_OSD_TBB_EVALUATOR_H
#define OPENSUBDIV_OSD_TBB_EVALUATOR_H
#include "../version.h"
#include "../osd/vertexDescriptor.h"
#include <cstddef>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Stencil evaluator running on the CPU through TBB.
///
/// All entry points are static member functions.
///
class TbbEvaluator {
public:
    /// \brief Generic static stencil eval function. This function has a same
    ///        signature as other device kernels have so that it can be called
    ///        transparently from OsdMesh template interface.
    ///
    /// @param srcVertexBuffer  Input primvar buffer.
    ///                         must have BindCpuBuffer() method returning a
    ///                         const float pointer for read
    ///
    /// @param srcDesc          vertex buffer descriptor for the input buffer
    ///
    /// @param dstVertexBuffer  Output primvar buffer
    ///                         must have BindCpuBuffer() method returning a
    ///                         float pointer for write
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
    /// @param stencilTable     stencil table to be applied.
    ///
    /// @param instance         not used in the tbb kernel
    ///                         (declared as a typed pointer to prevent
    ///                          undesirable template resolution)
    ///
    /// @param deviceContext    not used in the tbb kernel
    ///
    /// @return  the result of the raw-pointer overload, or true when the
    ///          stencil table holds no stencils (nothing to evaluate)
    ///
    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             VERTEX_BUFFER *dstVertexBuffer,
                             VertexBufferDescriptor const &dstDesc,
                             STENCIL_TABLE const *stencilTable,
                             TbbEvaluator const *instance = NULL,
                             void *deviceContext = NULL) {
        (void)instance;       // unused
        (void)deviceContext;  // unused

        // Both buffers are bound whether or not there is work to do, as
        // they always have been.
        const float *src = srcVertexBuffer->BindCpuBuffer();
        float *dst = dstVertexBuffer->BindCpuBuffer();

        // An empty table may expose empty arrays : taking the address of
        // their first element is not valid, so stop before touching them.
        // The raw-pointer overload also reports success for an empty range.
        int const numStencils = stencilTable->GetNumStencils();
        if (numStencils <= 0) {
            return true;
        }

        return EvalStencils(src,
                            srcDesc,
                            dst,
                            dstDesc,
                            &stencilTable->GetSizes()[0],
                            &stencilTable->GetOffsets()[0],
                            &stencilTable->GetControlIndices()[0],
                            &stencilTable->GetWeights()[0],
                            /*start = */ 0,
                            /*end   = */ numStencils);
    }

    /// \brief Static stencil eval function working on raw CPU pointers.
    ///
    /// @param src      input primvar data
    ///
    /// @param srcDesc  vertex buffer descriptor for the input data
    ///
    /// @param dst      output primvar data
    ///
    /// @param dstDesc  vertex buffer descriptor for the output data
    ///
    /// @param sizes    stencil sizes array of the stencil table
    ///
    /// @param offsets  stencil offsets array of the stencil table
    ///
    /// @param indices  control indices array of the stencil table
    ///
    /// @param weights  weights array of the stencil table
    ///
    /// @param start    index of the first stencil of the range to evaluate
    ///
    /// @param end      end of the range of stencils to evaluate
    ///
    static bool EvalStencils(const float *src,
                             VertexBufferDescriptor const &srcDesc,
                             float *dst,
                             VertexBufferDescriptor const &dstDesc,
                             const unsigned char *sizes,
                             const int *offsets,
                             const int *indices,
                             const float *weights,
                             int start,
                             int end);

    /// \brief Synchronization entry point taking an optional device context.
    static void Synchronize(void *deviceContext = NULL);

    /// \brief Sets the number of threads to be used by the tbb kernels.
    static void SetNumThreads(int numThreads);
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV_OSD_TBB_EVALUATOR_H

View File

@ -161,15 +161,15 @@ public:
};
void
TbbComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end) {
TbbEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end) {
if (start > 0) {
sizes += start;

View File

@ -22,8 +22,8 @@
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_TBB_KERNEL_H
#define OSD_TBB_KERNEL_H
// Include guard: the macro defined here must be spelled exactly like the one
// tested by the #ifndef, otherwise the guard never takes effect.
#ifndef OPENSUBDIV_OSD_TBB_KERNEL_H
#define OPENSUBDIV_OSD_TBB_KERNEL_H
#include "../version.h"
@ -35,15 +35,15 @@ namespace Osd {
struct VertexBufferDescriptor;
void
TbbComputeStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end);
TbbEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
unsigned char const * sizes,
int const * offsets,
int const * indices,
float const * weights,
int start, int end);
} // end namespace Osd
@ -52,4 +52,4 @@ using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OSD_TBB_KERNEL_H
#endif // OPENSUBDIV_OSD_TBB_KERNEL_H

View File

@ -46,9 +46,9 @@ GLFWwindow* g_window=0;
#include <stdio.h>
#include <cassert>
#include <osd/cpuEvaluator.h>
#include <osd/cpuVertexBuffer.h>
#include <osd/cpuComputeController.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuGLVertexBuffer.h>
@ -268,17 +268,10 @@ static int
checkMeshCPU( FarTopologyRefiner *refiner,
const std::vector<xyzVV>& coarseverts,
xyzmesh * refmesh) {
static Osd::CpuComputeController *controller =
new Osd::CpuComputeController();
Far::StencilTables const *vertexStencils;
Far::StencilTables const *varyingStencils;
buildStencilTables(*refiner, &vertexStencils, &varyingStencils);
Osd::CpuComputeContext *context = Osd::CpuComputeContext::Create(
vertexStencils, varyingStencils);
assert(coarseverts.size() == (size_t)refiner->GetNumVerticesTotal());
@ -287,12 +280,16 @@ checkMeshCPU( FarTopologyRefiner *refiner,
vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() );
controller->Compute( context, vb );
Osd::CpuEvaluator::EvalStencils(
vb,
Osd::VertexBufferDescriptor(0, 3, 3),
vb,
Osd::VertexBufferDescriptor(refiner->GetNumVertices(0)*3, 3, 3),
vertexStencils);
int result = checkVertexBuffer(*refiner, refmesh, vb->BindCpuBuffer(),
vb->GetNumElements());
delete context;
delete vertexStencils;
delete varyingStencils;
delete vb;
@ -305,28 +302,26 @@ static int
checkMeshCPUGL(FarTopologyRefiner *refiner,
const std::vector<xyzVV>& coarseverts,
xyzmesh * refmesh) {
static Osd::CpuComputeController *controller =
new Osd::CpuComputeController();
Far::StencilTables const *vertexStencils;
Far::StencilTables const *varyingStencils;
buildStencilTables(*refiner, &vertexStencils, &varyingStencils);
Osd::CpuComputeContext *context = Osd::CpuComputeContext::Create(
vertexStencils, varyingStencils);
Osd::CpuGLVertexBuffer *vb = Osd::CpuGLVertexBuffer::Create(3,
refiner->GetNumVerticesTotal());
vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() );
controller->Compute( context, vb );
Osd::CpuEvaluator::EvalStencils(
vb,
Osd::VertexBufferDescriptor(0, 3, 3),
vb,
Osd::VertexBufferDescriptor(refiner->GetNumVertices(0)*3, 3, 3),
vertexStencils);
int result = checkVertexBuffer(*refiner, refmesh,
vb->BindCpuBuffer(), vb->GetNumElements());
delete context;
delete vertexStencils;
delete varyingStencils;
delete vb;

View File

@ -32,8 +32,7 @@
#include <opensubdiv/far/topologyRefinerFactory.h>
#include <opensubdiv/far/stencilTablesFactory.h>
#include <opensubdiv/osd/cpuComputeContext.h>
#include <opensubdiv/osd/cpuComputeController.h>
#include <opensubdiv/osd/cpuEvaluator.h>
#include <opensubdiv/osd/cpuVertexBuffer.h>
#include <cstdio>
@ -73,11 +72,10 @@ int main(int, char **) {
nCoarseVerts=0,
nRefinedVerts=0;
Osd::CpuComputeContext * context=0;
//
// Setup phase
//
Far::StencilTables const * stencilTables = NULL;
{ // Setup Context
Far::TopologyRefiner const * refiner = createTopologyRefiner(maxlevel);
@ -87,24 +85,15 @@ int main(int, char **) {
options.generateOffsets=true;
options.generateIntermediateLevels=false;
Far::StencilTables const * stencilTables =
Far::StencilTablesFactory::Create(*refiner, options);
// Create an Osd Compute Context from the stencil tables
context = Osd::CpuComputeContext::Create(stencilTables,
/*vayingStencil=*/NULL);
stencilTables = Far::StencilTablesFactory::Create(*refiner, options);
nCoarseVerts = refiner->GetNumVertices(0);
nRefinedVerts = stencilTables->GetNumStencils();
// We are done with Far: cleanup tables
delete refiner;
delete stencilTables;
}
// Setup Controller
Osd::CpuComputeController controller;
// Setup a buffer for vertex primvar data:
Osd::CpuVertexBuffer * vbuffer =
Osd::CpuVertexBuffer::Create(3, nCoarseVerts + nRefinedVerts);
@ -117,8 +106,14 @@ int main(int, char **) {
// and update every time control data changes
vbuffer->UpdateData(g_verts, 0, nCoarseVerts);
Osd::VertexBufferDescriptor srcDesc(0, 3, 3);
Osd::VertexBufferDescriptor dstDesc(nCoarseVerts*3, 3, 3);
// Launch the computation
controller.Compute(context, vbuffer);
Osd::CpuEvaluator::EvalStencils(vbuffer, srcDesc,
vbuffer, dstDesc,
stencilTables);
}
{ // Visualization with Maya : print a MEL script that generates particles
@ -133,8 +128,8 @@ int main(int, char **) {
printf("-c 1;\n");
}
delete stencilTables;
delete vbuffer;
delete context;
}
//------------------------------------------------------------------------------