Osd API refactor: EvalStencils and EvalPatches

Add EvalStencils and EvalPatches API for most of CPU and GPU evaluators.

with this change, Eval API in the osd layer consists of following parts:

- Evaluators (Cpu, Omp, Tbb, Cuda, CL, GLXFB, GLCompute, D3D11Compute)
  implements EvalStencils and EvalPatches(*). Both supports derivatives
  (not fully implemented though)

- Interop vertex buffer classes (optional, same as before)
  Note that these classes are not necessary to use Evaluators.
  All evaluators have EvalStencils/Patches which take device-specific
  buffer objects. For example, GLXFBEvaluator can take GLuint directly
  for both stencil tables and input primvars. Although using these
  interop classes makes it easy to integrate osd into relatively
  simple applications.

- device-dependent StencilTable and PatchTable (optional)
  These are also optional, but can be used simply a substitute of
  Far::StencilTable and Far::PatchTable for osd evaluators.

- PatchArray, PatchCoord, PatchParam
  They are tiny structs used for GPU based patch evaluation.

(*) TODO and known issues:
- CLEvaluator and D3D11Evaluator's EvalPatches() have not been implemented.
- GPU Gregory patch evaluation has not been implemented in EvalPatches().
- CudaEvaluator::EvalPatches() is very unstable.
- All patch evaluation kernels have not been well optimized.
- Currently GLXFB kernel doesn't support derivative evaluation.
   There's a technical difficulty for the multi-stream output.
This commit is contained in:
Takahito Tejima 2015-05-25 21:51:55 -07:00
parent d335c7249e
commit 541aeddd3a
49 changed files with 5816 additions and 1038 deletions

6
examples/dxPtexViewer/dxPtexViewer.cpp Executable file → Normal file
View File

@ -819,7 +819,7 @@ createOsdMesh(int level, int kernel) {
//------------------------------------------------------------------------------
static void
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
@ -984,7 +984,7 @@ drawModel() {
UINT hOffsets = 0;
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
g_pd3dDeviceContext->IASetIndexBuffer(
@ -993,7 +993,7 @@ drawModel() {
// patch drawing
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();

View File

@ -630,7 +630,7 @@ ShaderCache g_shaderCache;
//------------------------------------------------------------------------------
static void
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
typedef OpenSubdiv::Far::PatchDescriptor Descriptor;
@ -822,7 +822,7 @@ display() {
UINT hOffsets = 0;
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
g_pd3dDeviceContext->IASetIndexBuffer(
@ -834,7 +834,7 @@ display() {
int numDrawCalls = 0;
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();

630
examples/glEvalLimit/glEvalLimit.cpp Executable file → Normal file
View File

@ -44,15 +44,54 @@ GLFWmonitor* g_primary=0;
#include <osd/cpuEvaluator.h>
#include <osd/cpuVertexBuffer.h>
#include <osd/cpuPatchTable.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/mesh.h>
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaEvaluator.h>
#include <osd/cudaVertexBuffer.h>
#include <osd/cudaGLVertexBuffer.h>
#include <osd/cudaPatchTable.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clVertexBuffer.h>
#include <osd/clGLVertexBuffer.h>
#include <osd/clEvaluator.h>
#include <osd/clPatchTable.h>
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
#include <osd/glXFBEvaluator.h>
#include <osd/glVertexBuffer.h>
#include <osd/glPatchTable.h>
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
#include <osd/glComputeEvaluator.h>
#include <osd/glVertexBuffer.h>
#include <osd/glPatchTable.h>
#endif
#include <far/gregoryBasis.h>
#include <far/endCapGregoryBasisPatchFactory.h>
#include <far/topologyRefiner.h>
#include <far/stencilTableFactory.h>
#include <far/patchTableFactory.h>
#include <far/patchMap.h>
#include <far/error.h>
@ -75,32 +114,44 @@ GLFWmonitor* g_primary=0;
using namespace OpenSubdiv;
//------------------------------------------------------------------------------
enum KernelType { kCPU = 0,
kOPENMP = 1,
kTBB = 2,
kCUDA = 3,
kCL = 4,
kGLXFB = 5,
kGLCompute = 6 };
enum EndCap { kEndCapBSplineBasis,
kEndCapGregoryBasis };
enum DrawMode { kUV,
kVARYING,
kNORMAL,
kSHADE,
kFACEVARYING };
std::vector<float> g_orgPositions,
g_positions,
g_varyingColors;
int g_currentShape = 0,
g_level = 3,
g_kernel = kCPU,
g_endCap = kEndCapBSplineBasis,
g_numElements = 3;
std::vector<int> g_coarseEdges;
std::vector<float> g_coarseEdgeSharpness;
std::vector<float> g_coarseVertexSharpness;
enum DrawMode { kRANDOM=0,
kUV,
kVARYING,
kNORMAL,
kSHADE,
kFACEVARYING };
int g_running = 1,
g_width = 1024,
g_height = 1024,
g_fullscreen = 0,
g_drawCageEdges = 1,
g_drawCageVertices = 1,
g_drawMode = kVARYING,
g_drawMode = kUV,
g_prev_x = 0,
g_prev_y = 0,
g_mbutton[3] = {0, 0, 0},
@ -131,11 +182,9 @@ float g_computeTime = 0;
Stopwatch g_fpsTimer;
//------------------------------------------------------------------------------
int g_nparticles=0,
g_nsamples=101,
g_nsamplesFound=0;
int g_nParticles = 65536;
bool g_randomStart=false;
bool g_randomStart = true;//false;
GLuint g_cageEdgeVAO = 0,
g_cageEdgeVBO = 0,
@ -155,6 +204,7 @@ struct Program {
GLuint attrColor;
GLuint attrTangentU;
GLuint attrTangentV;
GLuint attrPatchCoord;
} g_defaultProgram;
//------------------------------------------------------------------------------
@ -204,37 +254,189 @@ createCoarseMesh(OpenSubdiv::Far::TopologyRefiner const & refiner) {
}
//------------------------------------------------------------------------------
Far::TopologyRefiner * g_topologyRefiner = 0;
Far::StencilTable const * g_vertexStencils = NULL;
Far::StencilTable const * g_varyingStencils = NULL;
Far::PatchTable const * g_patchTable = NULL;
Far::PatchMap const * g_patchMap = NULL;
std::vector<Osd::PatchCoord> g_patchCoords;
Osd::VertexBufferDescriptor g_idesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 3),
g_odesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
g_vdesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
g_duDesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
g_dvDesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
g_fvidesc(/*offset*/ 0, /*legnth*/ 2, /*stride*/ 2),
g_fvodesc(/*offset*/ 3, /*legnth*/ 2, /*stride*/ 6);
// input and output vertex data
class EvalOutputBase {
public:
virtual ~EvalOutputBase() {}
virtual GLuint BindVertexData() const = 0;
virtual GLuint BindDerivatives() const = 0;
virtual GLuint BindPatchCoords() const = 0;
virtual void UpdateData(const float *src, int startVertex, int numVertices) = 0;
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) = 0;
virtual void Refine() = 0;
virtual void EvalPatches() = 0;
virtual void EvalPatchesWithDerivatives() = 0;
virtual void EvalPatchesVarying() = 0;
virtual void UpdatePatchCoords(
std::vector<Osd::PatchCoord> const &patchCoords) = 0;
};
// input vertex data (coarse + refined)
Osd::CpuVertexBuffer * g_vertexData = 0;
Osd::CpuVertexBuffer * g_varyingData = 0;
// note: Since we don't have a class for device-patchcoord container in osd,
// we cheat to use vertexbuffer as a patch-coord (5int) container.
//
// Please don't follow the pattern in your actual application.
//
template<typename SRC_VERTEX_BUFFER, typename EVAL_VERTEX_BUFFER,
typename STENCIL_TABLE, typename PATCH_TABLE, typename EVALUATOR,
typename DEVICE_CONTEXT = void>
class EvalOutput : public EvalOutputBase {
public:
typedef OpenSubdiv::Osd::EvaluatorCacheT<EVALUATOR> EvaluatorCache;
// output vertex data (limit locations)
Osd::CpuGLVertexBuffer * g_outVertexData = NULL;
Osd::CpuGLVertexBuffer * g_outDerivatives = NULL;
EvalOutput(Far::StencilTable const *vertexStencils,
Far::StencilTable const *varyingStencils,
int numCoarseVerts, int numTotalVerts, int numParticles,
Far::PatchTable const *patchTable,
EvaluatorCache *evaluatorCache = NULL,
DEVICE_CONTEXT *deviceContext = NULL)
: _srcDesc( /*offset*/ 0, /*length*/ 3, /*stride*/ 3),
_srcVaryingDesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 3),
_vertexDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
_varyingDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
_duDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
_dvDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
_deviceContext(deviceContext) {
_srcData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
_srcVaryingData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
_vertexData = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
_derivatives = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
_patchTable = PATCH_TABLE::Create(patchTable, _deviceContext);
_patchCoords = NULL;
_numCoarseVerts = numCoarseVerts;
_vertexStencils =
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(vertexStencils, _deviceContext);
_varyingStencils =
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(varyingStencils, _deviceContext);
_evaluatorCache = evaluatorCache;
}
~EvalOutput() {
delete _srcData;
delete _srcVaryingData;
delete _vertexData;
delete _derivatives;
delete _patchTable;
delete _patchCoords;
}
virtual GLuint BindVertexData() const {
return _vertexData->BindVBO();
}
virtual GLuint BindDerivatives() const {
return _derivatives->BindVBO();
}
virtual GLuint BindPatchCoords() const {
return _patchCoords->BindVBO();
}
virtual void UpdateData(const float *src, int startVertex, int numVertices) {
_srcData->UpdateData(src, startVertex, numVertices, _deviceContext);
}
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) {
_srcVaryingData->UpdateData(src, startVertex, numVertices, _deviceContext);
}
virtual void Refine() {
Osd::VertexBufferDescriptor dstDesc = _srcDesc;
dstDesc.offset += _numCoarseVerts * _srcDesc.stride;
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcDesc, dstDesc, _deviceContext);
EVALUATOR::EvalStencils(_srcData, _srcDesc,
_srcData, dstDesc,
_vertexStencils,
evalInstance,
_deviceContext);
dstDesc = _srcVaryingDesc;
dstDesc.offset += _numCoarseVerts * _srcVaryingDesc.stride;
evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcVaryingDesc, dstDesc, _deviceContext);
EVALUATOR::EvalStencils(_srcVaryingData, _srcVaryingDesc,
_srcVaryingData, dstDesc,
_varyingStencils,
evalInstance,
_deviceContext);
}
virtual void EvalPatches() {
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
EVALUATOR::EvalPatches(
_srcData, _srcDesc,
_vertexData, _vertexDesc,
_patchCoords->GetNumVertices(),
_patchCoords,
_patchTable, evalInstance, _deviceContext);
}
virtual void EvalPatchesWithDerivatives() {
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
EVALUATOR::EvalPatches(
_srcData, _srcDesc,
_vertexData, _vertexDesc,
_derivatives, _duDesc,
_derivatives, _dvDesc,
_patchCoords->GetNumVertices(),
_patchCoords,
_patchTable, evalInstance, _deviceContext);
}
virtual void EvalPatchesVarying() {
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcVaryingDesc, _varyingDesc, _deviceContext);
EVALUATOR::EvalPatches(
_srcVaryingData, _srcVaryingDesc,
// varyingdata is interleved in vertexData.
_vertexData, _varyingDesc,
_patchCoords->GetNumVertices(),
_patchCoords,
_patchTable, evalInstance, _deviceContext);
}
virtual void UpdatePatchCoords(
std::vector<Osd::PatchCoord> const &patchCoords) {
if (_patchCoords and
_patchCoords->GetNumVertices() != (int)patchCoords.size()) {
delete _patchCoords;
_patchCoords = NULL;
}
if (not _patchCoords) {
_patchCoords = EVAL_VERTEX_BUFFER::Create(5,
(int)patchCoords.size(),
_deviceContext);
}
_patchCoords->UpdateData((float*)&patchCoords[0], 0, (int)patchCoords.size(), _deviceContext);
}
private:
SRC_VERTEX_BUFFER *_srcData;
SRC_VERTEX_BUFFER *_srcVaryingData;
EVAL_VERTEX_BUFFER *_vertexData;
EVAL_VERTEX_BUFFER *_derivatives;
EVAL_VERTEX_BUFFER *_varyingData;
EVAL_VERTEX_BUFFER *_patchCoords;
PATCH_TABLE *_patchTable;
Osd::VertexBufferDescriptor _srcDesc;
Osd::VertexBufferDescriptor _srcVaryingDesc;
Osd::VertexBufferDescriptor _vertexDesc;
Osd::VertexBufferDescriptor _varyingDesc;
Osd::VertexBufferDescriptor _duDesc;
Osd::VertexBufferDescriptor _dvDesc;
int _numCoarseVerts;
STENCIL_TABLE const *_vertexStencils;
STENCIL_TABLE const *_varyingStencils;
EvaluatorCache *_evaluatorCache;
DEVICE_CONTEXT *_deviceContext;
};
EvalOutputBase *g_evalOutput = NULL;
STParticles * g_particles=0;
//------------------------------------------------------------------------------
static void
updateGeom() {
int nverts = (int)g_orgPositions.size() / 3;
const float *p = &g_orgPositions[0];
@ -255,28 +457,18 @@ updateGeom() {
Stopwatch s;
s.Start();
g_vertexData->UpdateData( &g_positions[0], 0, nverts);
// update coarse vertices
g_evalOutput->UpdateData(&g_positions[0], 0, nverts);
if (! g_topologyRefiner) return;
// update coarse varying
if (g_drawMode == kVARYING) {
g_evalOutput->UpdateVaryingData(&g_varyingColors[0], 0, nverts);
// note that for patch eval we need coarse+refined combined buffer.
int nCoarseVertices = g_topologyRefiner->GetLevel(0).GetNumVertices();
Osd::CpuEvaluator::EvalStencils(g_vertexData,
Osd::VertexBufferDescriptor(0, 3, 3),
g_vertexData,
Osd::VertexBufferDescriptor(
nCoarseVertices*3, 3, 3),
g_vertexStencils);
if (g_varyingData) {
Osd::CpuEvaluator::EvalStencils(g_varyingData,
Osd::VertexBufferDescriptor(0, 3, 3),
g_varyingData,
Osd::VertexBufferDescriptor(
nCoarseVertices*3, 3, 3),
g_varyingStencils);
}
// Refine
g_evalOutput->Refine();
s.Stop();
g_computeTime = float(s.GetElapsed() * 1000.0f);
@ -287,60 +479,28 @@ updateGeom() {
// Apply 'dynamics' update
assert(g_particles);
g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time...
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
= g_particles->GetPatchCoords();
// resolve particle positions into patch handles
// XXX: this process should be handled by OsdKernel in parallel
g_patchCoords.clear();
for (int i = 0; i < g_particles->GetNumParticles(); ++i) {
STParticles::Position const &position = g_particles->GetPositions()[i];
Far::PatchTable::PatchHandle const *handle =
g_patchMap->FindPatch(position.ptexIndex, position.s, position.t);
if (handle) {
g_patchCoords.push_back(Osd::PatchCoord(
*handle, position.s, position.t));
}
}
// update patchcoord to be evaluated
g_evalOutput->UpdatePatchCoords(patchCoords);
// Evaluate the positions of the samples on the limit surface
if (g_drawMode == kNORMAL || g_drawMode == kSHADE) {
// evaluate positions and derivatives
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
g_vertexData, g_idesc,
g_outVertexData, g_odesc,
g_outDerivatives, g_duDesc,
g_outDerivatives, g_dvDesc,
(int)g_patchCoords.size(),
&g_patchCoords[0],
g_patchTable, NULL);
g_evalOutput->EvalPatchesWithDerivatives();
} else {
// evaluate positions
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
g_vertexData, g_idesc,
g_outVertexData, g_odesc,
(int)g_patchCoords.size(),
&g_patchCoords[0],
g_patchTable, NULL);
g_evalOutput->EvalPatches();
}
// color
if (g_drawMode == kUV) {
// store patchCoords as colors
float *p = g_outVertexData->BindCpuBuffer() + g_vdesc.offset;
for (int i = 0; i < (int)g_patchCoords.size(); ++i) {
p[0] = g_patchCoords[i].s;
p[1] = g_patchCoords[i].t;
p[2] = 0;
p += g_vdesc.stride;
}
} else if (g_drawMode == kVARYING) {
if (g_drawMode == kVARYING) {
// XXX: is this really varying?
Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc,
g_outVertexData, g_vdesc,
(int)g_patchCoords.size(),
&g_patchCoords[0],
g_patchTable, NULL);
g_evalOutput->EvalPatchesVarying();
}
s.Stop();
@ -359,8 +519,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape);
OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape);
delete g_topologyRefiner;
g_topologyRefiner =
Far::TopologyRefiner *topologyRefiner =
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Create(*shape,
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
@ -371,22 +530,16 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
float speed = g_particles ? g_particles->GetSpeed() : 0.2f;
// Create the 'uv particles' manager - this class manages the limit
// location samples (ptex face index, (s,t) and updates them between frames.
// Note: the number of limit locations can be entirely arbitrary
delete g_particles;
g_particles = new STParticles(*g_topologyRefiner, g_nsamples, !g_randomStart);
g_nparticles = g_particles->GetNumParticles();
g_particles->SetSpeed(speed);
createCoarseMesh(*g_topologyRefiner);
createCoarseMesh(*topologyRefiner);
Far::StencilTable const * vertexStencils = NULL;
Far::StencilTable const * varyingStencils = NULL;
int nverts=0;
{
// Apply feature adaptive refinement to the mesh so that we can use the
// limit evaluation API features.
Far::TopologyRefiner::AdaptiveOptions options(level);
g_topologyRefiner->RefineAdaptive(options);
topologyRefiner->RefineAdaptive(options);
// Generate stencil table to update the bi-cubic patches control
// vertices after they have been re-posed (both for vertex & varying
@ -395,27 +548,33 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
soptions.generateOffsets=true;
soptions.generateIntermediateLevels=true;
Far::StencilTable const * vertexStencils =
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
vertexStencils =
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
soptions.interpolationMode = Far::StencilTableFactory::INTERPOLATE_VARYING;
Far::StencilTable const * varyingStencils =
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
varyingStencils =
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
// Generate bi-cubic patch table for the limit surface
Far::PatchTableFactory::Options poptions;
poptions.SetEndCapType(
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
if (g_endCap == kEndCapBSplineBasis) {
poptions.SetEndCapType(
Far::PatchTableFactory::Options::ENDCAP_BSPLINE_BASIS);
} else {
poptions.SetEndCapType(
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
}
Far::PatchTable const * patchTable =
Far::PatchTableFactory::Create(*g_topologyRefiner, poptions);
Far::PatchTableFactory::Create(*topologyRefiner, poptions);
// append endcap stencils
if (Far::StencilTable const *endCapVertexStencilTable =
patchTable->GetEndCapVertexStencilTable()) {
Far::StencilTable const *table =
Far::StencilTableFactory::AppendEndCapStencilTable(
*g_topologyRefiner,
*topologyRefiner,
vertexStencils, endCapVertexStencilTable);
delete vertexStencils;
vertexStencils = table;
@ -424,7 +583,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
patchTable->GetEndCapVaryingStencilTable()) {
Far::StencilTable const *table =
Far::StencilTableFactory::AppendEndCapStencilTable(
*g_topologyRefiner,
*topologyRefiner,
varyingStencils, endCapVaryingStencilTable);
delete varyingStencils;
varyingStencils = table;
@ -434,45 +593,100 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
nverts = vertexStencils->GetNumControlVertices() +
vertexStencils->GetNumStencils();
if (g_vertexStencils) delete g_vertexStencils;
g_vertexStencils = vertexStencils;
if (g_varyingStencils) delete g_varyingStencils;
g_varyingStencils = varyingStencils;
if (g_patchTable) delete g_patchTable;
g_patchTable = patchTable;
// Create a far patch map
if (g_patchMap) delete g_patchMap;
g_patchMap = new Far::PatchMap(*g_patchTable);
}
{ // Create vertex primvar buffer for the CVs
delete g_vertexData;
g_vertexData = Osd::CpuVertexBuffer::Create(3, nverts);
// note that for patch eval we need coarse+refined combined buffer.
int nCoarseVertices = topologyRefiner->GetLevel(0).GetNumVertices();
// Create varying primvar buffer for the CVs with random colors.
// These are immediately interpolated (once) and saved for display.
delete g_varyingData; g_varyingData = 0;
if (g_drawMode==kVARYING) {
g_varyingData = Osd::CpuVertexBuffer::Create(3, nverts);
g_varyingData->UpdateData(
&g_varyingColors[0], 0, (int)g_varyingColors.size()/3 );
}
// Create output buffers for the limit samples (position & tangents)
delete g_outVertexData;
g_outVertexData = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
memset(g_outVertexData->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
if (g_drawMode==kRANDOM) {
createRandomColors(g_nparticles, 6, g_outVertexData->BindCpuBuffer()+3);
}
delete g_outDerivatives;
g_outDerivatives = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
memset(g_outDerivatives->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
delete g_evalOutput;
if (g_kernel == kCPU) {
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
Osd::CpuGLVertexBuffer,
Far::StencilTable,
Osd::CpuPatchTable,
Osd::CpuEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (g_kernel == kOPENMP) {
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
Osd::CpuGLVertexBuffer,
Far::StencilTable,
Osd::CpuPatchTable,
Osd::OmpEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (g_kernel == kTBB) {
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
Osd::CpuGLVertexBuffer,
Far::StencilTable,
Osd::CpuPatchTable,
Osd::TbbEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (g_kernel == kCUDA) {
g_evalOutput = new EvalOutput<Osd::CudaVertexBuffer,
Osd::CudaGLVertexBuffer,
Osd::CudaStencilTable,
Osd::CudaPatchTable,
Osd::CudaEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if (g_kernel == kCL) {
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
g_evalOutput = new EvalOutput<Osd::CLVertexBuffer,
Osd::CLGLVertexBuffer,
Osd::CLStencilTable,
Osd::CLPatchTable,
Osd::CLEvaluator,
CLDeviceContext>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable,
&clEvaluatorCache, &g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if (g_kernel == kGLXFB) {
static Osd::EvaluatorCacheT<Osd::GLXFBEvaluator> glXFBEvaluatorCache;
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
Osd::GLVertexBuffer,
Osd::GLStencilTableTBO,
Osd::GLPatchTable,
Osd::GLXFBEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable,
&glXFBEvaluatorCache);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if (g_kernel == kGLCompute) {
static Osd::EvaluatorCacheT<Osd::GLComputeEvaluator> glComputeEvaluatorCache;
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
Osd::GLVertexBuffer,
Osd::GLStencilTableSSBO,
Osd::GLPatchTable,
Osd::GLComputeEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable,
&glComputeEvaluatorCache);
#endif
}
// Create the 'uv particles' manager - this class manages the limit
// location samples (ptex face index, (s,t) and updates them between frames.
// Note: the number of limit locations can be entirely arbitrary
delete g_particles;
g_particles = new STParticles(*topologyRefiner, g_patchTable,
g_nParticles, !g_randomStart);
g_nParticles = g_particles->GetNumParticles();
g_particles->SetSpeed(speed);
updateGeom();
}
@ -505,32 +719,33 @@ linkDefaultProgram() {
"in vec3 color;\n"
"in vec3 tangentU;\n"
"in vec3 tangentV;\n"
"in vec2 patchCoord;\n"
"out vec4 fragColor;\n"
"out vec3 normal;\n"
"uniform mat4 ModelViewMatrix;\n"
"uniform mat4 ProjectionMatrix;\n"
"uniform int DrawMode;\n"
"void main() {\n"
" fragColor = vec4(color, 1);\n"
// XXX: fix the normal transform
" normal = (ModelViewMatrix * vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
" vec3 normal = (ModelViewMatrix * "
" vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
" gl_Position = ProjectionMatrix * ModelViewMatrix * "
" vec4(position, 1);\n"
" if (DrawMode == 0) {\n" // UV
" fragColor = vec4(patchCoord.x, patchCoord.y, 0, 1);\n"
" } else if (DrawMode == 2) {\n"
" fragColor = vec4(normal*0.5+vec3(0.5), 1);\n"
" } else if (DrawMode == 3) {\n"
" fragColor = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
" } else {\n" // varying
" fragColor = vec4(color, 1);\n"
" }\n"
"}\n";
static const char *fsSrc =
GLSL_VERSION_DEFINE
"in vec4 fragColor;\n"
"in vec3 normal;\n"
"uniform int DrawMode;\n"
"out vec4 color;\n"
"void main() {\n"
" if (DrawMode == 3) {\n"
" color = vec4(normal*0.5+vec3(0.5), 1);\n"
" } else if (DrawMode == 4) {\n"
" color = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
" } else {\n"
" color = fragColor;\n"
" }\n"
" color = fragColor;\n"
"}\n";
GLuint program = glCreateProgram();
@ -544,6 +759,7 @@ linkDefaultProgram() {
glBindAttribLocation(program, 1, "color");
glBindAttribLocation(program, 2, "tangentU");
glBindAttribLocation(program, 3, "tangentV");
glBindAttribLocation(program, 4, "patchCoord");
glBindFragDataLocation(program, 0, "color");
glLinkProgram(program);
@ -571,6 +787,7 @@ linkDefaultProgram() {
g_defaultProgram.attrColor = glGetAttribLocation(program, "color");
g_defaultProgram.attrTangentU = glGetAttribLocation(program, "tangentU");
g_defaultProgram.attrTangentV = glGetAttribLocation(program, "tangentV");
g_defaultProgram.attrPatchCoord = glGetAttribLocation(program, "patchCoord");
return true;
}
@ -621,6 +838,7 @@ drawCageEdges() {
glEnableVertexAttribArray(g_defaultProgram.attrColor);
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glVertexAttribPointer(g_defaultProgram.attrPosition,
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(g_defaultProgram.attrColor,
@ -680,6 +898,7 @@ drawCageVertices() {
glEnableVertexAttribArray(g_defaultProgram.attrColor);
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glVertexAttribPointer(g_defaultProgram.attrPosition,
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(g_defaultProgram.attrColor,
@ -711,27 +930,33 @@ drawSamples() {
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
glBindBuffer(GL_ARRAY_BUFFER, g_outVertexData->BindVBO());
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindVertexData());
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
glBindBuffer(GL_ARRAY_BUFFER, g_outDerivatives->BindVBO());
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindDerivatives());
glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(3, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindPatchCoords());
glVertexAttribPointer(4, 2, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 5, (float*)12);
glEnableVertexAttribArray(g_defaultProgram.attrPosition);
glEnableVertexAttribArray(g_defaultProgram.attrColor);
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
glEnableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glPointSize(2.0f);
glDrawArrays(GL_POINTS, 0, g_nparticles);
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
glDrawArrays(GL_POINTS, 0, nPatchCoords);
glPointSize(1.0f);
glDisableVertexAttribArray(g_defaultProgram.attrPosition);
glDisableVertexAttribArray(g_defaultProgram.attrColor);
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glBindVertexArray(0);
@ -789,9 +1014,10 @@ display() {
double fps = 1.0/g_fpsTimer.GetElapsed();
g_fpsTimer.Start();
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
g_hud.DrawString(10, -150, "Particle Speed ([) (]): %.1f", g_particles->GetSpeed());
g_hud.DrawString(10, -120, "# Samples : (%d/%d)",
g_nsamplesFound, g_outVertexData->GetNumVertices());
g_hud.DrawString(10, -120, "# Samples : (%d / %d)", nPatchCoords, g_nParticles);
g_hud.DrawString(10, -100, "Compute : %.3f ms", g_computeTime);
g_hud.DrawString(10, -80, "Eval : %.3f ms", g_evalTime * 1000.f);
g_hud.DrawString(10, -60, "GPU Draw : %.3f ms", drawGpuTime);
@ -884,9 +1110,11 @@ void windowClose(GLFWwindow*) {
//------------------------------------------------------------------------------
static void
setSamples(bool add) {
g_nsamples += add ? 50 : -50;
g_nsamples = std::max(0, g_nsamples);
if (add) {
g_nParticles = g_nParticles * 2;
} else {
g_nParticles = std::max(1, g_nParticles / 2);
}
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
@ -936,6 +1164,40 @@ callbackModel(int m) {
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
//------------------------------------------------------------------------------
static void
callbackEndCap(int endCap) {
g_endCap = endCap;
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
//------------------------------------------------------------------------------
static void
callbackKernel(int k) {
g_kernel = k;
#ifdef OPENSUBDIV_HAS_OPENCL
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
if (g_clDeviceContext.Initialize() == false) {
printf("Error in initializing OpenCL\n");
exit(1);
}
}
#endif
#ifdef OPENSUBDIV_HAS_CUDA
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
if (g_cudaDeviceContext.Initialize() == false) {
printf("Error in initializing Cuda\n");
exit(1);
}
}
#endif
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
//------------------------------------------------------------------------------
static void
callbackLevel(int l) {
@ -1001,10 +1263,40 @@ initHUD() {
g_hud.AddCheckBox("Animate vertices (M)", g_moveScale != 0, 10, 50, callbackAnimate, 0, 'm');
g_hud.AddCheckBox("Freeze (spc)", false, 10, 70, callbackFreeze, 0, ' ');
g_hud.AddCheckBox("Random Start", false, 10, 120, callbackCentered, g_randomStart);
g_hud.AddCheckBox("Random Start", g_randomStart, 10, 120, callbackCentered, 0);
int compute_pulldown = g_hud.AddPullDown("Compute (K)", 475, 10, 300,
callbackKernel, 'k');
g_hud.AddPullDownButton(compute_pulldown, "CPU", kCPU);
#ifdef OPENSUBDIV_HAS_OPENMP
g_hud.AddPullDownButton(compute_pulldown, "OPENMP", kOPENMP);
#endif
#ifdef OPENSUBDIV_HAS_TBB
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
g_hud.AddPullDownButton(compute_pulldown, "GL XFB", kGLXFB);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
g_hud.AddPullDownButton(compute_pulldown, "GL Compute", kGLCompute);
#endif
int endcap_pulldown = g_hud.AddPullDown("End cap (E)", 10, 140, 200,
callbackEndCap, 'e');
g_hud.AddPullDownButton(endcap_pulldown, "BSpline",
kEndCapBSplineBasis,
g_endCap == kEndCapBSplineBasis);
g_hud.AddPullDownButton(endcap_pulldown, "GregoryBasis",
kEndCapGregoryBasis,
g_endCap == kEndCapGregoryBasis);
int shading_pulldown = g_hud.AddPullDown("Shading (W)", 250, 10, 250, callbackDisplayVaryingColors, 'w');
g_hud.AddPullDownButton(shading_pulldown, "Random", kRANDOM, g_drawMode==kRANDOM);
g_hud.AddPullDownButton(shading_pulldown, "(u,v)", kUV, g_drawMode==kUV);
g_hud.AddPullDownButton(shading_pulldown, "Varying", kVARYING, g_drawMode==kVARYING);
g_hud.AddPullDownButton(shading_pulldown, "Normal", kNORMAL, g_drawMode==kNORMAL);

View File

@ -25,38 +25,104 @@
#include "particles.h"
#include <far/ptexIndices.h>
#include <far/patchMap.h>
#ifdef OPENSUBDIV_HAS_TBB
#include <tbb/parallel_for.h>
#include <tbb/atomic.h>
tbb::atomic<int> g_tbbCounter;
class TbbUpdateKernel {
public:
TbbUpdateKernel(float speed,
STParticles::Position *positions,
float *velocities,
std::vector<STParticles::FaceInfo> const &adjacency,
OpenSubdiv::Osd::PatchCoord *patchCoords,
OpenSubdiv::Far::PatchMap const *patchMap) :
_speed(speed), _positions(positions), _velocities(velocities),
_adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) {
}
void operator () (tbb::blocked_range<int> const &r) const {
for (int i = r.begin(); i < r.end(); ++i) {
STParticles::Position * p = _positions + i;
float *dp = _velocities + i*2;
// apply velocity
p->s += dp[0] * _speed;
p->t += dp[1] * _speed;
// make sure particles can't skip more than 1 face boundary at a time
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
// check if the particle is jumping a boundary
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
// this is not treated here.
int edge = -1;
if (p->s >= 1.0f) edge = 1;
if (p->s <= 0.0f) edge = 3;
if (p->t >= 1.0f) edge = 2;
if (p->t <= 0.0f) edge = 0;
if (edge>=0) {
// warp the particle to the other side of the boundary
STParticles::WarpParticle(_adjacency, edge, p, dp);
}
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
// resolve particle positions into patch handles
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
if (handle) {
int index = g_tbbCounter.fetch_and_add(1);
_patchCoords[index] =
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t);
}
}
}
private:
float _speed;
STParticles::Position *_positions;
float *_velocities;
std::vector<STParticles::FaceInfo> const &_adjacency;
OpenSubdiv::Osd::PatchCoord *_patchCoords;
OpenSubdiv::Far::PatchMap const *_patchMap;
};
#endif
#include <cassert>
STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered) :
STParticles::STParticles(Refiner const & refiner,
PatchTable const *patchTable,
int nParticles, bool centered) :
_speed(1.0f) {
OpenSubdiv::Far::PtexIndices ptexIndices(refiner);
int nptexfaces = ptexIndices.GetNumFaces(),
nsamples = nptexfaces * nparticles;
// Create a far patch map
_patchMap = new OpenSubdiv::Far::PatchMap(*patchTable);
int nPtexFaces = ptexIndices.GetNumFaces();
srand(static_cast<int>(2147483647));
{ // initialize positions
_positions.resize(nsamples);
_positions.resize(nParticles);
Position * pos = &_positions[0];
for (int i=0; i<nptexfaces; ++i) {
for (int j=0; j<nparticles; ++j) {
pos->ptexIndex = i;
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
++pos;
}
for (int i = 0; i < nParticles; ++i) {
pos->ptexIndex = (int)(((float)rand()/(float)RAND_MAX) * nPtexFaces);
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
++pos;
}
}
{ // initialize velocities
_velocities.resize(nsamples*2);
_velocities.resize(nParticles * 2);
for (int i=0; i<nsamples; ++i) {
for (int i = 0; i < nParticles; ++i) {
// initialize normalized random directions
float s = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
t = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
@ -68,7 +134,7 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
}
{ // initialize topology adjacency
_adjacency.resize(nptexfaces);
_adjacency.resize(nPtexFaces);
OpenSubdiv::Far::TopologyLevel const & refBaseLevel = refiner.GetLevel(0);
@ -97,42 +163,6 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
//std::cout << *this;
}
void
STParticles::Update(float deltaTime) {
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
Position * p = &_positions[0];
float * dp = &_velocities[0];
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
// apply velocity
p->s += dp[0] * speed;
p->t += dp[1] * speed;
// make sure particles can't skip more than 1 face boundary at a time
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
// check if the particle is jumping a boundary
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
// this is not treated here.
int edge = -1;
if (p->s >= 1.0f) edge = 1;
if (p->s <= 0.0f) edge = 3;
if (p->t >= 1.0f) edge = 2;
if (p->t <= 0.0f) edge = 0;
if (edge>=0) {
// warp the particle to the other side of the boundary
warpParticle(edge, p, dp);
}
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
}
inline void
FlipS(STParticles::Position * p, float * dp) {
p->s = 1.0f-p->s;
@ -163,6 +193,15 @@ Rotate(int rot, STParticles::Position * p, float * dp) {
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
inline void
Trim(STParticles::Position * p) {
if (p->s <0.0f) p->s = 1.0f + p->s;
if (p->s>=1.0f) p->s = p->s - 1.0f;
if (p->t <0.0f) p->t = 1.0f + p->t;
if (p->t>=1.0f) p->t = p->t - 1.0f;
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
inline void
Clamp(STParticles::Position * p) {
if (p->s<0.0f) {
@ -192,21 +231,12 @@ Bounce(int edge, STParticles::Position * p, float * dp) {
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
inline void
Trim(STParticles::Position * p) {
if (p->s <0.0f) p->s = 1.0f + p->s;
if (p->s>=1.0f) p->s = p->s - 1.0f;
if (p->t <0.0f) p->t = 1.0f + p->t;
if (p->t>=1.0f) p->t = p->t - 1.0f;
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
void
STParticles::warpParticle(int edge, Position * p, float * dp) {
assert(p->ptexIndex<(int)_adjacency.size() and (edge>=0 and edge<4));
STParticles::WarpParticle(std::vector<FaceInfo> const &adjacency,
int edge, Position * p, float * dp) {
assert(p->ptexIndex<(int)adjacency.size() and (edge>=0 and edge<4));
FaceInfo const & f = _adjacency[p->ptexIndex];
FaceInfo const & f = adjacency[p->ptexIndex];
int afid = f.adjface(edge),
aeid = f.adjedge(edge);
@ -215,7 +245,7 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
// boundary detected: bounce the particle
Bounce(edge, p, dp);
} else {
FaceInfo const & af = _adjacency[afid];
FaceInfo const & af = adjacency[afid];
int rot = edge - aeid + 2;
bool fIsSubface = f.isSubface(),
@ -233,6 +263,66 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
STParticles::~STParticles() {
delete _patchMap;
}
void
STParticles::Update(float deltaTime) {
if (fabs(GetSpeed()) < 0.001f) return;
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
_patchCoords.clear();
// XXX: this process should be parallelized.
#ifdef OPENSUBDIV_HAS_TBB
_patchCoords.resize((int)GetNumParticles());
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
_adjacency, &_patchCoords[0], _patchMap);;
g_tbbCounter = 0;
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
tbb::parallel_for(range, kernel);
_patchCoords.resize(g_tbbCounter);
#else
Position * p = &_positions[0];
float * dp = &_velocities[0];
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
// apply velocity
p->s += dp[0] * speed;
p->t += dp[1] * speed;
// make sure particles can't skip more than 1 face boundary at a time
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
// check if the particle is jumping a boundary
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
// this is not treated here.
int edge = -1;
if (p->s >= 1.0f) edge = 1;
if (p->s <= 0.0f) edge = 3;
if (p->t >= 1.0f) edge = 2;
if (p->t <= 0.0f) edge = 0;
if (edge>=0) {
// warp the particle to the other side of the boundary
WarpParticle(_adjacency, edge, p, dp);
}
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
// resolve particle positions into patch handles
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
if (handle) {
_patchCoords.push_back(
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t));
}
}
#endif
}
// Dump adjacency info
std::ostream & operator << (std::ostream & os,
STParticles::FaceInfo const & f) {

View File

@ -26,7 +26,8 @@
#define ST_PARTICLES_H
#include <far/topologyRefiner.h>
#include <far/patchMap.h>
#include <osd/types.h>
#include <iostream>
//
@ -72,47 +73,6 @@ public:
float s, t; ///< parametric location on face
};
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
STParticles(Refiner const & refiner, int nparticles, bool centered=false);
void Update(float deltaTime);
int GetNumParticles() const {
return (int)_positions.size();
}
void SetSpeed(float speed) {
_speed = std::max(-1.0f, std::min(1.0f, speed));
}
float GetSpeed() const {
return _speed;
}
std::vector<Position> & GetPositions() {
return _positions;
}
std::vector<float> & GetVelocities() {
return _velocities;
}
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
private:
//
// Particle "Dynamics"
//
std::vector<Position> _positions;
std::vector<float> _velocities;
float _speed; // velocity multiplier
private:
//
// Topology adjacency (borrowed from Ptexture.h)
//
@ -152,11 +112,63 @@ private:
int adjfaces[4];
};
void warpParticle(int edge, Position * p, float * dp);
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
typedef OpenSubdiv::Far::PatchTable PatchTable;
STParticles(Refiner const & refiner, PatchTable const *patchTable,
int nparticles, bool centered=false);
~STParticles();
void Update(float deltaTime);
int GetNumParticles() const {
return (int)_positions.size();
}
void SetSpeed(float speed) {
_speed = std::max(-1.0f, std::min(1.0f, speed));
}
float GetSpeed() const {
return _speed;
}
std::vector<Position> & GetPositions() {
return _positions;
}
std::vector<float> & GetVelocities() {
return _velocities;
}
std::vector<OpenSubdiv::Osd::PatchCoord> GetPatchCoords() const {
return _patchCoords;
}
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
static void WarpParticle(std::vector<FaceInfo> const &adjacency,
int edge, Position * p, float * dp);
private:
//
// Particle "Dynamics"
//
std::vector<Position> _positions;
std::vector<float> _velocities;
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;
float _speed; // velocity multiplier
friend std::ostream & operator << (std::ostream & os, FaceInfo const & f);
std::vector<FaceInfo> _adjacency;
OpenSubdiv::Far::PatchMap const *_patchMap;
};
#endif // ST_PARTICLES_H

View File

@ -786,7 +786,7 @@ bindTextures() {
}
static GLenum
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
@ -868,7 +868,7 @@ display() {
glBindVertexArray(g_vao);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
if (g_displayStyle == kWire)
@ -879,7 +879,7 @@ display() {
// patch drawing
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
GLenum primType = bindProgram(GetEffect(), patch);
@ -909,7 +909,7 @@ display() {
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
GLenum primType = bindProgram(GetEffect(/*uvDraw=*/ true), patch);

4
examples/glImaging/glImaging.cpp Executable file → Normal file
View File

@ -422,11 +422,11 @@ void runTest(ShapeDesc const &shapeDesc, std::string const &kernel,
mesh->GetPatchTable()->GetPatchParamTextureBuffer());
}
Osd::GLPatchTable::PatchArrayVector const & patches =
Osd::PatchArrayVector const & patches =
mesh->GetPatchTable()->GetPatchArrays();
for (int i=0; i<(int)patches.size(); ++i) {
Osd::GLPatchTable::PatchArray const & patch = patches[i];
Osd::PatchArray const & patch = patches[i];
Far::PatchDescriptor desc = patch.GetDescriptor();
Far::PatchDescriptor::Type patchType = desc.GetType();

View File

@ -597,7 +597,7 @@ static void bindTextures(Effect effect) {
}
static GLuint
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
@ -670,12 +670,12 @@ display() {
glBindVertexArray(g_vao);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
// patch drawing
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
GLenum primType = GL_PATCHES;
@ -807,13 +807,13 @@ drawStroke(int x, int y) {
effect.paint = 1;
bindTextures(effect);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
// patch drawing
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
GLenum primType = GL_PATCHES;

View File

@ -1249,7 +1249,7 @@ bindTextures() {
//------------------------------------------------------------------------------
static GLenum
bindProgram(Effect effect,
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
GLDrawConfig *config = g_shaderCache.GetDrawConfig(effectDesc);
@ -1296,10 +1296,10 @@ drawModel() {
glBindVertexArray(g_vao);
// patch drawing
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
Effect effect;
effect.value = 0;

View File

@ -1181,7 +1181,7 @@ bindTextures() {
static GLenum
bindProgram(Effect effect,
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
// only legacy gregory needs maxValence and numElements
@ -1299,7 +1299,7 @@ display() {
glBindVertexArray(g_vao);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
// patch drawing
@ -1316,7 +1316,7 @@ display() {
// core draw-calls
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();

View File

@ -29,6 +29,7 @@
set(CPU_SOURCE_FILES
cpuEvaluator.cpp
cpuKernel.cpp
cpuPatchTable.cpp
cpuVertexBuffer.cpp
)
@ -43,10 +44,12 @@ set(PRIVATE_HEADER_FILES
set(PUBLIC_HEADER_FILES
cpuEvaluator.h
cpuPatchTable.h
cpuVertexBuffer.h
mesh.h
nonCopyable.h
opengl.h
types.h
vertexDescriptor.h
)
@ -216,6 +219,7 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS})
# OpenCL code & dependencies
set(OPENCL_PUBLIC_HEADERS
clEvaluator.h
clPatchTable.h
clVertexBuffer.h
opencl.h
)
@ -223,6 +227,7 @@ set(OPENCL_PUBLIC_HEADERS
if ( OPENCL_FOUND )
list(APPEND GPU_SOURCE_FILES
clEvaluator.cpp
clPatchTable.cpp
clVertexBuffer.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
@ -254,12 +259,14 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
# CUDA code & dependencies
set(CUDA_PUBLIC_HEADERS
cudaEvaluator.h
cudaPatchTable.h
cudaVertexBuffer.h
)
if( CUDA_FOUND )
list(APPEND GPU_SOURCE_FILES
cudaEvaluator.cpp
cudaPatchTable.cpp
cudaVertexBuffer.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS})

View File

@ -27,6 +27,7 @@
#include <sstream>
#include <string>
#include <vector>
#include <cstdio>
#include "../osd/opencl.h"
#include "../far/error.h"
@ -87,11 +88,12 @@ CLStencilTable::~CLStencilTable() {
CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue)
: _clContext(context), _clCommandQueue(queue),
_program(NULL), _stencilsKernel(NULL) {
_program(NULL), _stencilKernel(NULL), _patchKernel(NULL) {
}
CLEvaluator::~CLEvaluator() {
if (_stencilsKernel) clReleaseKernel(_stencilsKernel);
if (_stencilKernel) clReleaseKernel(_stencilKernel);
if (_patchKernel) clReleaseKernel(_patchKernel);
if (_program) clReleaseProgram(_program);
}
@ -145,7 +147,13 @@ CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
return false;
}
_stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum);
_stencilKernel = clCreateKernel(_program, "computeStencils", &errNum);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
return false;
}
_patchKernel = clCreateKernel(_program, "computePatches", &errNum);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
@ -169,24 +177,24 @@ CLEvaluator::EvalStencils(cl_mem src,
size_t globalWorkSize = (size_t)(end - start);
clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes);
clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets);
clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices);
clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights);
clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start);
clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end);
clSetKernelArg(_stencilKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_stencilKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_stencilKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_stencilKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_stencilKernel, 4, sizeof(cl_mem), &sizes);
clSetKernelArg(_stencilKernel, 5, sizeof(cl_mem), &offsets);
clSetKernelArg(_stencilKernel, 6, sizeof(cl_mem), &indices);
clSetKernelArg(_stencilKernel, 7, sizeof(cl_mem), &weights);
clSetKernelArg(_stencilKernel, 8, sizeof(int), &start);
clSetKernelArg(_stencilKernel, 9, sizeof(int), &end);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _stencilsKernel, 1, NULL,
_clCommandQueue, _stencilKernel, 1, NULL,
&globalWorkSize, NULL, 0, NULL, NULL);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyStencilTableKernel (%d) ", errNum);
"ApplyStencilKernel (%d) ", errNum);
return false;
}
@ -194,6 +202,51 @@ CLEvaluator::EvalStencils(cl_mem src,
return true;
}
bool
CLEvaluator::EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
cl_mem dst, VertexBufferDescriptor const &dstDesc,
cl_mem du, VertexBufferDescriptor const &duDesc,
cl_mem dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
cl_mem patchCoordsBuffer,
cl_mem patchArrayBuffer,
cl_mem patchIndexBuffer,
cl_mem patchParamBuffer) const {
size_t globalWorkSize = (size_t)(numPatchCoords);
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_patchKernel, 4, sizeof(int), &numPatchCoords);
// clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
// clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
// clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
// clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
// clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
// clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_patchKernel, 5, sizeof(cl_mem), &patchCoordsBuffer);
clSetKernelArg(_patchKernel, 6, sizeof(cl_mem), &patchArrayBuffer);
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &patchIndexBuffer);
clSetKernelArg(_patchKernel, 8, sizeof(cl_mem), &patchParamBuffer);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _patchKernel, 1, NULL,
&globalWorkSize, NULL, 0, NULL, NULL);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyPatchKernel (%d) ", errNum);
return false;
}
clFinish(_clCommandQueue);
return true;
}
/* static */
void
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {

View File

@ -28,6 +28,7 @@
#include "../version.h"
#include "../osd/opencl.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -75,9 +76,6 @@ private:
// ---------------------------------------------------------------------------
/// \brief OpenCL stencil kernel
///
///
class CLEvaluator {
public:
typedef bool Instantiatable;
@ -107,6 +105,12 @@ public:
return NULL;
}
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static compute function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
@ -124,7 +128,7 @@ public:
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// OpenCL memory interfaces.
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
@ -137,25 +141,25 @@ public:
/// cl_command_queue GetCommandQueue()
/// methods.
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE,
typename DEVICE_CONTEXT>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
template <typename SRC_BUFFER, typename DST_BUFFER,
typename STENCIL_TABLE, typename DEVICE_CONTEXT>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
if (instance) {
return instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
} else {
// Create an instance on demand (slow)
instance = Create(srcDesc, dstDesc, deviceContext);
if (instance) {
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
delete instance;
return r;
@ -167,15 +171,14 @@ public:
/// Generic compute function.
/// Dispatch the CL compute kernel asynchronously.
/// Returns false if the kernel hasn't been compiled yet.
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue),
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue),
srcDesc,
dstVertexBuffer->BindCLBuffer(_clCommandQueue),
dstBuffer->BindCLBuffer(_clCommandQueue),
dstDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
@ -187,10 +190,8 @@ public:
/// Dispatch the CL compute kernel asynchronously.
/// returns false if the kernel hasn't been compiled yet.
bool EvalStencils(cl_mem src,
VertexBufferDescriptor const &srcDesc,
cl_mem dst,
VertexBufferDescriptor const &dstDesc,
bool EvalStencils(cl_mem src, VertexBufferDescriptor const &srcDesc,
cl_mem dst, VertexBufferDescriptor const &dstDesc,
cl_mem sizes,
cl_mem offsets,
cl_mem indices,
@ -198,6 +199,278 @@ public:
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
///
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, deviceContext);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer
///
/// @param duDesc
///
/// @param dvBuffer
///
/// @param dvDesc
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, deviceContext);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
0, VertexBufferDescriptor(),
0, VertexBufferDescriptor(),
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data of Du
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data of Dv
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
bool EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
cl_mem dst, VertexBufferDescriptor const &dstDesc,
cl_mem du, VertexBufferDescriptor const &duDesc,
cl_mem dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
cl_mem patchCoordsBuffer,
cl_mem patchArrayBuffer,
cl_mem patchIndexBuffer,
cl_mem patchParamsBuffer) const;
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure OpenCL kernel.
/// Returns false if it fails to compile the kernel.
bool Compile(VertexBufferDescriptor const &srcDesc,
@ -215,7 +488,8 @@ private:
cl_context _clContext;
cl_command_queue _clCommandQueue;
cl_program _program;
cl_kernel _stencilsKernel;
cl_kernel _stencilKernel;
cl_kernel _patchKernel;
};

View File

@ -85,3 +85,118 @@ __kernel void computeStencils(__global float * src,
writeVertex(dst, current, &v);
}
// ---------------------------------------------------------------------------
struct PatchArray {
int patchType;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
struct PatchCoord {
int arrayIndex;
int patchIndex;
int vertIndex;
float s;
float t;
};
struct PatchParam {
int faceIndex;
uint patchBits;
float sharpness;
};
static void getBSplineWeights(float t, float *point, float *deriv) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
point[3] = one6th * ( t3);
// Derivatives of the above four basis functions at t:
deriv[0] = -0.5f*t2 + t - 0.5f;
deriv[1] = 1.5f*t2 - 2.0f*t;
deriv[2] = -1.5f*t2 + t + 0.5f;
deriv[3] = 0.5f*t2;
}
__kernel void computePatches(__global float *src, int srcOffset,
__global float *dst, int dstOffset,
// __global float *du, int duOffset, int duStride,
// __global float *dv, int dvOffset, int dvStride,
int numPatchCoords,
__global struct PatchCoord *patchCoords,
__global struct PatchArray *patchArrayBuffer,
__global int *patchIndexBuffer,
__global struct PatchParam *patchParamBuffer) {
int current = get_global_id(0);
if (current > numPatchCoords) return;
src += srcOffset;
dst += dstOffset;
// du += duOffset;
// dv += dvOffset;
struct PatchCoord coord = patchCoords[current];
int patchIndex = coord.patchIndex;
// struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
struct PatchArray array = patchArrayBuffer[0];
int patchType = 6; // array.x XXX: REGULAR only for now.
int numControlVertices = 16;
uint patchBits = patchParamBuffer[patchIndex].patchBits;
// vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t));
float dScale = 1.0f;//float(1 << getDepth(patchBits));
float uv[2] = {coord.s, coord.t};
float wP[20], wDs[20], wDt[20];
if (patchType == 6) { // REGULAR
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
getBSplineWeights(uv[0], sWeights, dsWeights);
getBSplineWeights(uv[1], tWeights, dtWeights);
// adjustBoundaryWeights(patchBits, sWeights, tWeights);
// adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
}
}
} else {
// TODO: GREGORY BASIS
}
struct Vertex v;
clear(&v);
#if 1
// debug
v.v[0] = uv[0];
v.v[1] = uv[1];
v.v[2] = patchIndexBuffer[current] * 0.1;
writeVertex(dst, current, &v);
return;
#endif
int indexBase = array.indexBase + coord.vertIndex;
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
if (index < 0) index = 0;
addWithWeight(&v, src, index, wP[i]);
}
writeVertex(dst, current, &v);
}

View File

@ -0,0 +1,99 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/clPatchTable.h"
#include "../far/error.h"
#include "../far/patchTable.h"
#include "../osd/opencl.h"
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
CLPatchTable::CLPatchTable() :
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
}
CLPatchTable::~CLPatchTable() {
if (_patchArrays) clReleaseMemObject(_patchArrays);
if (_indexBuffer) clReleaseMemObject(_indexBuffer);
if (_patchParamBuffer) clReleaseMemObject(_patchParamBuffer);
}
CLPatchTable *
CLPatchTable::Create(Far::PatchTable const *farPatchTable,
cl_context clContext) {
CLPatchTable *instance = new CLPatchTable();
if (instance->allocate(farPatchTable, clContext)) return instance;
delete instance;
return 0;
}
bool
CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContext) {
CpuPatchTable patchTable(farPatchTable);
size_t numPatchArrays = patchTable.GetNumPatchArrays();
size_t indexSize = patchTable.GetPatchIndexSize();
size_t patchParamSize = patchTable.GetPatchParamSize();
cl_int err = 0;
_patchArrays = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
numPatchArrays * sizeof(Osd::PatchArray),
(void*)patchTable.GetPatchArrayBuffer(),
&err);
if (err != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
return false;
}
_indexBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
indexSize * sizeof(int),
(void*)patchTable.GetPatchIndexBuffer(),
&err);
if (err != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
return false;
}
_patchParamBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
patchParamSize * sizeof(Osd::PatchParam),
(void*)patchTable.GetPatchParamBuffer(),
&err);
if (err != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
return false;
}
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,91 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
#define OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
#include "../version.h"
#include "../osd/opencl.h"
#include "../osd/nonCopyable.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{
class PatchTable;
};
namespace Osd {
/// \brief CL patch table
///
/// This class is a CL buffer representation of Far::PatchTable.
///
/// CLEvaluator consumes this table to evaluate on the patches.
///
///
class CLPatchTable : private NonCopyable<CLPatchTable> {
public:
/// Creator. Returns NULL if error
static CLPatchTable *Create(Far::PatchTable const *patchTable,
cl_context clContext);
template <typename DEVICE_CONTEXT>
static CLPatchTable * Create(Far::PatchTable const *patchTable,
DEVICE_CONTEXT context) {
return Create(patchTable, context->GetContext());
}
/// Destructor
~CLPatchTable();
/// Returns the CL memory of the array of Osd::PatchArray buffer
cl_mem GetPatchArrayBuffer() const { return _patchArrays; }
/// Returns the CL memory of the patch control vertices
cl_mem GetPatchIndexBuffer() const { return _indexBuffer; }
/// Returns the CL memory of the array of Osd::PatchParam buffer
cl_mem GetPatchParamBuffer() const { return _patchParamBuffer; }
protected:
CLPatchTable();
bool allocate(Far::PatchTable const *patchTable, cl_context clContext);
cl_mem _patchArrays;
cl_mem _indexBuffer;
cl_mem _patchParamBuffer;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_CL_PATCH_TABLE_H

View File

@ -45,6 +45,12 @@ public:
/// Creator. Returns NULL if error.
static CLVertexBuffer * Create(int numElements, int numVertices, cl_context clContext);
template <typename DEVICE_CONTEXT>
static CLVertexBuffer * Create(int numElements, int numVertices,
DEVICE_CONTEXT context) {
return Create(numElements, numVertices, context->GetContext());
}
/// Destructor.
~CLVertexBuffer();
@ -52,6 +58,12 @@ public:
/// vertices data to Osd.
void UpdateData(const float *src, int startVertex, int numVertices, cl_command_queue clQueue);
template<typename DEVICE_CONTEXT>
void UpdateData(const float *src, int startVertex, int numVertices,
DEVICE_CONTEXT context) {
UpdateData(src, startVertex, numVertices, context->GetCommandQueue());
}
/// Returns how many elements defined in this vertex buffer.
int GetNumElements() const;

View File

@ -24,6 +24,7 @@
#include "../osd/cpuEvaluator.h"
#include "../osd/cpuKernel.h"
#include "../far/patchBasis.h"
#include <cstdlib>
@ -34,15 +35,15 @@ namespace Osd {
/* static */
bool
CpuEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
CpuEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
@ -55,30 +56,28 @@ CpuEvaluator::EvalStencils(const float *src,
/* static */
bool
CpuEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
CpuEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != dstDsDesc.length) return false;
if (srcDesc.length != dstDtDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
CpuEvalStencils(src, srcDesc,
dst, dstDesc,
dstDs, dstDsDesc,
dstDt, dstDtDesc,
du, duDesc,
dv, dvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
start, end);
@ -123,10 +122,13 @@ CpuEvaluator::EvalPatches(const float *src,
float *dst,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable) {
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer){
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
else return false;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
@ -134,14 +136,38 @@ CpuEvaluator::EvalPatches(const float *src,
float wP[20], wDs[20], wDt[20];
for (int i = 0; i < numPatchCoords; ++i) {
PatchCoord const &coords = patchCoords[i];
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
int patchType = array.GetPatchType();
// XXX: patchIndex is absolute. not sure it's consistent.
// (should be offsetted by array.primitiveIdBase?)
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
return false;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
for (int j = 0; j < cvs.size(); ++j) {
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
++dstT;
@ -151,47 +177,67 @@ CpuEvaluator::EvalPatches(const float *src,
/* static */
bool
CpuEvaluator::EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable) {
CpuEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer) {
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (dstDs) dstDs += dstDsDesc.offset;
if (dstDt) dstDt += dstDtDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
BufferAdapter<float> dstDsT(dstDs, dstDsDesc.length, dstDsDesc.stride);
BufferAdapter<float> dstDtT(dstDt, dstDtDesc.length, dstDtDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
BufferAdapter<float> duT (du, duDesc.length, duDesc.stride);
BufferAdapter<float> dvT (dv, dvDesc.length, dvDesc.stride);
float wP[20], wDs[20], wDt[20];
for (int i = 0; i < numPatchCoords; ++i) {
PatchCoord const &coords = patchCoords[i];
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
dstDsT.Clear();
dstDtT.Clear();
for (int j = 0; j < cvs.size(); ++j) {
duT.Clear();
dvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
dstDsT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dstDtT.AddWithWeight(srcT[cvs[j]], wDt[j]);
duT.AddWithWeight (srcT[cvs[j]], wDs[j]);
dvT.AddWithWeight (srcT[cvs[j]], wDt[j]);
}
++dstT;
++dstDsT;
++dstDtT;
++duT;
++dvT;
}
return true;
}

View File

@ -29,33 +29,14 @@
#include <cstddef>
#include <vector>
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Coordinates set on a patch table
/// XXX: this is a temporary structure, exists during Osd refactoring work.
///
struct PatchCoord {
/// \brief Constructor
///
/// @param p patch handle
///
/// @param s parametric location on the patch
///
/// @param t parametric location on the patch
///
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
handle(handle), s(s), t(t) { }
Far::PatchTable::PatchHandle handle; ///< patch handle
float s, t; ///< parametric location on patch
};
class CpuEvaluator {
public:
/// ----------------------------------------------------------------------
@ -80,7 +61,7 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
@ -89,20 +70,18 @@ public:
/// @param deviceContext not used in the cpu kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *instance = NULL,
void * deviceContext = NULL) {
return EvalStencils(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -125,24 +104,27 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param deviceContext not used in the cpu kernel
/// @param indices pointer to the indices buffer of the stencil table
///
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
@ -161,19 +143,19 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDsBuffer Output s-derivative buffer
/// @param duBuffer Output U-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDsDesc vertex buffer descriptor for the output buffer
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dstDtBuffer Output t-derivative buffer
/// @param dvBuffer Output V-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDtDesc vertex buffer descriptor for the output buffer
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
@ -182,28 +164,22 @@ public:
/// @param deviceContext not used in the cpu kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
DST_BUFFER *dstDsBuffer,
VertexBufferDescriptor const &dstDsDesc,
DST_BUFFER *dstDtBuffer,
VertexBufferDescriptor const &dstDtDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *evaluator = NULL,
void * deviceContext = NULL) {
(void)evaluator; // unused
(void)deviceContext; // unused
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *instance = NULL,
void * deviceContext = NULL) {
return EvalStencils(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
dstDsBuffer->BindCpuBuffer(),
dstDsDesc,
dstDtBuffer->BindCpuBuffer(),
dstDtDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -228,40 +204,44 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDs Output s-derivatives pointer. An offset of
/// dstDsDesc will be applied internally.
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param dstDsDesc vertex buffer descriptor for the output buffer
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dstDt Output t-derivatives pointer. An offset of
/// dstDtDesc will be applied internally.
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dstDtDesc vertex buffer descriptor for the output buffer
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param deviceContext not used in the cpu kernel
/// @param indices pointer to the indices buffer of the stencil table
///
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start,
int end);
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
@ -289,32 +269,35 @@ public:
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable,
CpuEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
return EvalPatches(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
numPatchCoords,
patchCoords,
patchTable);
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
@ -333,56 +316,59 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDsBuffer Output s-derivatives buffer
/// @param duBuffer Output U-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDsDesc vertex buffer descriptor for the dstDsBuffer
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dstDtBuffer Output t-derivatives buffer
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDtDesc vertex buffer descriptor for the dstDtBuffer
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
DST_BUFFER *dstDsBuffer,
VertexBufferDescriptor const &dstDsDesc,
DST_BUFFER *dstDtBuffer,
VertexBufferDescriptor const &dstDtDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable,
CpuEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
dstDsBuffer->BindCpuBuffer(),
dstDsDesc,
dstDtBuffer->BindCpuBuffer(),
dstDtDesc,
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
patchCoords,
patchTable);
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
@ -403,20 +389,23 @@ public:
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable on which primvars are evaluated
/// for the patchCoords
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param instance not used in the cpu evaluator
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param deviceContext not used in the cpu evaluator
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable);
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
@ -432,38 +421,45 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDs Output s-derivatives pointer. An offset of
/// dstDsDesc will be applied internally.
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param dstDsDesc vertex buffer descriptor for the dstDs buffer
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dstDt Output t-derivatives pointer. An offset of
/// dstDtDesc will be applied internally.
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dstDtDesc vertex buffer descriptor for the dstDt buffer
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable on which primvars are evaluated
/// for the patchCoords
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param instance not used in the cpu evaluator
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param deviceContext not used in the cpu evaluator
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable);
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// \brief synchronize all asynchronous computation invoked on this device.
static void Synchronize(void * /*deviceContext = NULL*/) {

View File

@ -0,0 +1,106 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
CpuPatchTable::CpuPatchTable(const Far::PatchTable *farPatchTable) {
int nPatchArrays = farPatchTable->GetNumPatchArrays();
// count
int numPatches = 0;
int numIndices = 0;
for (int j = 0; j < nPatchArrays; ++j) {
int nPatch = farPatchTable->GetNumPatches(j);
int nCV = farPatchTable->GetPatchArrayDescriptor(j).GetNumControlVertices();
numPatches += nPatch;
numIndices += nPatch * nCV;
}
_patchArrays.reserve(nPatchArrays);
_indexBuffer.reserve(numIndices);
_patchParamBuffer.reserve(numPatches);
// for each patchArray
for (int j = 0; j < nPatchArrays; ++j) {
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
farPatchTable->GetNumPatches(j),
(int)_indexBuffer.size(),
(int)_patchParamBuffer.size());
_patchArrays.push_back(patchArray);
// indices
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
for (int k = 0; k < indices.size(); ++k) {
_indexBuffer.push_back(indices[k]);
}
// patchParams bundling
// XXX: this process won't be needed if Far::PatchParam includes
// sharpness.
#if 0
// XXX: we need sharpness interface for patcharray or put sharpness
// into patchParam.
Far::ConstPatchParamArray patchParams =
farPatchTable->GetPatchParams(j);
for (int k = 0; k < patchParams.size(); ++k) {
float sharpness = 0.0;
_patchParamBuffer.push_back(patchParams[k].faceIndex);
_patchParamBuffer.push_back(patchParams[k].bitField.field);
_patchParamBuffer.push_back(*((unsigned int *)&sharpness));
}
#else
// XXX: workaround. GetPatchParamTable() will be deprecated though.
Far::PatchParamTable const & patchParamTable =
farPatchTable->GetPatchParamTable();
std::vector<Far::Index> const &sharpnessIndexTable =
farPatchTable->GetSharpnessIndexTable();
int numPatches = farPatchTable->GetNumPatches(j);
for (int k = 0; k < numPatches; ++k) {
float sharpness = 0.0;
int patchIndex = (int)_patchParamBuffer.size();
if (patchIndex < (int)sharpnessIndexTable.size()) {
int sharpnessIndex = sharpnessIndexTable[patchIndex];
if (sharpnessIndex >= 0)
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
}
PatchParam param;
//param.patchParam = patchParamTable[patchIndex];
param.faceIndex = patchParamTable[patchIndex].faceIndex;
param.patchBits = patchParamTable[patchIndex].bitField.field;
param.sharpness = sharpness;
_patchParamBuffer.push_back(param);
}
#endif
}
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,102 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
#define OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
#include "../version.h"
#include <vector>
#include "../far/patchDescriptor.h"
#include "../osd/nonCopyable.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{
class PatchTable;
};
namespace Osd {
/// \brief Cpu patch table
///
/// XXX: We can use just Far::PatchTable for typical CpuEval use cases.
///
/// Currently this class exists because of the template resolution
/// for the CpuEvaluator's generic interface functions
/// (glEvalLimit example uses), and
/// device-specific patch tables such as GLPatchTables internally use
/// as a staging buffer to splice patcharray and interleave sharpnesses.
///
/// Ideally Far::PatchTables should have the same data representation
/// and accessors so that we don't have to copy data unnecessarily.
///
class CpuPatchTable {
public:
static CpuPatchTable *Create(const Far::PatchTable *patchTable,
void *deviceContext = NULL) {
(void)deviceContext; // unused
return new CpuPatchTable(patchTable);
}
explicit CpuPatchTable(const Far::PatchTable *patchTable);
~CpuPatchTable() {}
const PatchArray *GetPatchArrayBuffer() const {
return &_patchArrays[0];
}
const int *GetPatchIndexBuffer() const {
return &_indexBuffer[0];
}
const PatchParam *GetPatchParamBuffer() const {
return &_patchParamBuffer[0];
}
size_t GetNumPatchArrays() const {
return _patchArrays.size();
}
size_t GetPatchIndexSize() const {
return _indexBuffer.size();
}
size_t GetPatchParamSize() const {
return _patchParamBuffer.size();
}
protected:
PatchArrayVector _patchArrays;
std::vector<int> _indexBuffer;
PatchParamVector _patchParamBuffer;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H

View File

@ -52,7 +52,8 @@ CpuVertexBuffer::Create(int numElements, int numVertices,
}
void
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
void * /*deviceContext*/) {
memcpy(_cpuBuffer + startVertex * _numElements,
src, GetNumElements() * numVertices * sizeof(float));

View File

@ -50,7 +50,8 @@ public:
/// This method is meant to be used in client code in order to provide
/// coarse vertices data to Osd.
void UpdateData(const float *src, int startVertex, int numVertices);
void UpdateData(const float *src, int startVertex, int numVertices,
void *deviceContext = NULL);
/// Returns how many elements defined in this vertex buffer.
int GetNumElements() const;

View File

@ -28,6 +28,7 @@
#include <vector>
#include "../far/stencilTable.h"
#include "../osd/types.h"
extern "C" {
void CudaEvalStencils(const float *src,
@ -41,6 +42,25 @@ extern "C" {
const float * weights,
int start,
int end);
void CudaEvalPatches(
const float *src, float *dst,
int length, int srcStride, int dstStride,
int numPatchCoords,
const void *patchCoords,
const void *patchArrays,
const int *patchIndices,
const void *patchParams);
void CudaEvalPatchesWithDerivatives(
const float *src, float *dst, float *du, float *dv,
int length,
int srcStride, int dstStride, int dvStride, int duStride,
int numPatchCoords,
const void *patchCoords,
const void *patchArrays,
const int *patchIndices,
const void *patchParams);
}
namespace OpenSubdiv {
@ -102,6 +122,8 @@ CudaEvaluator::EvalStencils(const float *src,
const float * weights,
int start,
int end) {
if (dst == NULL) return false;
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
@ -112,6 +134,105 @@ CudaEvaluator::EvalStencils(const float *src,
return true;
}
/* static */
bool
CudaEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDu,
VertexBufferDescriptor const &dstDuDesc,
float *dstDv,
VertexBufferDescriptor const &dstDvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start,
int end) {
// PERFORMANCE: need to combine 3 launches together
if (dst) {
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
sizes, offsets, indices, weights,
start, end);
}
if (dstDu) {
CudaEvalStencils(src + srcDesc.offset,
dstDu + dstDuDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDuDesc.stride,
sizes, offsets, indices, duWeights,
start, end);
}
if (dstDv) {
CudaEvalStencils(src + srcDesc.offset,
dstDv + dstDvDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDvDesc.stride,
sizes, offsets, indices, dvWeights,
start, end);
}
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
src += srcDesc.offset;
if (dst)
dst += dstDesc.offset;
CudaEvalPatches(src, dst,
srcDesc.length, srcDesc.stride, dstDesc.stride,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
CudaEvalPatchesWithDerivatives(
src, dst, du, dv,
srcDesc.length, srcDesc.stride,
dstDesc.stride, duDesc.stride, dvDesc.stride,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
void
CudaEvaluator::Synchronize(void * /*deviceContext*/) {

View File

@ -29,11 +29,13 @@
#include <vector>
#include "../osd/vertexDescriptor.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class PatchTable;
class StencilTable;
}
@ -43,7 +45,7 @@ namespace Osd {
///
/// This class is a cuda buffer representation of Far::StencilTable.
///
/// CudaComputeKernel consumes this table to apply stencils
/// CudaEvaluator consumes this table to apply stencils
///
///
class CudaStencilTable {
@ -72,10 +74,14 @@ private:
int _numStencils;
};
// ---------------------------------------------------------------------------
class CudaEvaluator {
public:
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static compute function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
@ -99,21 +105,18 @@ public:
///
/// @param deviceContext not used in the CudaEvaluator
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const void *instance = NULL,
void * deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const void *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcVertexBuffer->BindCudaBuffer(),
srcDesc,
dstVertexBuffer->BindCudaBuffer(),
dstDesc,
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
(int const *)stencilTable->GetSizesBuffer(),
(int const *)stencilTable->GetOffsetsBuffer(),
(int const *)stencilTable->GetIndicesBuffer(),
@ -122,17 +125,369 @@ public:
/*end = */ stencilTable->GetNumStencils());
}
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
/// \brief Static eval stencils function which takes raw cuda buffers for
/// input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivative buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dvBuffer Output V-derivative buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
///
/// @param instance not used in the cuda kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the cuda kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const CudaEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw cuda pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
(const int *)patchTable->GetPatchIndexBuffer(),
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
(const int *)patchTable->GetPatchIndexBuffer(),
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndices an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParams an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndices an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParams an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
static void Synchronize(void *deviceContext = NULL);
};

View File

@ -238,6 +238,204 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
// -----------------------------------------------------------------------------
// Osd::PatchCoord osd/types.h
struct PatchCoord {
int arrayIndex;
int patchIndex;
int vertIndex;
float s;
float t;
};
struct PatchArray {
int patchType; // Far::PatchDescriptor::Type
int numPatches;
int indexBase; // offset in the index buffer
int primitiveIdBase; // offset in the patch param buffer
};
struct PatchParam {
int faceIndex;
unsigned int bitField;
float sharpness;
};
__device__ void
getBSplineWeights(float t, float point[4], float deriv[4]) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float const one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
point[3] = one6th * ( t3);
// Derivatives of the above four basis functions at t:
if (deriv) {
deriv[0] = -0.5f*t2 + t - 0.5f;
deriv[1] = 1.5f*t2 - 2.0f*t;
deriv[2] = -1.5f*t2 + t + 0.5f;
deriv[3] = 0.5f*t2;
}
}
__device__ void
adjustBoundaryWeights(unsigned int bits, float sWeights[4], float tWeights[4]) {
int boundary = ((bits >> 4) & 0xf); // far/patchParam.h
if (boundary & 1) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if (boundary & 2) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if (boundary & 4) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if (boundary & 8) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
__device__
int getDepth(unsigned int patchBits) {
return (patchBits & 0x7);
}
__device__
float getParamFraction(unsigned int patchBits) {
bool nonQuadRoot = (patchBits >> 3) & 0x1;
int depth = getDepth(patchBits);
if (nonQuadRoot) {
return 1.0f / float( 1 << (depth-1) );
} else {
return 1.0f / float( 1 << depth );
}
}
__device__
void normalizePatchCoord(unsigned int patchBits, float *u, float *v) {
float frac = getParamFraction(patchBits);
int iu = (patchBits >> 22) & 0x3ff;
int iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = (float)iu*frac;
float pv = (float)iv*frac;
// normalize u,v coordinates
*u = (*u - pu) / frac,
*v = (*v - pv) / frac;
}
// Far::PatchDescriptor::Type
enum Type {
NON_PATCH = 0, ///< undefined
POINTS, ///< points (useful for cage drawing)
LINES, ///< lines (useful for cage drawing)
QUADS, ///< bilinear quads-only patches
TRIANGLES, ///< bilinear triangles-only mesh
LOOP, ///< Loop patch
REGULAR, ///< feature-adaptive bicubic patches
GREGORY,
GREGORY_BOUNDARY,
GREGORY_BASIS
};
__global__ void
computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
int first = threadIdx.x + blockIdx.x * blockDim.x;
// PERFORMANCE: not yet optimized
float wP[20], wDs[20], wDt[20];
for (int i = first; i < numPatchCoords; i += blockDim.x * gridDim.x) {
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrayBuffer[coord.arrayIndex];
int patchType = array.patchType;
int numControlVertices = 16;
// note: patchIndex is absolute.
unsigned int patchBits = patchParamBuffer[coord.patchIndex].bitField;
// normalize
float s = coord.s;
float t = coord.t;
normalizePatchCoord(patchBits, &s, &t);
float dScale = (float)(1 << getDepth(patchBits));
if (patchType == REGULAR) {
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
getBSplineWeights(s, sWeights, dsWeights);
getBSplineWeights(t, tWeights, dtWeights);
// Compute the tensor product weight of the (s,t) basis function
// corresponding to each control vertex:
adjustBoundaryWeights(patchBits, sWeights, tWeights);
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
}
}
} else if (patchType == GREGORY_BASIS) {
// XXX: not yet implemented.
continue;
} else {
// unknown patchType
continue;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.vertIndex];
float * dstVert = dst + i * dstStride;
clear(dstVert, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(dstVert, srcVert, wP[j], length);
}
if (dstDu) {
float *d = dstDu + i * dstDuStride;
clear(d, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(d, srcVert, wDs[j], length);
}
}
if (dstDv) {
float *d = dstDv + i * dstDvStride;
clear(d, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(d, srcVert, wDt[j], length);
}
}
}
}
// -----------------------------------------------------------------------------
#include "../version.h"
#define OPT_KERNEL(NUM_ELEMENTS, KERNEL, X, Y, ARG) \
@ -257,20 +455,12 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
extern "C" {
void CudaEvalStencils(const float *src,
float *dst,
int length,
int srcStride,
int dstStride,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end)
{
// assert(cvs and dst and sizes and offsets and indices and weights and (end>=start));
void CudaEvalStencils(
const float *src, float *dst,
int length, int srcStride, int dstStride,
const int * sizes, const int * offsets, const int * indices,
const float * weights,
int start, int end) {
if (length == 0 or srcStride == 0 or dstStride == 0 or (end <= start)) {
return;
}
@ -301,4 +491,36 @@ void CudaEvalStencils(const float *src,
// -----------------------------------------------------------------------------
void CudaEvalPatches(
const float *src, float *dst,
int length, int srcStride, int dstStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
// PERFORMANCE: not optimized at all
computePatches <<<512, 32>>>(
src, dst, NULL, NULL, length, srcStride, dstStride, 0, 0,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
}
void CudaEvalPatchesWithDerivatives(
const float *src, float *dst, float *dstDu, float *dstDv,
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
// PERFORMANCE: not optimized at all
computePatches <<<512, 32>>>(
src, dst, dstDu, dstDv, length, srcStride, dstStride, dstDuStride, dstDvStride,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
}
} /* extern "C" */

View File

@ -0,0 +1,103 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cudaPatchTable.h"
#include <cuda_runtime.h>
#include "../far/patchTable.h"
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
CudaPatchTable::CudaPatchTable() :
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
}
CudaPatchTable::~CudaPatchTable() {
if (_patchArrays) cudaFree(_patchArrays);
if (_indexBuffer) cudaFree(_indexBuffer);
if (_patchParamBuffer) cudaFree(_patchParamBuffer);
}
CudaPatchTable *
CudaPatchTable::Create(Far::PatchTable const *farPatchTable,
void * /*deviceContext*/) {
CudaPatchTable *instance = new CudaPatchTable();
if (instance->allocate(farPatchTable)) return instance;
delete instance;
return 0;
}
bool
CudaPatchTable::allocate(Far::PatchTable const *farPatchTable) {
CpuPatchTable patchTable(farPatchTable);
size_t numPatchArrays = patchTable.GetNumPatchArrays();
size_t indexSize = patchTable.GetPatchIndexSize();
size_t patchParamSize = patchTable.GetPatchParamSize();
cudaError_t err;
err = cudaMalloc(&_patchArrays, numPatchArrays * sizeof(Osd::PatchArray));
if (err != cudaSuccess) return false;
err = cudaMalloc(&_indexBuffer, indexSize * sizeof(int));
if (err != cudaSuccess) return false;
err = cudaMalloc(&_patchParamBuffer, patchParamSize * sizeof(Osd::PatchParam));
if (err != cudaSuccess) return false;
// copy patch array
err = cudaMemcpy(_patchArrays,
patchTable.GetPatchArrayBuffer(),
numPatchArrays * sizeof(Osd::PatchArray),
cudaMemcpyHostToDevice);
if (err != cudaSuccess) return false;
// copy index buffer
err = cudaMemcpy(_indexBuffer,
patchTable.GetPatchIndexBuffer(),
indexSize * sizeof(int),
cudaMemcpyHostToDevice);
if (err != cudaSuccess) return false;
// patch param buffer
err = cudaMemcpy(_patchParamBuffer,
patchTable.GetPatchParamBuffer(),
patchParamSize * sizeof(Osd::PatchParam),
cudaMemcpyHostToDevice);
if (err != cudaSuccess) return false;
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,83 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
#define OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
#include "../version.h"
#include "../osd/nonCopyable.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{
class PatchTable;
};
namespace Osd {
/// \brief CUDA patch table
///
/// This class is a cuda buffer representation of Far::PatchTable.
///
/// CudaEvaluator consumes this table to evaluate on the patches.
///
///
class CudaPatchTable : private NonCopyable<CudaPatchTable> {
public:
/// Creator. Returns NULL if error
static CudaPatchTable *Create(Far::PatchTable const *patchTable,
void *deviceContext = NULL);
/// Destructor
~CudaPatchTable();
/// Returns the cuda memory of the array of Osd::PatchArray buffer
void *GetPatchArrayBuffer() const { return _patchArrays; }
/// Returns the cuda memory of the patch control vertices
void *GetPatchIndexBuffer() const { return _indexBuffer; }
/// Returns the cuda memory of the array of Osd::PatchParam buffer
void *GetPatchParamBuffer() const { return _patchParamBuffer; }
protected:
CudaPatchTable();
bool allocate(Far::PatchTable const *patchTable);
void *_patchArrays;
void *_indexBuffer;
void *_patchParamBuffer;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H

View File

@ -43,7 +43,8 @@ CudaVertexBuffer::~CudaVertexBuffer() {
}
CudaVertexBuffer *
CudaVertexBuffer::Create(int numElements, int numVertices) {
CudaVertexBuffer::Create(int numElements, int numVertices,
void * /*deviceContext */) {
CudaVertexBuffer *instance =
new CudaVertexBuffer(numElements, numVertices);
if (instance->allocate()) return instance;
@ -52,7 +53,8 @@ CudaVertexBuffer::Create(int numElements, int numVertices) {
}
void
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
void * /*deviceContext*/) {
size_t size = _numElements * numVertices * sizeof(float);

View File

@ -27,6 +27,8 @@
#include "../version.h"
#include <cstddef>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -41,14 +43,16 @@ class CudaVertexBuffer {
public:
/// Creator. Returns NULL if error.
static CudaVertexBuffer * Create(int numElements, int numVertices);
static CudaVertexBuffer * Create(int numElements, int numVertices,
void *deviceContext = NULL);
/// Destructor.
~CudaVertexBuffer();
/// This method is meant to be used in client code in order to provide coarse
/// vertices data to Osd.
void UpdateData(const float *src, int startVertex, int numVertices);
void UpdateData(const float *src, int startVertex, int numVertices,
void *deviceContext=NULL);
/// Returns how many elements defined in this vertex buffer.
int GetNumElements() const;

View File

@ -30,6 +30,7 @@
#include <vector>
#include "../far/patchDescriptor.h"
#include "../osd/nonCopyable.h"
#include "../osd/types.h"
struct ID3D11Buffer;
struct ID3D11ShaderResourceView;
@ -49,33 +50,6 @@ class D3D11PatchTable : private NonCopyable<D3D11PatchTable> {
public:
typedef ID3D11Buffer * VertexBufferBinding;
// XXX: this struct will be further refactored.
class PatchArray {
public:
PatchArray(Far::PatchDescriptor desc, int numPatches,
int indexBase, int primitiveIdBase) :
desc(desc), numPatches(numPatches), indexBase(indexBase),
primitiveIdBase(primitiveIdBase) {}
Far::PatchDescriptor const &GetDescriptor() const {
return desc;
}
int GetNumPatches() const {
return numPatches;
}
int GetIndexBase() const {
return indexBase;
}
int GetPrimitiveIdBase() const {
return primitiveIdBase;
}
private:
Far::PatchDescriptor desc;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
typedef std::vector<PatchArray> PatchArrayVector;
D3D11PatchTable();
~D3D11PatchTable();

View File

@ -88,16 +88,66 @@ GLStencilTableSSBO::~GLStencilTableSSBO() {
// ---------------------------------------------------------------------------
GLComputeEvaluator::GLComputeEvaluator() :
_program(0), _workGroupSize(64) {
GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64) {
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
memset (&_patchKernel, 0, sizeof(_patchKernel));
}
GLComputeEvaluator::~GLComputeEvaluator() {
if (_program) {
glDeleteProgram(_program);
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
}
static GLuint
compileKernel(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc,
const char *kernelDefine,
int workGroupSize) {
GLuint program = glCreateProgram();
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
std::ostringstream defines;
defines << "#define LENGTH " << srcDesc.length << "\n"
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
<< "#define WORK_GROUP_SIZE " << workGroupSize << "\n"
<< kernelDefine << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3] = {"#version 430\n", 0, 0};
shaderSources[1] = defineStr.c_str();
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
glCompileShader(shader);
glAttachShader(program, shader);
GLint linked = 0;
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
char buffer[1024];
glGetShaderInfoLog(shader, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glGetProgramInfoLog(program, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glDeleteProgram(program);
return 0;
}
glDeleteShader(shader);
return program;
}
bool
GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc) {
@ -108,58 +158,55 @@ GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
return false;
}
if (_program) {
glDeleteProgram(_program);
_program = 0;
// create stencil kernel
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
_program = glCreateProgram();
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
std::ostringstream defines;
defines << "#define LENGTH " << srcDesc.length << "\n"
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
<< "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3] = {"#version 430\n", 0, 0};
shaderSources[1] = defineStr.c_str();
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
glCompileShader(shader);
glAttachShader(_program, shader);
GLint linked = 0;
glLinkProgram(_program);
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
char buffer[1024];
glGetShaderInfoLog(shader, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glGetProgramInfoLog(_program, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glDeleteProgram(_program);
_program = 0;
return false;
}
glDeleteShader(shader);
_stencilKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS",
_workGroupSize);
if (_stencilKernel.program == 0) return false;
// store uniform locations for the compute kernel program.
_uniformSizes = glGetUniformLocation(_program, "stencilSizes");
_uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
_uniformIndices = glGetUniformLocation(_program, "stencilIndices");
_uniformWeights = glGetUniformLocation(_program, "stencilIWeights");
_stencilKernel.uniformSizes =
glGetUniformLocation(_stencilKernel.program, "stencilSizes");
_stencilKernel.uniformOffsets =
glGetUniformLocation(_stencilKernel.program, "stencilOffsets");
_stencilKernel.uniformIndices =
glGetUniformLocation(_stencilKernel.program, "stencilIndices");
_stencilKernel.uniformWeights =
glGetUniformLocation(_stencilKernel.program, "stencilIWeights");
_stencilKernel.uniformStart =
glGetUniformLocation(_stencilKernel.program, "batchStart");
_stencilKernel.uniformEnd =
glGetUniformLocation(_stencilKernel.program, "batchEnd");
_stencilKernel.uniformSrcOffset =
glGetUniformLocation(_stencilKernel.program, "srcOffset");
_stencilKernel.uniformDstOffset =
glGetUniformLocation(_stencilKernel.program, "dstOffset");
_uniformStart = glGetUniformLocation(_program, "batchStart");
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
// create patch kernel
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
_patchKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES",
_workGroupSize);
if (_patchKernel.program == 0) return false;
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
_uniformDstOffset = glGetUniformLocation(_program, "dstOffset");
// uniform locaitons
_patchKernel.uniformSrcOffset =
glGetUniformLocation(_patchKernel.program, "srcOffset");
_patchKernel.uniformDstOffset =
glGetUniformLocation(_patchKernel.program, "dstOffset");
_patchKernel.uniformPatchArray =
glGetUniformLocation(_patchKernel.program, "patchArray");
_patchKernel.uniformDuDesc =
glGetUniformLocation(_patchKernel.program, "dstDuDesc");
_patchKernel.uniformDvDesc =
glGetUniformLocation(_patchKernel.program, "dstDvDesc");
return true;
}
@ -183,7 +230,7 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
GLuint weightsBuffer,
int start,
int end) const {
if (!_program) return false;
if (!_stencilKernel.program) return false;
int count = end - start;
if (count <= 0) {
return true;
@ -196,12 +243,12 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer);
glUseProgram(_program);
glUseProgram(_stencilKernel.program);
glUniform1i(_uniformStart, start);
glUniform1i(_uniformEnd, end);
glUniform1i(_uniformSrcOffset, srcDesc.offset);
glUniform1i(_uniformDstOffset, dstDesc.offset);
glUniform1i(_stencilKernel.uniformStart, start);
glUniform1i(_stencilKernel.uniformEnd, end);
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset);
glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
@ -218,6 +265,52 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
return true;
}
bool
GLComputeEvaluator::EvalPatches(
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const {
if (!_patchKernel.program) return false;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, patchCoordsBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchIndexBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchParamsBuffer);
glUseProgram(_patchKernel.program);
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
(const GLint*)&patchArrays[0]);
glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1);
glUseProgram(0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -28,6 +28,7 @@
#include "../version.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -92,26 +93,32 @@ public:
/// Destructor. note that the GL context must be made current.
~GLComputeEvaluator();
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static compute function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a
/// const float pointer for read
/// GL buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a
/// float pointer for write
/// GL buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param evaluator cached compiled instance. Clients are supposed to
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
@ -119,25 +126,25 @@ public:
///
/// @param deviceContext not used in the GLSL kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
} else {
// Create a kernel on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
delete instance;
return r;
@ -148,15 +155,14 @@ public:
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcVertexBuffer->BindVBO(),
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcBuffer->BindVBO(),
srcDesc,
dstVertexBuffer->BindVBO(),
dstBuffer->BindVBO(),
dstDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
@ -168,10 +174,8 @@ public:
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
bool EvalStencils(GLuint srcBuffer,
VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer,
VertexBufferDescriptor const &dstDesc,
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint sizesBuffer,
GLuint offsetsBuffer,
GLuint indicesBuffer,
@ -179,6 +183,271 @@ public:
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
///
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer
///
/// @param duDesc
///
/// @param dvBuffer
///
/// @param dvDesc
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBOBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
0, VertexBufferDescriptor(),
0, VertexBufferDescriptor(),
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindVBO() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindVBO() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
duBuffer->BindVBO(), duDesc,
dvBuffer->BindVBO(), dvDesc,
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const;
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure GLSL kernel. A valid GL context must be made current before
/// calling this function. Returns false if it fails to compile the kernel.
bool Compile(VertexBufferDescriptor const &srcDesc,
@ -188,18 +457,27 @@ public:
static void Synchronize(void *deviceContext);
private:
GLuint _program;
struct _StencilKernel {
GLuint program;
GLuint uniformSizes;
GLuint uniformOffsets;
GLuint uniformIndices;
GLuint uniformWeights;
GLuint uniformStart;
GLuint uniformEnd;
GLuint uniformSrcOffset;
GLuint uniformDstOffset;
} _stencilKernel;
GLuint _uniformSizes, // stencil table
_uniformOffsets,
_uniformIndices,
_uniformWeights,
struct _PatchKernel {
GLuint program;
GLuint uniformSrcOffset;
GLuint uniformDstOffset;
GLuint uniformPatchArray;
GLuint uniformDuDesc;
GLuint uniformDvDesc;
_uniformStart, // range
_uniformEnd,
_uniformSrcOffset, // src buffer offset (in elements)
_uniformDstOffset; // dst buffer offset (in elements)
} _patchKernel;
int _workGroupSize;
};

View File

@ -26,6 +26,7 @@
#include "../far/patchTable.h"
#include "../osd/opengl.h"
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -33,11 +34,14 @@ namespace OPENSUBDIV_VERSION {
namespace Osd {
GLPatchTable::GLPatchTable() :
_indexBuffer(0), _patchParamTexture(0) {
_patchIndexBuffer(0), _patchParamBuffer(0),
_patchIndexTexture(0), _patchParamTexture(0) {
}
GLPatchTable::~GLPatchTable() {
if (_indexBuffer) glDeleteBuffers(1, &_indexBuffer);
if (_patchIndexBuffer) glDeleteBuffers(1, &_patchIndexBuffer);
if (_patchParamBuffer) glDeleteBuffers(1, &_patchParamBuffer);
if (_patchIndexTexture) glDeleteTextures(1, &_patchIndexTexture);
if (_patchParamTexture) glDeleteTextures(1, &_patchParamTexture);
}
@ -52,84 +56,56 @@ GLPatchTable::Create(Far::PatchTable const *farPatchTable,
bool
GLPatchTable::allocate(Far::PatchTable const *farPatchTable) {
glGenBuffers(1, &_indexBuffer);
glGenBuffers(1, &_patchIndexBuffer);
glGenBuffers(1, &_patchParamBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _indexBuffer);
std::vector<int> buffer;
std::vector<unsigned int> ppBuffer;
CpuPatchTable patchTable(farPatchTable);
// needs reserve?
size_t numPatchArrays = patchTable.GetNumPatchArrays();
GLsizei indexSize = (GLsizei)patchTable.GetPatchIndexSize();
GLsizei patchParamSize = (GLsizei)patchTable.GetPatchParamSize();
int nPatchArrays = farPatchTable->GetNumPatchArrays();
// for each patchArray
for (int j = 0; j < nPatchArrays; ++j) {
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
farPatchTable->GetNumPatches(j),
(int)buffer.size(),
(int)ppBuffer.size()/3);
_patchArrays.push_back(patchArray);
// indices
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
for (int k = 0; k < indices.size(); ++k) {
buffer.push_back(indices[k]);
}
// patchParams
#if 0
// XXX: we need sharpness interface for patcharray or put sharpness
// into patchParam.
Far::ConstPatchParamArray patchParams =
farPatchTable->GetPatchParams(j);
for (int k = 0; k < patchParams.size(); ++k) {
float sharpness = 0.0;
ppBuffer.push_back(patchParams[k].faceIndex);
ppBuffer.push_back(patchParams[k].bitField.field);
ppBuffer.push_back(*((unsigned int *)&sharpness));
}
#else
// XXX: workaround. GetPatchParamTable() will be deprecated though.
Far::PatchParamTable const & patchParamTable =
farPatchTable->GetPatchParamTable();
std::vector<Far::Index> const &sharpnessIndexTable =
farPatchTable->GetSharpnessIndexTable();
int numPatches = farPatchTable->GetNumPatches(j);
for (int k = 0; k < numPatches; ++k) {
float sharpness = 0.0;
int patchIndex = (int)ppBuffer.size()/3;
if (patchIndex < (int)sharpnessIndexTable.size()) {
int sharpnessIndex = sharpnessIndexTable[patchIndex];
if (sharpnessIndex >= 0)
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
}
ppBuffer.push_back(patchParamTable[patchIndex].faceIndex);
ppBuffer.push_back(patchParamTable[patchIndex].bitField.field);
ppBuffer.push_back(*((unsigned int *)&sharpness));
}
#endif
}
// copy patch array
_patchArrays.insert(_patchArrays.end(),
patchTable.GetPatchArrayBuffer(),
patchTable.GetPatchArrayBuffer() + numPatchArrays);
// copy index buffer
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _patchIndexBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
(int)buffer.size()*sizeof(int), &buffer[0], GL_STATIC_DRAW);
// patchParam is currently expected to be texture (it can be SSBO)
GLuint texBuffer = 0;
glGenBuffers(1, &texBuffer);
glBindBuffer(GL_ARRAY_BUFFER, texBuffer);
glBufferData(GL_ARRAY_BUFFER, ppBuffer.size()*sizeof(unsigned int),
&ppBuffer[0], GL_STATIC_DRAW);
glGenTextures(1, &_patchParamTexture);
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, texBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glDeleteBuffers(1, &texBuffer);
indexSize * sizeof(GLint),
patchTable.GetPatchIndexBuffer(),
GL_STATIC_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
// copy patchparam buffer
glBindBuffer(GL_ARRAY_BUFFER, _patchParamBuffer);
glBufferData(GL_ARRAY_BUFFER,
patchParamSize * sizeof(PatchParam),
patchTable.GetPatchParamBuffer(),
GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// make both buffer as texture buffers too.
glGenTextures(1, &_patchIndexTexture);
glGenTextures(1, &_patchParamTexture);
GLuint buffer;
glGenBuffers(1, &buffer);
glBindBuffer(GL_ARRAY_BUFFER, buffer);
glBufferData(GL_ARRAY_BUFFER,
indexSize * sizeof(GLint),
patchTable.GetPatchIndexBuffer(),
GL_STATIC_DRAW);
glBindTexture(GL_TEXTURE_BUFFER, _patchIndexTexture);
// glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, _patchIndexBuffer);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, buffer);
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, _patchParamBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
return true;
}

View File

@ -27,10 +27,9 @@
#include "../version.h"
#include <vector>
#include "../far/patchDescriptor.h"
#include "../osd/nonCopyable.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -45,34 +44,6 @@ class GLPatchTable : private NonCopyable<GLPatchTable> {
public:
typedef GLuint VertexBufferBinding;
// XXX: this struct will be further refactored.
class PatchArray {
public:
PatchArray(Far::PatchDescriptor desc, int numPatches,
int indexBase, int primitiveIdBase) :
desc(desc), numPatches(numPatches), indexBase(indexBase),
primitiveIdBase(primitiveIdBase) {}
Far::PatchDescriptor const &GetDescriptor() const {
return desc;
}
int GetNumPatches() const {
return numPatches;
}
int GetIndexBase() const {
return indexBase;
}
int GetPrimitiveIdBase() const {
return primitiveIdBase;
}
private:
Far::PatchDescriptor desc;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
typedef std::vector<PatchArray> PatchArrayVector;
GLPatchTable();
~GLPatchTable();
static GLPatchTable *Create(Far::PatchTable const *farPatchTable,
@ -84,7 +55,17 @@ public:
/// Returns the GL index buffer containing the patch control vertices
GLuint GetPatchIndexBuffer() const {
return _indexBuffer;
return _patchIndexBuffer;
}
/// Returns the GL index buffer containing the patch parameter
GLuint GetPatchParamBuffer() const {
return _patchParamBuffer;
}
/// Returns the GL texture buffer containing the patch control vertices
GLuint GetPatchIndexTextureBuffer() const {
return _patchIndexTexture;
}
/// Returns the GL texture buffer containing the patch parameter
@ -93,11 +74,17 @@ public:
}
protected:
GLPatchTable();
// allocate buffers from patchTable
bool allocate(Far::PatchTable const *farPatchTable);
PatchArrayVector _patchArrays;
GLuint _indexBuffer;
GLuint _patchIndexBuffer;
GLuint _patchParamBuffer;
GLuint _patchIndexTexture;
GLuint _patchParamTexture;
};

View File

@ -108,37 +108,36 @@ GLStencilTableTBO::~GLStencilTableTBO() {
// ---------------------------------------------------------------------------
GLXFBEvaluator::GLXFBEvaluator() :
_program(0), _srcBufferTexture(0),
_uniformSrcBufferTexture(0), _uniformSizesTexture(0),
_uniformOffsetsTexture(0), _uniformIndicesTexture(0),
_uniformWeightsTexture(0), _uniformStart(0), _uniformEnd(0),
_uniformSrcOffset(0) {
GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) {
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
memset (&_patchKernel, 0, sizeof(_patchKernel));
}
GLXFBEvaluator::~GLXFBEvaluator() {
if (_program) {
glDeleteProgram(_program);
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
if (_srcBufferTexture) {
glDeleteTextures(1, &_srcBufferTexture);
}
}
bool
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc) {
if (_program) {
glDeleteProgram(_program);
_program = 0;
}
_program = glCreateProgram();
static GLuint
compileKernel(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc,
const char *kernelDefine) {
GLuint program = glCreateProgram();
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
std::ostringstream defines;
defines << "#define LENGTH " << srcDesc.length << "\n"
<< "#define SRC_STRIDE " << srcDesc.stride << "\n";
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
<< kernelDefine << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3] = {"#version 410\n", NULL, NULL};
@ -147,7 +146,7 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
glCompileShader(shader);
glAttachShader(_program, shader);
glAttachShader(program, shader);
std::vector<std::string> outputs;
std::vector<const char *> pOutputs;
@ -183,40 +182,83 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
}
}
glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(),
glTransformFeedbackVaryings(program, (GLsizei)outputs.size(),
&pOutputs[0], GL_INTERLEAVED_ATTRIBS);
GLint linked = 0;
glLinkProgram(_program);
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
char buffer[1024];
glGetShaderInfoLog(shader, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glGetProgramInfoLog(_program, 1024, NULL, buffer);
glGetProgramInfoLog(program, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glDeleteProgram(_program);
_program = 0;
return false;
glDeleteProgram(program);
program = 0;
}
glDeleteShader(shader);
// set uniform locations for compute kernels
_uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer");
return program;
}
_uniformSizesTexture = glGetUniformLocation(_program, "sizes");
_uniformOffsetsTexture = glGetUniformLocation(_program, "offsets");
_uniformIndicesTexture = glGetUniformLocation(_program, "indices");
_uniformWeightsTexture = glGetUniformLocation(_program, "weights");
bool
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc) {
_uniformStart = glGetUniformLocation(_program, "batchStart");
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
// create stencil kernel
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
_stencilKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS");
if (_stencilKernel.program == 0) return false;
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
// cache uniform locations
_stencilKernel.uniformSrcBufferTexture
= glGetUniformLocation(_stencilKernel.program, "vertexBuffer");
_stencilKernel.uniformSrcOffset
= glGetUniformLocation(_stencilKernel.program, "srcOffset");
_stencilKernel.uniformSizesTexture
= glGetUniformLocation(_stencilKernel.program, "sizes");
_stencilKernel.uniformOffsetsTexture
= glGetUniformLocation(_stencilKernel.program, "offsets");
_stencilKernel.uniformIndicesTexture
= glGetUniformLocation(_stencilKernel.program, "indices");
_stencilKernel.uniformWeightsTexture
= glGetUniformLocation(_stencilKernel.program, "weights");
_stencilKernel.uniformStart
= glGetUniformLocation(_stencilKernel.program, "batchStart");
_stencilKernel.uniformEnd
= glGetUniformLocation(_stencilKernel.program, "batchEnd");
// create patch kernel
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
_patchKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES");
if (_patchKernel.program == 0) return false;
// cache uniform locations
_patchKernel.uniformSrcBufferTexture
= glGetUniformLocation(_patchKernel.program, "vertexBuffer");
_patchKernel.uniformSrcOffset
= glGetUniformLocation(_patchKernel.program, "srcOffset");
_patchKernel.uniformPatchArray
= glGetUniformLocation(_patchKernel.program, "patchArray");
_patchKernel.uniformPatchParamTexture
= glGetUniformLocation(_patchKernel.program, "patchParamBuffer");
_patchKernel.uniformPatchIndexTexture
= glGetUniformLocation(_patchKernel.program, "patchIndexBuffer");
// create a texture for input buffer
if (!_srcBufferTexture) {
@ -255,7 +297,7 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
GLuint weightsTexture,
int start,
int end) const {
if (!_program) return false;
if (!_stencilKernel.program) return false;
int count = end - start;
if (count <= 0) {
return true;
@ -268,25 +310,25 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
glBindVertexArray(vao);
glEnable(GL_RASTERIZER_DISCARD);
glUseProgram(_program);
glUseProgram(_stencilKernel.program);
// Set input VBO as a texture buffer.
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0);
bindTexture(_stencilKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
// bind stencil table textures.
bindTexture(_uniformSizesTexture, sizesTexture, 1);
bindTexture(_uniformOffsetsTexture, offsetsTexture, 2);
bindTexture(_uniformIndicesTexture, indicesTexture, 3);
bindTexture(_uniformWeightsTexture, weightsTexture, 4);
bindTexture(_stencilKernel.uniformSizesTexture, sizesTexture, 1);
bindTexture(_stencilKernel.uniformOffsetsTexture, offsetsTexture, 2);
bindTexture(_stencilKernel.uniformIndicesTexture, indicesTexture, 3);
bindTexture(_stencilKernel.uniformWeightsTexture, weightsTexture, 4);
// set batch range
glUniform1i(_uniformStart, start);
glUniform1i(_uniformEnd, end);
glUniform1i(_uniformSrcOffset, srcDesc.offset);
glUniform1i(_stencilKernel.uniformStart, start);
glUniform1i(_stencilKernel.uniformEnd, end);
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
// The destination buffer is bound at vertex boundary.
//
@ -347,6 +389,94 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
return true;
}
bool
GLXFBEvaluator::EvalPatches(
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const & /*duDesc*/,
GLuint dvBuffer, VertexBufferDescriptor const & /*dvDesc*/,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexTexture,
GLuint patchParamTexture) const {
if (!_patchKernel.program) return false;
if (duBuffer != 0 || dvBuffer != 0) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"GLXFBEvaluator doesn't support derivative evaluation yet.\n");
}
// bind vertex array
// always create new one, to be safe with multiple contexts (slow though)
GLuint vao = 0;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glEnable(GL_RASTERIZER_DISCARD);
glUseProgram(_patchKernel.program);
// Set input VBO as a texture buffer.
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
bindTexture(_patchKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
// bind patch index and patch param textures.
bindTexture(_patchKernel.uniformPatchParamTexture, patchParamTexture, 1);
bindTexture(_patchKernel.uniformPatchIndexTexture, patchIndexTexture, 2);
// set other uniforms
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
(const GLint*)&patchArrays[0]);
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
// input patchcoords
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
int stride = sizeof(int) * 5; // patchcoord = int*5 struct
glBindBuffer(GL_ARRAY_BUFFER, patchCoordsBuffer);
glVertexAttribIPointer(0, 3, GL_UNSIGNED_INT, stride, (void*)0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, stride, (void*)(sizeof(int)*3));
int dstBufferBindOffset =
dstDesc.offset - (dstDesc.offset % dstDesc.stride);
// bind destination buffer
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
0, dstBuffer,
dstBufferBindOffset * sizeof(float),
numPatchCoords * dstDesc.stride * sizeof(float));
glBeginTransformFeedback(GL_POINTS);
glDrawArrays(GL_POINTS, 0, numPatchCoords);
glEndTransformFeedback();
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
// unbind textures
for (int i = 0; i < 3; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
glDisable(GL_RASTERIZER_DISCARD);
glUseProgram(0);
glActiveTexture(GL_TEXTURE0);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
// revert vao
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -28,6 +28,7 @@
#include "../version.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -93,6 +94,12 @@ public:
/// Destructor. note that the GL context must be made current.
~GLXFBEvaluator();
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
@ -120,25 +127,25 @@ public:
///
/// @param deviceContext not used in the GLSLTransformFeedback kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
delete instance;
return r;
@ -147,18 +154,30 @@ public:
}
}
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcVertexBuffer->BindVBO(),
srcDesc,
dstVertexBuffer->BindVBO(),
dstDesc,
/// \brief dispatch eval stencils function.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object for destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
stencilTable->GetSizesTexture(),
stencilTable->GetOffsetsTexture(),
stencilTable->GetIndicesTexture(),
@ -167,12 +186,31 @@ public:
/* end = */ stencilTable->GetNumStencils());
}
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
bool EvalStencils(GLuint srcBuffer,
VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer,
VertexBufferDescriptor const &dstDesc,
/// \brief Static eval stencils function, dispatch the GLSL XFB kernel on
/// on GPU asynchronously.
///
/// @param srcBuffer GL buffer of input primvars.
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer GL buffer of output primvars.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizesBuffer GL buffer of the sizes in the stencil table
///
/// @param offsetsBuffer GL buffer of the offsets in the stencil table
///
/// @param indicesBuffer GL buffer of the indices in the stencil table
///
/// @param weightsBuffer GL buffer of the weifgrs in the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint sizesBuffer,
GLuint offsetsBuffer,
GLuint indicesBuffer,
@ -180,6 +218,270 @@ public:
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
///
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer
///
/// @param duDesc
///
/// @param dvBuffer
///
/// @param dvDesc
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
0, VertexBufferDescriptor(),
0, VertexBufferDescriptor(),
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexTextureBuffer(),
patchTable->GetPatchParamTextureBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output s-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output t-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
duBuffer->BindVBO(), duDesc,
dvBuffer->BindVBO(), dvDesc,
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexTextureBuffer(),
patchTable->GetPatchParamTextureBuffer());
}
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const;
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure GLSL kernel. A valid GL context must be made current before
/// calling this function. Returns false if it fails to compile the kernel.
bool Compile(VertexBufferDescriptor const &srcDesc,
@ -189,19 +491,31 @@ public:
static void Synchronize(void *kernel);
private:
GLuint _program;
GLuint _srcBufferTexture;
GLuint _uniformSrcBufferTexture;
GLuint _uniformSizesTexture;
GLuint _uniformOffsetsTexture;
GLuint _uniformIndicesTexture;
GLuint _uniformWeightsTexture;
struct _StencilKernel {
GLuint program;
GLuint uniformSrcBufferTexture;
GLuint uniformSrcOffset; // src buffer offset (in elements)
GLuint uniformSizesTexture;
GLuint uniformOffsetsTexture;
GLuint uniformIndicesTexture;
GLuint uniformWeightsTexture;
GLuint uniformStart; // range
GLuint uniformEnd;
} _stencilKernel;
struct _PatchKernel {
GLuint program;
GLuint uniformSrcBufferTexture;
GLuint uniformSrcOffset; // src buffer offset (in elements)
GLuint uniformPatchArray;
GLuint uniformPatchParamTexture;
GLuint uniformPatchIndexTexture;
} _patchKernel;
GLuint _uniformStart; // range
GLuint _uniformEnd;
GLuint _uniformSrcOffset; // src buffer offset (in elements)
};
} // end namespace Osd

View File

@ -24,18 +24,10 @@
//------------------------------------------------------------------------------
uniform int batchStart = 0;
uniform int batchEnd = 0;
uniform int srcOffset = 0;
uniform int dstOffset = 0;
layout(binding=0) buffer src_buffer { float srcVertexBuffer[]; };
layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; };
layout(binding=2) buffer stencilSizes { int _sizes[]; };
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
layout(binding=4) buffer stencilIndices { int _indices[]; };
layout(binding=5) buffer stencilWeights { float _weights[]; };
layout(local_size_x=WORK_GROUP_SIZE, local_size_y=1, local_size_z=1) in;
//------------------------------------------------------------------------------
@ -73,6 +65,15 @@ void addWithWeight(inout Vertex v, const Vertex src, float weight) {
}
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
uniform int batchStart = 0;
uniform int batchEnd = 0;
layout(binding=2) buffer stencilSizes { int _sizes[]; };
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
layout(binding=4) buffer stencilIndices { int _indices[]; };
layout(binding=5) buffer stencilWeights { float _weights[]; };
void main() {
int current = int(gl_GlobalInvocationID.x) + batchStart;
@ -94,4 +95,194 @@ void main() {
writeVertex(current, dst);
}
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
// PERFORMANCE: stride could be constant, but not as significant as length
//struct PatchArray {
// int patchType;
// int numPatches;
// int indexBase; // an offset within the index buffer
// int primitiveIdBase; // an offset within the patch param buffer
//};
// # of patcharrays is 1 or 2.
uniform ivec4 patchArray[2];
uniform ivec3 dstDuDesc;
uniform ivec3 dstDvDesc;
layout(binding=2) buffer du_buffer { float dstDuBuffer[]; };
layout(binding=3) buffer dv_buffer { float dstDvBuffer[]; };
struct PatchCoord {
int arrayIndex;
int patchIndex;
int vertIndex;
float s;
float t;
};
struct PatchParam {
int faceIndex;
uint patchBits;
float sharpness;
};
layout(binding=4) buffer patchCoord_buffer { PatchCoord patchCoords[]; };
layout(binding=5) buffer patchIndex_buffer { int patchIndexBuffer[]; };
layout(binding=6) buffer patchParam_buffer { PatchParam patchParamBuffer[]; };
void writeDu(int index, Vertex du) {
int duIndex = dstDuDesc.x + index * dstDuDesc.z;
for (int i = 0; i < LENGTH; ++i) {
dstDuBuffer[duIndex + i] = du.vertexData[i];
}
}
void writeDv(int index, Vertex dv) {
int dvIndex = dstDvDesc.x + index * dstDvDesc.z;
for (int i = 0; i < LENGTH; ++i) {
dstDvBuffer[dvIndex + i] = dv.vertexData[i];
}
}
void getBSplineWeights(float t, inout vec4 point, inout vec4 deriv) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
point.w = one6th * ( t3);
// Derivatives of the above four basis functions at t:
deriv.x = -0.5f*t2 + t - 0.5f;
deriv.y = 1.5f*t2 - 2.0f*t;
deriv.z = -1.5f*t2 + t + 0.5f;
deriv.w = 0.5f*t2;
}
uint getDepth(uint patchBits) {
return (patchBits & 0x7);
}
float getParamFraction(uint patchBits) {
uint nonQuadRoot = (patchBits >> 3) & 0x1;
uint depth = getDepth(patchBits);
if (nonQuadRoot == 1) {
return 1.0f / float( 1 << (depth-1) );
} else {
return 1.0f / float( 1 << depth );
}
}
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
float frac = getParamFraction(patchBits);
uint iu = (patchBits >> 22) & 0x3ff;
uint iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = float(iu*frac);
float pv = float(iv*frac);
// normalize u,v coordinates
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
}
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
uint boundary = ((bits >> 4) & 0xf);
if ((boundary & 1) != 0) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if ((boundary & 2) != 0) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if ((boundary & 4) != 0) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if ((boundary & 8) != 0) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
void main() {
int current = int(gl_GlobalInvocationID.x);
PatchCoord coord = patchCoords[current];
int patchIndex = coord.patchIndex;
ivec4 array = patchArray[coord.arrayIndex];
int patchType = 6; // array.x XXX: REGULAR only for now.
int numControlVertices = 16;
uint patchBits = patchParamBuffer[patchIndex].patchBits;
vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t));
float dScale = float(1 << getDepth(patchBits));
float wP[20], wDs[20], wDt[20];
if (patchType == 6) { // REGULAR
vec4 sWeights, tWeights, dsWeights, dtWeights;
getBSplineWeights(uv.x, sWeights, dsWeights);
getBSplineWeights(uv.y, tWeights, dtWeights);
adjustBoundaryWeights(patchBits, sWeights, tWeights);
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
}
}
} else {
// TODO: GREGORY BASIS
}
Vertex dst;
clear(dst);
int indexBase = array.z + coord.vertIndex;
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(dst, readVertex(index), wP[i]);
}
writeVertex(current, dst);
if (dstDuDesc.y > 0) { // length
Vertex du;
clear(du);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(du, readVertex(index), wDs[i]);
}
writeDu(current, du);
}
if (dstDvDesc.y > 0) {
Vertex dv;
clear(dv);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(dv, readVertex(index), wDt[i]);
}
writeDv(current, dv);
}
}
#endif

View File

@ -25,18 +25,8 @@
//------------------------------------------------------------------------------
uniform samplerBuffer vertexBuffer;
out float outVertexBuffer[LENGTH];
uniform usamplerBuffer sizes;
uniform isamplerBuffer offsets;
uniform isamplerBuffer indices;
uniform samplerBuffer weights;
uniform int batchStart = 0;
uniform int batchEnd = 0;
uniform int srcOffset = 0;
out float outVertexBuffer[LENGTH];
//------------------------------------------------------------------------------
@ -72,6 +62,16 @@ void writeVertex(Vertex v) {
}
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS)
uniform usamplerBuffer sizes;
uniform isamplerBuffer offsets;
uniform isamplerBuffer indices;
uniform samplerBuffer weights;
uniform int batchStart = 0;
uniform int batchEnd = 0;
void main() {
int current = gl_VertexID + batchStart;
@ -97,4 +97,146 @@ void main() {
writeVertex(dst);
}
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES)
layout (location = 0) in ivec3 patchHandles;
layout (location = 1) in vec2 patchCoords;
//struct PatchArray {
// int patchType;
// int numPatches;
// int indexBase; // an offset within the index buffer
// int primitiveIdBase; // an offset within the patch param buffer
//};
// # of patcharrays is 1 or 2.
uniform ivec4 patchArray[2];
uniform isamplerBuffer patchParamBuffer;
uniform isamplerBuffer patchIndexBuffer;
void getBSplineWeights(float t, inout vec4 point, vec4 deriv) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
point.w = one6th * ( t3);
// Derivatives of the above four basis functions at t:
/* if (deriv) { */
/* deriv[0] = -0.5f*t2 + t - 0.5f; */
/* deriv[1] = 1.5f*t2 - 2.0f*t; */
/* deriv[2] = -1.5f*t2 + t + 0.5f; */
/* deriv[3] = 0.5f*t2; */
/* } */
}
uint getDepth(uint patchBits) {
return (patchBits & 0x7);
}
float getParamFraction(uint patchBits) {
uint nonQuadRoot = (patchBits >> 3) & 0x1;
uint depth = getDepth(patchBits);
if (nonQuadRoot == 1) {
return 1.0f / float( 1 << (depth-1) );
} else {
return 1.0f / float( 1 << depth );
}
}
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
float frac = getParamFraction(patchBits);
uint iu = (patchBits >> 22) & 0x3ff;
uint iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = float(iu*frac);
float pv = float(iv*frac);
// normalize u,v coordinates
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
}
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
uint boundary = ((bits >> 4) & 0xf);
if ((boundary & 1) != 0) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if ((boundary & 2) != 0) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if ((boundary & 4) != 0) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if ((boundary & 8) != 0) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
void main() {
int current = gl_VertexID;
ivec3 handle = patchHandles;
int patchIndex = handle.y;
vec2 coord = patchCoords;
ivec4 array = patchArray[handle.x];
int patchType = array.x;
int numControlVertices = 16;
uint patchBits = texelFetch(patchParamBuffer, patchIndex).y;
// normalize
coord = normalizePatchCoord(patchBits, coord);
// XXX: dScale for derivative
// if regular
float wP[20];
{
vec4 sWeights, tWeights, dsWeights, dtWeights;
getBSplineWeights(coord.s, sWeights, dsWeights);
getBSplineWeights(coord.t, tWeights, dtWeights);
adjustBoundaryWeights(patchBits, sWeights, tWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
}
}
}
Vertex dst;
clear(dst);
int indexBase = array.z + handle.z;
for (int i = 0; i < numControlVertices; ++i) {
int index = texelFetch(patchIndexBuffer, indexBase + i).x;
addWithWeight(dst, readVertex(index), wP[i]);
}
writeVertex(dst);
}
#endif

View File

@ -171,7 +171,12 @@ public:
for(typename Evaluators::iterator it = _evaluators.begin();
it != _evaluators.end(); ++it) {
if (it->srcDesc.length == srcDesc.length and
// Note: XFB kernel needs to be configured with the local offset
// of the dstDesc to skip preceding primvars.
int dstOffset1 = it->dstDesc.offset % it->dstDesc.stride;
int dstOffset2 = dstDesc.offset % dstDesc.stride;
if (dstOffset1 == dstOffset2 and
it->srcDesc.length == srcDesc.length and
it->srcDesc.stride == srcDesc.stride and
it->dstDesc.length == dstDesc.length and
it->dstDesc.stride == dstDesc.stride) {

View File

@ -24,6 +24,7 @@
#include "../osd/ompEvaluator.h"
#include "../osd/ompKernel.h"
#include "../far/patchBasis.h"
#include <omp.h>
namespace OpenSubdiv {
@ -33,24 +34,212 @@ namespace Osd {
/* static */
bool
OmpEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
if (end <= start) return true;
OmpEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
// we can probably expand cpuKernel.cpp to here.
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
// XXX: we can probably expand cpuKernel.cpp to here.
OmpEvalStencils(src, srcDesc, dst, dstDesc,
sizes, offsets, indices, weights, start, end);
return true;
}
/* static */
bool
OmpEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
OmpEvalStencils(src, srcDesc,
dst, dstDesc,
du, duDesc,
dv, dvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
start, end);
return true;
}
template <typename T>
struct BufferAdapter {
BufferAdapter(T *p, int length, int stride) :
_p(p), _length(length), _stride(stride) { }
void Clear() {
for (int i = 0; i < _length; ++i) _p[i] = 0;
}
void AddWithWeight(T const *src, float w) {
if (_p) {
// TODO: derivatives.
for (int i = 0; i < _length; ++i) {
_p[i] += src[i] * w;
}
}
}
const T *operator[] (int index) const {
return _p + _stride * index;
}
BufferAdapter<T> & operator ++() {
if (_p) {
_p += _stride;
}
return *this;
}
T *_p;
int _length;
int _stride;
};
/* static */
bool
OmpEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer){
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
else return false;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
#pragma omp parallel for
for (int i = 0; i < numPatchCoords; ++i) {
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
float wP[20], wDs[20], wDt[20];
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
// XXX: patchIndex is absolute. not sure it's consistent.
// (should be offsetted by array.primitiveIdBase?)
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
continue;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
}
return true;
}
/* static */
bool
OmpEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer) {
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
#pragma omp parallel for
for (int i = 0; i < numPatchCoords; ++i) {
float wP[20], wDs[20], wDt[20];
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
continue;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
duT.Clear();
dvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
duT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
}
++dstT;
++duT;
++dvT;
}
return true;
}
/* static */
void
OmpEvaluator::Synchronize(void * /*deviceContext*/) {

View File

@ -28,7 +28,7 @@
#include "../version.h"
#include <cstddef>
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -38,9 +38,15 @@ namespace Osd {
class OmpEvaluator {
public:
/// \brief Generic static compute function. This function has a same
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static eval stencils function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
/// in the same way from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
@ -54,29 +60,27 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the omp kernel
/// @param instance not used in the omp kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the omp kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
OmpEvaluator const * instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused;
(void)deviceContext; // unused;
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const OmpEvaluator *instance = NULL,
void * deviceContext = NULL) {
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
srcDesc,
dstVertexBuffer->BindCpuBuffer(),
dstDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -85,17 +89,376 @@ public:
/*end = */ stencilTable->GetNumStencils());
}
/// stencil compute function.
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
/// \brief Static eval stencils function which takes raw CPU pointers for
/// input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dvBuffer Output V-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the omp kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the omp kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const OmpEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
static void Synchronize(void *deviceContext = NULL);

View File

@ -117,10 +117,82 @@ OmpEvalStencils(float const * src,
copy(dst, i, threadResult, dstDesc);
}
}
void
OmpEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end) {
if (start > 0) {
sizes += start;
indices += offsets[start];
weights += offsets[start];
duWeights += offsets[start];
dvWeights += offsets[start];
}
src += srcDesc.offset;
dst += dstDesc.offset;
dstDu += dstDuDesc.offset;
dstDv += dstDvDesc.offset;
int numThreads = omp_get_max_threads();
int n = end - start;
float * result = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
#pragma omp parallel for
for (int i = 0; i < n; ++i) {
int index = i + (start > 0 ? start : 0); // Stencil index
// Get thread-local pointers
int const * threadIndices = indices + offsets[index];
float const * threadWeights = weights + offsets[index];
float const * threadWeightsDu = duWeights + offsets[index];
float const * threadWeightsDv = dvWeights + offsets[index];
int threadId = omp_get_thread_num();
float * threadResult = result + threadId*srcDesc.length;
float * threadResultDu = resultDu + threadId*srcDesc.length;
float * threadResultDv = resultDv + threadId*srcDesc.length;
clear(threadResult, dstDesc);
clear(threadResultDu, dstDuDesc);
clear(threadResultDv, dstDvDesc);
for (int j=0; j<(int)sizes[index]; ++j) {
addWithWeight(threadResult, src,
threadIndices[j], threadWeights[j], srcDesc);
addWithWeight(threadResultDu, src,
threadIndices[j], threadWeightsDu[j], srcDesc);
addWithWeight(threadResultDv, src,
threadIndices[j], threadWeightsDv[j], srcDesc);
}
copy(dst, i, threadResult, dstDesc);
copy(dstDu, i, threadResultDu, dstDuDesc);
copy(dstDv, i, threadResultDv, dstDvDesc);
}
}
} // end namespace Osd
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -45,6 +45,23 @@ OmpEvalStencils(float const * src,
float const * weights,
int start, int end);
void
OmpEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end);
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -34,15 +34,15 @@ namespace Osd {
/* static */
bool
TbbEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
TbbEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
if (end <= start) return true;
TbbEvalStencils(src, srcDesc, dst, dstDesc,
@ -51,6 +51,82 @@ TbbEvaluator::EvalStencils(const float *src,
return true;
}
/* static */
bool
TbbEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
TbbEvalStencils(src, srcDesc,
dst, dstDesc,
du, duDesc,
dv, dvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
start, end);
return true;
}
/* static */
bool
TbbEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
if (srcDesc.length != dstDesc.length) return false;
TbbEvalPatches(src, srcDesc, dst, dstDesc,
NULL, VertexBufferDescriptor(),
NULL, VertexBufferDescriptor(),
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
return true;
}
/* static */
bool
TbbEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
if (srcDesc.length != dstDesc.length) return false;
TbbEvalPatches(src, srcDesc, dst, dstDesc,
du, duDesc, dv, dvDesc,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
return true;
}
/* static */
void
TbbEvaluator::Synchronize(void *) {

View File

@ -26,7 +26,9 @@
#define OPENSUBDIV3_OSD_TBB_EVALUATOR_H
#include "../version.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchTable.h"
#include <cstddef>
@ -37,9 +39,15 @@ namespace Osd {
class TbbEvaluator {
public:
/// \brief Generic static stencil eval function. This function has a same
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static eval stencils function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
/// in the same way from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
@ -55,27 +63,25 @@ public:
///
/// @param stencilTable stencil table to be applied.
///
/// @param instance not used in the tbb kernel
/// @param instance not used in the tbb kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the tbb kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
TbbEvaluator const *instance = NULL,
void *deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
TbbEvaluator const *instance = NULL,
void *deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
srcDesc,
dstVertexBuffer->BindCpuBuffer(),
dstDesc,
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -84,19 +90,381 @@ public:
/*end = */ stencilTable->GetNumStencils());
}
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int *sizes,
const int *offsets,
const int *indices,
const float *weights,
int start,
int end);
/// \brief Static eval stencils function which takes raw CPU pointers for
/// input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int *sizes,
const int *offsets,
const int *indices,
const float *weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dvBuffer Output V-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
///
/// @param instance not used in the tbb kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the tbb kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const TbbEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output s-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dv Output t-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the u-weights buffer of the stencil table
///
/// @param dvWeights pointer to the v-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output s-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output t-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(
srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output s-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dv Output t-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// \brief synchronize all asynchronous computation invoked on this device.
static void Synchronize(void *deviceContext = NULL);
/// \brief initialize tbb task schedular
/// (optional: client may use tbb::task_scheduler_init)
///
/// @param numThreads how many threads
///
static void SetNumThreads(int numThreads);
};

View File

@ -24,7 +24,9 @@
#include "../osd/cpuKernel.h"
#include "../osd/tbbKernel.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchBasis.h"
#include <cassert>
#include <cstdlib>
@ -187,6 +189,274 @@ TbbEvalStencils(float const * src,
tbb::parallel_for(range, kernel);
}
void
TbbEvalStencils(float const * src, VertexBufferDescriptor const &srcDesc,
float * dst, VertexBufferDescriptor const &dstDesc,
float * du, VertexBufferDescriptor const &duDesc,
float * dv, VertexBufferDescriptor const &dvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end) {
if (start > 0) {
sizes += start;
indices += offsets[start];
weights += offsets[start];
duWeights += offsets[start];
dvWeights += offsets[start];
}
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
// PERFORMANCE: need to combine 3 launches together
if (dst) {
TBBStencilKernel kernel(src, srcDesc, dst, dstDesc,
sizes, offsets, indices, weights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (du) {
TBBStencilKernel kernel(src, srcDesc, du, duDesc,
sizes, offsets, indices, duWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (dv) {
TBBStencilKernel kernel(src, srcDesc, dv, dvDesc,
sizes, offsets, indices, dvWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
}
// ---------------------------------------------------------------------------
template <typename T>
struct BufferAdapter {
BufferAdapter(T *p, int length, int stride) :
_p(p), _length(length), _stride(stride) { }
void Clear() {
for (int i = 0; i < _length; ++i) _p[i] = 0;
}
void AddWithWeight(T const *src, float w) {
if (_p) {
for (int i = 0; i < _length; ++i) {
_p[i] += src[i] * w;
}
}
}
const T *operator[] (int index) const {
return _p + _stride * index;
}
BufferAdapter<T> & operator ++() {
if (_p) {
_p += _stride;
}
return *this;
}
T *_p;
int _length;
int _stride;
};
class TbbEvalPatchesKernel {
VertexBufferDescriptor _srcDesc;
VertexBufferDescriptor _dstDesc;
VertexBufferDescriptor _dstDuDesc;
VertexBufferDescriptor _dstDvDesc;
float const * _src;
float * _dst;
float * _dstDu;
float * _dstDv;
int _numPatchCoords;
const PatchCoord *_patchCoords;
const PatchArray *_patchArrayBuffer;
const int *_patchIndexBuffer;
const PatchParam *_patchParamBuffer;
public:
TbbEvalPatchesKernel(float const *src,
VertexBufferDescriptor srcDesc,
float *dst,
VertexBufferDescriptor dstDesc,
float *dstDu,
VertexBufferDescriptor dstDuDesc,
float *dstDv,
VertexBufferDescriptor dstDvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) :
_srcDesc(srcDesc), _dstDesc(dstDesc),
_dstDuDesc(dstDuDesc), _dstDvDesc(dstDvDesc),
_src(src), _dst(dst), _dstDu(dstDu), _dstDv(dstDv),
_numPatchCoords(numPatchCoords),
_patchCoords(patchCoords),
_patchArrayBuffer(patchArrayBuffer),
_patchIndexBuffer(patchIndexBuffer),
_patchParamBuffer(patchParamBuffer) {
}
void operator() (tbb::blocked_range<int> const &r) const {
if (_dstDu == NULL && _dstDv == NULL) {
compute(r);
} else {
computeWithDerivative(r);
}
}
void compute(tbb::blocked_range<int> const &r) const {
float wP[20], wDs[20], wDt[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
BufferAdapter<float> dstT(_dst + _dstDesc.offset
+ r.begin() * _dstDesc.stride,
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu,
_dstDuDesc.length,
_dstDuDesc.stride);
BufferAdapter<float> dstDvT(_dstDv,
_dstDvDesc.length,
_dstDvDesc.stride);
for (int i = r.begin(); i < r.end(); ++i) {
PatchCoord const &coord = _patchCoords[i];
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
}
const int *cvs =
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
++dstT;
}
}
void computeWithDerivative(tbb::blocked_range<int> const &r) const {
float wP[20], wDs[20], wDt[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
BufferAdapter<float> dstT(_dst + _dstDesc.offset
+ r.begin() * _dstDesc.stride,
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
+ r.begin() * _dstDuDesc.stride,
_dstDuDesc.length,
_dstDuDesc.stride);
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
+ r.begin() * _dstDvDesc.stride,
_dstDvDesc.length,
_dstDvDesc.stride);
for (int i = r.begin(); i < r.end(); ++i) {
PatchCoord const &coord = _patchCoords[i];
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
}
const int *cvs =
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
dstDuT.Clear();
dstDvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
dstDuT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dstDvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
}
++dstT;
++dstDuT;
++dstDvT;
}
}
};
void
TbbEvalPatches(float const *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDu,
VertexBufferDescriptor const &dstDuDesc,
float *dstDv,
VertexBufferDescriptor const &dstDvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
dstDu, dstDuDesc, dstDv, dstDvDesc,
numPatchCoords, patchCoords,
patchArrayBuffer,
patchIndexBuffer,
patchParamBuffer);
tbb::blocked_range<int> range(0, numPatchCoords, grain_size);
tbb::parallel_for(range, kernel);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -32,6 +32,9 @@ namespace OPENSUBDIV_VERSION {
namespace Osd {
struct PatchArray;
struct PatchCoord;
struct PatchParam;
struct VertexBufferDescriptor;
void
@ -45,6 +48,38 @@ TbbEvalStencils(float const * src,
float const * weights,
int start, int end);
void
TbbEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end);
void
TbbEvalPatches(float const *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDu,
VertexBufferDescriptor const &dstDuDesc,
float *dstDv,
VertexBufferDescriptor const &dstDvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

111
opensubdiv/osd/types.h Normal file
View File

@ -0,0 +1,111 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_TYPES_H
#define OPENSUBDIV3_OSD_TYPES_H
#include "../version.h"
#include "../far/patchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Coordinates set on a patch table
///
/// XXX: this class may be moved into Far
///
struct PatchCoord {
// 5-ints struct.
/// \brief Constructor
///
/// @param handle patch handle
///
/// @param s parametric location on the patch
///
/// @param t parametric location on the patch
///
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
handle(handle), s(s), t(t) { }
PatchCoord() : s(0), t(0) {
handle.arrayIndex = 0;
handle.patchIndex = 0;
handle.vertIndex = 0;
}
Far::PatchTable::PatchHandle handle; ///< patch handle
float s, t; ///< parametric location on patch
};
struct PatchArray {
// 4-ints struct.
PatchArray(Far::PatchDescriptor desc, int numPatches,
int indexBase, int primitiveIdBase) :
desc(desc), numPatches(numPatches), indexBase(indexBase),
primitiveIdBase(primitiveIdBase) {}
Far::PatchDescriptor const &GetDescriptor() const {
return desc;
}
int GetPatchType() const {
return desc.GetType();
}
int GetNumPatches() const {
return numPatches;
}
int GetIndexBase() const {
return indexBase;
}
int GetPrimitiveIdBase() const {
return primitiveIdBase;
}
Far::PatchDescriptor desc;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
struct PatchParam {
// int3 struct.
int faceIndex;
unsigned int patchBits;
float sharpness;
};
typedef std::vector<PatchArray> PatchArrayVector;
typedef std::vector<PatchParam> PatchParamVector;
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_TYPES_H