mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2025-01-09 00:00:18 +00:00
Osd API refactor: EvalStencils and EvalPatches
Add EvalStencils and EvalPatches API for most of CPU and GPU evaluators. with this change, Eval API in the osd layer consists of following parts: - Evaluators (Cpu, Omp, Tbb, Cuda, CL, GLXFB, GLCompute, D3D11Compute) implements EvalStencils and EvalPatches(*). Both supports derivatives (not fully implemented though) - Interop vertex buffer classes (optional, same as before) Note that these classes are not necessary to use Evaluators. All evaluators have EvalStencils/Patches which take device-specific buffer objects. For example, GLXFBEvaluator can take GLuint directly for both stencil tables and input primvars. Although using these interop classes makes it easy to integrate osd into relatively simple applications. - device-dependent StencilTable and PatchTable (optional) These are also optional, but can be used simply a substitute of Far::StencilTable and Far::PatchTable for osd evaluators. - PatchArray, PatchCoord, PatchParam They are tiny structs used for GPU based patch evaluation. (*) TODO and known issues: - CLEvaluator and D3D11Evaluator's EvalPatches() have not been implemented. - GPU Gregory patch evaluation has not been implemented in EvalPatches(). - CudaEvaluator::EvalPatches() is very unstable. - All patch evaluation kernels have not been well optimized. - Currently GLXFB kernel doesn't support derivative evaluation. There's a technical difficulty for the multi-stream output.
This commit is contained in:
parent
d335c7249e
commit
541aeddd3a
6
examples/dxPtexViewer/dxPtexViewer.cpp
Executable file → Normal file
6
examples/dxPtexViewer/dxPtexViewer.cpp
Executable file → Normal file
@ -819,7 +819,7 @@ createOsdMesh(int level, int kernel) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
@ -984,7 +984,7 @@ drawModel() {
|
||||
UINT hOffsets = 0;
|
||||
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
|
||||
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
g_pd3dDeviceContext->IASetIndexBuffer(
|
||||
@ -993,7 +993,7 @@ drawModel() {
|
||||
|
||||
// patch drawing
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
||||
|
@ -630,7 +630,7 @@ ShaderCache g_shaderCache;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
typedef OpenSubdiv::Far::PatchDescriptor Descriptor;
|
||||
@ -822,7 +822,7 @@ display() {
|
||||
UINT hOffsets = 0;
|
||||
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
|
||||
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
g_pd3dDeviceContext->IASetIndexBuffer(
|
||||
@ -834,7 +834,7 @@ display() {
|
||||
int numDrawCalls = 0;
|
||||
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
630
examples/glEvalLimit/glEvalLimit.cpp
Executable file → Normal file
630
examples/glEvalLimit/glEvalLimit.cpp
Executable file → Normal file
@ -44,15 +44,54 @@ GLFWmonitor* g_primary=0;
|
||||
|
||||
#include <osd/cpuEvaluator.h>
|
||||
#include <osd/cpuVertexBuffer.h>
|
||||
#include <osd/cpuPatchTable.h>
|
||||
#include <osd/cpuGLVertexBuffer.h>
|
||||
#include <osd/mesh.h>
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <osd/tbbEvaluator.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
#include <osd/ompEvaluator.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
#include <osd/cudaEvaluator.h>
|
||||
#include <osd/cudaVertexBuffer.h>
|
||||
#include <osd/cudaGLVertexBuffer.h>
|
||||
#include <osd/cudaPatchTable.h>
|
||||
#include "../common/cudaDeviceContext.h"
|
||||
|
||||
CudaDeviceContext g_cudaDeviceContext;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
#include <osd/clVertexBuffer.h>
|
||||
#include <osd/clGLVertexBuffer.h>
|
||||
#include <osd/clEvaluator.h>
|
||||
#include <osd/clPatchTable.h>
|
||||
#include "../common/clDeviceContext.h"
|
||||
CLDeviceContext g_clDeviceContext;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
#include <osd/glXFBEvaluator.h>
|
||||
#include <osd/glVertexBuffer.h>
|
||||
#include <osd/glPatchTable.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
#include <osd/glComputeEvaluator.h>
|
||||
#include <osd/glVertexBuffer.h>
|
||||
#include <osd/glPatchTable.h>
|
||||
#endif
|
||||
|
||||
#include <far/gregoryBasis.h>
|
||||
#include <far/endCapGregoryBasisPatchFactory.h>
|
||||
#include <far/topologyRefiner.h>
|
||||
#include <far/stencilTableFactory.h>
|
||||
#include <far/patchTableFactory.h>
|
||||
#include <far/patchMap.h>
|
||||
|
||||
#include <far/error.h>
|
||||
|
||||
@ -75,32 +114,44 @@ GLFWmonitor* g_primary=0;
|
||||
using namespace OpenSubdiv;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
enum KernelType { kCPU = 0,
|
||||
kOPENMP = 1,
|
||||
kTBB = 2,
|
||||
kCUDA = 3,
|
||||
kCL = 4,
|
||||
kGLXFB = 5,
|
||||
kGLCompute = 6 };
|
||||
|
||||
enum EndCap { kEndCapBSplineBasis,
|
||||
kEndCapGregoryBasis };
|
||||
|
||||
enum DrawMode { kUV,
|
||||
kVARYING,
|
||||
kNORMAL,
|
||||
kSHADE,
|
||||
kFACEVARYING };
|
||||
|
||||
std::vector<float> g_orgPositions,
|
||||
g_positions,
|
||||
g_varyingColors;
|
||||
|
||||
int g_currentShape = 0,
|
||||
g_level = 3,
|
||||
g_kernel = kCPU,
|
||||
g_endCap = kEndCapBSplineBasis,
|
||||
g_numElements = 3;
|
||||
|
||||
std::vector<int> g_coarseEdges;
|
||||
std::vector<float> g_coarseEdgeSharpness;
|
||||
std::vector<float> g_coarseVertexSharpness;
|
||||
|
||||
enum DrawMode { kRANDOM=0,
|
||||
kUV,
|
||||
kVARYING,
|
||||
kNORMAL,
|
||||
kSHADE,
|
||||
kFACEVARYING };
|
||||
|
||||
int g_running = 1,
|
||||
g_width = 1024,
|
||||
g_height = 1024,
|
||||
g_fullscreen = 0,
|
||||
g_drawCageEdges = 1,
|
||||
g_drawCageVertices = 1,
|
||||
g_drawMode = kVARYING,
|
||||
g_drawMode = kUV,
|
||||
g_prev_x = 0,
|
||||
g_prev_y = 0,
|
||||
g_mbutton[3] = {0, 0, 0},
|
||||
@ -131,11 +182,9 @@ float g_computeTime = 0;
|
||||
Stopwatch g_fpsTimer;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
int g_nparticles=0,
|
||||
g_nsamples=101,
|
||||
g_nsamplesFound=0;
|
||||
int g_nParticles = 65536;
|
||||
|
||||
bool g_randomStart=false;
|
||||
bool g_randomStart = true;//false;
|
||||
|
||||
GLuint g_cageEdgeVAO = 0,
|
||||
g_cageEdgeVBO = 0,
|
||||
@ -155,6 +204,7 @@ struct Program {
|
||||
GLuint attrColor;
|
||||
GLuint attrTangentU;
|
||||
GLuint attrTangentV;
|
||||
GLuint attrPatchCoord;
|
||||
} g_defaultProgram;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -204,37 +254,189 @@ createCoarseMesh(OpenSubdiv::Far::TopologyRefiner const & refiner) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
Far::TopologyRefiner * g_topologyRefiner = 0;
|
||||
|
||||
Far::StencilTable const * g_vertexStencils = NULL;
|
||||
Far::StencilTable const * g_varyingStencils = NULL;
|
||||
|
||||
Far::PatchTable const * g_patchTable = NULL;
|
||||
Far::PatchMap const * g_patchMap = NULL;
|
||||
std::vector<Osd::PatchCoord> g_patchCoords;
|
||||
|
||||
Osd::VertexBufferDescriptor g_idesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 3),
|
||||
g_odesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_vdesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_duDesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_dvDesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_fvidesc(/*offset*/ 0, /*legnth*/ 2, /*stride*/ 2),
|
||||
g_fvodesc(/*offset*/ 3, /*legnth*/ 2, /*stride*/ 6);
|
||||
// input and output vertex data
|
||||
class EvalOutputBase {
|
||||
public:
|
||||
virtual ~EvalOutputBase() {}
|
||||
virtual GLuint BindVertexData() const = 0;
|
||||
virtual GLuint BindDerivatives() const = 0;
|
||||
virtual GLuint BindPatchCoords() const = 0;
|
||||
virtual void UpdateData(const float *src, int startVertex, int numVertices) = 0;
|
||||
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) = 0;
|
||||
virtual void Refine() = 0;
|
||||
virtual void EvalPatches() = 0;
|
||||
virtual void EvalPatchesWithDerivatives() = 0;
|
||||
virtual void EvalPatchesVarying() = 0;
|
||||
virtual void UpdatePatchCoords(
|
||||
std::vector<Osd::PatchCoord> const &patchCoords) = 0;
|
||||
};
|
||||
|
||||
// input vertex data (coarse + refined)
|
||||
Osd::CpuVertexBuffer * g_vertexData = 0;
|
||||
Osd::CpuVertexBuffer * g_varyingData = 0;
|
||||
// note: Since we don't have a class for device-patchcoord container in osd,
|
||||
// we cheat to use vertexbuffer as a patch-coord (5int) container.
|
||||
//
|
||||
// Please don't follow the pattern in your actual application.
|
||||
//
|
||||
template<typename SRC_VERTEX_BUFFER, typename EVAL_VERTEX_BUFFER,
|
||||
typename STENCIL_TABLE, typename PATCH_TABLE, typename EVALUATOR,
|
||||
typename DEVICE_CONTEXT = void>
|
||||
class EvalOutput : public EvalOutputBase {
|
||||
public:
|
||||
typedef OpenSubdiv::Osd::EvaluatorCacheT<EVALUATOR> EvaluatorCache;
|
||||
|
||||
// output vertex data (limit locations)
|
||||
Osd::CpuGLVertexBuffer * g_outVertexData = NULL;
|
||||
Osd::CpuGLVertexBuffer * g_outDerivatives = NULL;
|
||||
EvalOutput(Far::StencilTable const *vertexStencils,
|
||||
Far::StencilTable const *varyingStencils,
|
||||
int numCoarseVerts, int numTotalVerts, int numParticles,
|
||||
Far::PatchTable const *patchTable,
|
||||
EvaluatorCache *evaluatorCache = NULL,
|
||||
DEVICE_CONTEXT *deviceContext = NULL)
|
||||
: _srcDesc( /*offset*/ 0, /*length*/ 3, /*stride*/ 3),
|
||||
_srcVaryingDesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 3),
|
||||
_vertexDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
_varyingDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
_duDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
_dvDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
_deviceContext(deviceContext) {
|
||||
_srcData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
|
||||
_srcVaryingData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
|
||||
_vertexData = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
|
||||
_derivatives = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
|
||||
_patchTable = PATCH_TABLE::Create(patchTable, _deviceContext);
|
||||
_patchCoords = NULL;
|
||||
_numCoarseVerts = numCoarseVerts;
|
||||
_vertexStencils =
|
||||
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(vertexStencils, _deviceContext);
|
||||
_varyingStencils =
|
||||
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(varyingStencils, _deviceContext);
|
||||
_evaluatorCache = evaluatorCache;
|
||||
}
|
||||
~EvalOutput() {
|
||||
delete _srcData;
|
||||
delete _srcVaryingData;
|
||||
delete _vertexData;
|
||||
delete _derivatives;
|
||||
delete _patchTable;
|
||||
delete _patchCoords;
|
||||
}
|
||||
virtual GLuint BindVertexData() const {
|
||||
return _vertexData->BindVBO();
|
||||
}
|
||||
virtual GLuint BindDerivatives() const {
|
||||
return _derivatives->BindVBO();
|
||||
}
|
||||
virtual GLuint BindPatchCoords() const {
|
||||
return _patchCoords->BindVBO();
|
||||
}
|
||||
virtual void UpdateData(const float *src, int startVertex, int numVertices) {
|
||||
_srcData->UpdateData(src, startVertex, numVertices, _deviceContext);
|
||||
}
|
||||
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) {
|
||||
_srcVaryingData->UpdateData(src, startVertex, numVertices, _deviceContext);
|
||||
}
|
||||
virtual void Refine() {
|
||||
Osd::VertexBufferDescriptor dstDesc = _srcDesc;
|
||||
dstDesc.offset += _numCoarseVerts * _srcDesc.stride;
|
||||
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcDesc, dstDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalStencils(_srcData, _srcDesc,
|
||||
_srcData, dstDesc,
|
||||
_vertexStencils,
|
||||
evalInstance,
|
||||
_deviceContext);
|
||||
|
||||
dstDesc = _srcVaryingDesc;
|
||||
dstDesc.offset += _numCoarseVerts * _srcVaryingDesc.stride;
|
||||
evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcVaryingDesc, dstDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalStencils(_srcVaryingData, _srcVaryingDesc,
|
||||
_srcVaryingData, dstDesc,
|
||||
_varyingStencils,
|
||||
evalInstance,
|
||||
_deviceContext);
|
||||
}
|
||||
virtual void EvalPatches() {
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalPatches(
|
||||
_srcData, _srcDesc,
|
||||
_vertexData, _vertexDesc,
|
||||
_patchCoords->GetNumVertices(),
|
||||
_patchCoords,
|
||||
_patchTable, evalInstance, _deviceContext);
|
||||
}
|
||||
virtual void EvalPatchesWithDerivatives() {
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
|
||||
EVALUATOR::EvalPatches(
|
||||
_srcData, _srcDesc,
|
||||
_vertexData, _vertexDesc,
|
||||
_derivatives, _duDesc,
|
||||
_derivatives, _dvDesc,
|
||||
_patchCoords->GetNumVertices(),
|
||||
_patchCoords,
|
||||
_patchTable, evalInstance, _deviceContext);
|
||||
}
|
||||
virtual void EvalPatchesVarying() {
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcVaryingDesc, _varyingDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalPatches(
|
||||
_srcVaryingData, _srcVaryingDesc,
|
||||
// varyingdata is interleved in vertexData.
|
||||
_vertexData, _varyingDesc,
|
||||
_patchCoords->GetNumVertices(),
|
||||
_patchCoords,
|
||||
_patchTable, evalInstance, _deviceContext);
|
||||
}
|
||||
virtual void UpdatePatchCoords(
|
||||
std::vector<Osd::PatchCoord> const &patchCoords) {
|
||||
if (_patchCoords and
|
||||
_patchCoords->GetNumVertices() != (int)patchCoords.size()) {
|
||||
delete _patchCoords;
|
||||
_patchCoords = NULL;
|
||||
}
|
||||
if (not _patchCoords) {
|
||||
_patchCoords = EVAL_VERTEX_BUFFER::Create(5,
|
||||
(int)patchCoords.size(),
|
||||
_deviceContext);
|
||||
}
|
||||
_patchCoords->UpdateData((float*)&patchCoords[0], 0, (int)patchCoords.size(), _deviceContext);
|
||||
}
|
||||
private:
|
||||
SRC_VERTEX_BUFFER *_srcData;
|
||||
SRC_VERTEX_BUFFER *_srcVaryingData;
|
||||
EVAL_VERTEX_BUFFER *_vertexData;
|
||||
EVAL_VERTEX_BUFFER *_derivatives;
|
||||
EVAL_VERTEX_BUFFER *_varyingData;
|
||||
EVAL_VERTEX_BUFFER *_patchCoords;
|
||||
PATCH_TABLE *_patchTable;
|
||||
Osd::VertexBufferDescriptor _srcDesc;
|
||||
Osd::VertexBufferDescriptor _srcVaryingDesc;
|
||||
Osd::VertexBufferDescriptor _vertexDesc;
|
||||
Osd::VertexBufferDescriptor _varyingDesc;
|
||||
Osd::VertexBufferDescriptor _duDesc;
|
||||
Osd::VertexBufferDescriptor _dvDesc;
|
||||
int _numCoarseVerts;
|
||||
|
||||
STENCIL_TABLE const *_vertexStencils;
|
||||
STENCIL_TABLE const *_varyingStencils;
|
||||
|
||||
EvaluatorCache *_evaluatorCache;
|
||||
DEVICE_CONTEXT *_deviceContext;
|
||||
};
|
||||
|
||||
EvalOutputBase *g_evalOutput = NULL;
|
||||
|
||||
STParticles * g_particles=0;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
updateGeom() {
|
||||
|
||||
int nverts = (int)g_orgPositions.size() / 3;
|
||||
|
||||
const float *p = &g_orgPositions[0];
|
||||
@ -255,28 +457,18 @@ updateGeom() {
|
||||
Stopwatch s;
|
||||
s.Start();
|
||||
|
||||
g_vertexData->UpdateData( &g_positions[0], 0, nverts);
|
||||
// update coarse vertices
|
||||
g_evalOutput->UpdateData(&g_positions[0], 0, nverts);
|
||||
|
||||
if (! g_topologyRefiner) return;
|
||||
// update coarse varying
|
||||
if (g_drawMode == kVARYING) {
|
||||
g_evalOutput->UpdateVaryingData(&g_varyingColors[0], 0, nverts);
|
||||
|
||||
// note that for patch eval we need coarse+refined combined buffer.
|
||||
int nCoarseVertices = g_topologyRefiner->GetLevel(0).GetNumVertices();
|
||||
Osd::CpuEvaluator::EvalStencils(g_vertexData,
|
||||
Osd::VertexBufferDescriptor(0, 3, 3),
|
||||
g_vertexData,
|
||||
Osd::VertexBufferDescriptor(
|
||||
nCoarseVertices*3, 3, 3),
|
||||
g_vertexStencils);
|
||||
|
||||
if (g_varyingData) {
|
||||
Osd::CpuEvaluator::EvalStencils(g_varyingData,
|
||||
Osd::VertexBufferDescriptor(0, 3, 3),
|
||||
g_varyingData,
|
||||
Osd::VertexBufferDescriptor(
|
||||
nCoarseVertices*3, 3, 3),
|
||||
g_varyingStencils);
|
||||
}
|
||||
|
||||
// Refine
|
||||
g_evalOutput->Refine();
|
||||
|
||||
s.Stop();
|
||||
g_computeTime = float(s.GetElapsed() * 1000.0f);
|
||||
|
||||
@ -287,60 +479,28 @@ updateGeom() {
|
||||
|
||||
// Apply 'dynamics' update
|
||||
assert(g_particles);
|
||||
|
||||
g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time...
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
|
||||
= g_particles->GetPatchCoords();
|
||||
|
||||
// resolve particle positions into patch handles
|
||||
// XXX: this process should be handled by OsdKernel in parallel
|
||||
g_patchCoords.clear();
|
||||
for (int i = 0; i < g_particles->GetNumParticles(); ++i) {
|
||||
STParticles::Position const &position = g_particles->GetPositions()[i];
|
||||
Far::PatchTable::PatchHandle const *handle =
|
||||
g_patchMap->FindPatch(position.ptexIndex, position.s, position.t);
|
||||
if (handle) {
|
||||
g_patchCoords.push_back(Osd::PatchCoord(
|
||||
*handle, position.s, position.t));
|
||||
}
|
||||
}
|
||||
// update patchcoord to be evaluated
|
||||
g_evalOutput->UpdatePatchCoords(patchCoords);
|
||||
|
||||
// Evaluate the positions of the samples on the limit surface
|
||||
if (g_drawMode == kNORMAL || g_drawMode == kSHADE) {
|
||||
// evaluate positions and derivatives
|
||||
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
|
||||
g_vertexData, g_idesc,
|
||||
g_outVertexData, g_odesc,
|
||||
g_outDerivatives, g_duDesc,
|
||||
g_outDerivatives, g_dvDesc,
|
||||
(int)g_patchCoords.size(),
|
||||
&g_patchCoords[0],
|
||||
g_patchTable, NULL);
|
||||
g_evalOutput->EvalPatchesWithDerivatives();
|
||||
} else {
|
||||
// evaluate positions
|
||||
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
|
||||
g_vertexData, g_idesc,
|
||||
g_outVertexData, g_odesc,
|
||||
(int)g_patchCoords.size(),
|
||||
&g_patchCoords[0],
|
||||
g_patchTable, NULL);
|
||||
g_evalOutput->EvalPatches();
|
||||
}
|
||||
|
||||
// color
|
||||
if (g_drawMode == kUV) {
|
||||
// store patchCoords as colors
|
||||
float *p = g_outVertexData->BindCpuBuffer() + g_vdesc.offset;
|
||||
for (int i = 0; i < (int)g_patchCoords.size(); ++i) {
|
||||
p[0] = g_patchCoords[i].s;
|
||||
p[1] = g_patchCoords[i].t;
|
||||
p[2] = 0;
|
||||
p += g_vdesc.stride;
|
||||
}
|
||||
} else if (g_drawMode == kVARYING) {
|
||||
if (g_drawMode == kVARYING) {
|
||||
// XXX: is this really varying?
|
||||
Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc,
|
||||
g_outVertexData, g_vdesc,
|
||||
(int)g_patchCoords.size(),
|
||||
&g_patchCoords[0],
|
||||
g_patchTable, NULL);
|
||||
g_evalOutput->EvalPatchesVarying();
|
||||
}
|
||||
|
||||
s.Stop();
|
||||
@ -359,8 +519,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape);
|
||||
OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape);
|
||||
|
||||
delete g_topologyRefiner;
|
||||
g_topologyRefiner =
|
||||
Far::TopologyRefiner *topologyRefiner =
|
||||
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Create(*shape,
|
||||
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
|
||||
|
||||
@ -371,22 +530,16 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
|
||||
float speed = g_particles ? g_particles->GetSpeed() : 0.2f;
|
||||
|
||||
// Create the 'uv particles' manager - this class manages the limit
|
||||
// location samples (ptex face index, (s,t) and updates them between frames.
|
||||
// Note: the number of limit locations can be entirely arbitrary
|
||||
delete g_particles;
|
||||
g_particles = new STParticles(*g_topologyRefiner, g_nsamples, !g_randomStart);
|
||||
g_nparticles = g_particles->GetNumParticles();
|
||||
g_particles->SetSpeed(speed);
|
||||
|
||||
createCoarseMesh(*g_topologyRefiner);
|
||||
createCoarseMesh(*topologyRefiner);
|
||||
|
||||
Far::StencilTable const * vertexStencils = NULL;
|
||||
Far::StencilTable const * varyingStencils = NULL;
|
||||
int nverts=0;
|
||||
{
|
||||
// Apply feature adaptive refinement to the mesh so that we can use the
|
||||
// limit evaluation API features.
|
||||
Far::TopologyRefiner::AdaptiveOptions options(level);
|
||||
g_topologyRefiner->RefineAdaptive(options);
|
||||
topologyRefiner->RefineAdaptive(options);
|
||||
|
||||
// Generate stencil table to update the bi-cubic patches control
|
||||
// vertices after they have been re-posed (both for vertex & varying
|
||||
@ -395,27 +548,33 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
soptions.generateOffsets=true;
|
||||
soptions.generateIntermediateLevels=true;
|
||||
|
||||
Far::StencilTable const * vertexStencils =
|
||||
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
|
||||
vertexStencils =
|
||||
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
|
||||
|
||||
soptions.interpolationMode = Far::StencilTableFactory::INTERPOLATE_VARYING;
|
||||
Far::StencilTable const * varyingStencils =
|
||||
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
|
||||
|
||||
varyingStencils =
|
||||
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
|
||||
|
||||
// Generate bi-cubic patch table for the limit surface
|
||||
Far::PatchTableFactory::Options poptions;
|
||||
poptions.SetEndCapType(
|
||||
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
|
||||
if (g_endCap == kEndCapBSplineBasis) {
|
||||
poptions.SetEndCapType(
|
||||
Far::PatchTableFactory::Options::ENDCAP_BSPLINE_BASIS);
|
||||
} else {
|
||||
poptions.SetEndCapType(
|
||||
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
|
||||
}
|
||||
|
||||
Far::PatchTable const * patchTable =
|
||||
Far::PatchTableFactory::Create(*g_topologyRefiner, poptions);
|
||||
Far::PatchTableFactory::Create(*topologyRefiner, poptions);
|
||||
|
||||
// append endcap stencils
|
||||
if (Far::StencilTable const *endCapVertexStencilTable =
|
||||
patchTable->GetEndCapVertexStencilTable()) {
|
||||
Far::StencilTable const *table =
|
||||
Far::StencilTableFactory::AppendEndCapStencilTable(
|
||||
*g_topologyRefiner,
|
||||
*topologyRefiner,
|
||||
vertexStencils, endCapVertexStencilTable);
|
||||
delete vertexStencils;
|
||||
vertexStencils = table;
|
||||
@ -424,7 +583,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
patchTable->GetEndCapVaryingStencilTable()) {
|
||||
Far::StencilTable const *table =
|
||||
Far::StencilTableFactory::AppendEndCapStencilTable(
|
||||
*g_topologyRefiner,
|
||||
*topologyRefiner,
|
||||
varyingStencils, endCapVaryingStencilTable);
|
||||
delete varyingStencils;
|
||||
varyingStencils = table;
|
||||
@ -434,45 +593,100 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
nverts = vertexStencils->GetNumControlVertices() +
|
||||
vertexStencils->GetNumStencils();
|
||||
|
||||
if (g_vertexStencils) delete g_vertexStencils;
|
||||
g_vertexStencils = vertexStencils;
|
||||
if (g_varyingStencils) delete g_varyingStencils;
|
||||
g_varyingStencils = varyingStencils;
|
||||
|
||||
if (g_patchTable) delete g_patchTable;
|
||||
g_patchTable = patchTable;
|
||||
|
||||
// Create a far patch map
|
||||
if (g_patchMap) delete g_patchMap;
|
||||
g_patchMap = new Far::PatchMap(*g_patchTable);
|
||||
}
|
||||
|
||||
{ // Create vertex primvar buffer for the CVs
|
||||
delete g_vertexData;
|
||||
g_vertexData = Osd::CpuVertexBuffer::Create(3, nverts);
|
||||
// note that for patch eval we need coarse+refined combined buffer.
|
||||
int nCoarseVertices = topologyRefiner->GetLevel(0).GetNumVertices();
|
||||
|
||||
// Create varying primvar buffer for the CVs with random colors.
|
||||
// These are immediately interpolated (once) and saved for display.
|
||||
delete g_varyingData; g_varyingData = 0;
|
||||
if (g_drawMode==kVARYING) {
|
||||
g_varyingData = Osd::CpuVertexBuffer::Create(3, nverts);
|
||||
g_varyingData->UpdateData(
|
||||
&g_varyingColors[0], 0, (int)g_varyingColors.size()/3 );
|
||||
}
|
||||
|
||||
// Create output buffers for the limit samples (position & tangents)
|
||||
delete g_outVertexData;
|
||||
g_outVertexData = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
|
||||
memset(g_outVertexData->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
|
||||
if (g_drawMode==kRANDOM) {
|
||||
createRandomColors(g_nparticles, 6, g_outVertexData->BindCpuBuffer()+3);
|
||||
}
|
||||
|
||||
delete g_outDerivatives;
|
||||
g_outDerivatives = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
|
||||
memset(g_outDerivatives->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
|
||||
delete g_evalOutput;
|
||||
if (g_kernel == kCPU) {
|
||||
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
|
||||
Osd::CpuGLVertexBuffer,
|
||||
Far::StencilTable,
|
||||
Osd::CpuPatchTable,
|
||||
Osd::CpuEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
} else if (g_kernel == kOPENMP) {
|
||||
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
|
||||
Osd::CpuGLVertexBuffer,
|
||||
Far::StencilTable,
|
||||
Osd::CpuPatchTable,
|
||||
Osd::OmpEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
} else if (g_kernel == kTBB) {
|
||||
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
|
||||
Osd::CpuGLVertexBuffer,
|
||||
Far::StencilTable,
|
||||
Osd::CpuPatchTable,
|
||||
Osd::TbbEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
} else if (g_kernel == kCUDA) {
|
||||
g_evalOutput = new EvalOutput<Osd::CudaVertexBuffer,
|
||||
Osd::CudaGLVertexBuffer,
|
||||
Osd::CudaStencilTable,
|
||||
Osd::CudaPatchTable,
|
||||
Osd::CudaEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
} else if (g_kernel == kCL) {
|
||||
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
|
||||
g_evalOutput = new EvalOutput<Osd::CLVertexBuffer,
|
||||
Osd::CLGLVertexBuffer,
|
||||
Osd::CLStencilTable,
|
||||
Osd::CLPatchTable,
|
||||
Osd::CLEvaluator,
|
||||
CLDeviceContext>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||
&clEvaluatorCache, &g_clDeviceContext);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
} else if (g_kernel == kGLXFB) {
|
||||
static Osd::EvaluatorCacheT<Osd::GLXFBEvaluator> glXFBEvaluatorCache;
|
||||
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
|
||||
Osd::GLVertexBuffer,
|
||||
Osd::GLStencilTableTBO,
|
||||
Osd::GLPatchTable,
|
||||
Osd::GLXFBEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||
&glXFBEvaluatorCache);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
} else if (g_kernel == kGLCompute) {
|
||||
static Osd::EvaluatorCacheT<Osd::GLComputeEvaluator> glComputeEvaluatorCache;
|
||||
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
|
||||
Osd::GLVertexBuffer,
|
||||
Osd::GLStencilTableSSBO,
|
||||
Osd::GLPatchTable,
|
||||
Osd::GLComputeEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||
&glComputeEvaluatorCache);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Create the 'uv particles' manager - this class manages the limit
|
||||
// location samples (ptex face index, (s,t) and updates them between frames.
|
||||
// Note: the number of limit locations can be entirely arbitrary
|
||||
delete g_particles;
|
||||
g_particles = new STParticles(*topologyRefiner, g_patchTable,
|
||||
g_nParticles, !g_randomStart);
|
||||
g_nParticles = g_particles->GetNumParticles();
|
||||
g_particles->SetSpeed(speed);
|
||||
|
||||
updateGeom();
|
||||
|
||||
}
|
||||
@ -505,32 +719,33 @@ linkDefaultProgram() {
|
||||
"in vec3 color;\n"
|
||||
"in vec3 tangentU;\n"
|
||||
"in vec3 tangentV;\n"
|
||||
"in vec2 patchCoord;\n"
|
||||
"out vec4 fragColor;\n"
|
||||
"out vec3 normal;\n"
|
||||
"uniform mat4 ModelViewMatrix;\n"
|
||||
"uniform mat4 ProjectionMatrix;\n"
|
||||
"uniform int DrawMode;\n"
|
||||
"void main() {\n"
|
||||
" fragColor = vec4(color, 1);\n"
|
||||
// XXX: fix the normal transform
|
||||
" normal = (ModelViewMatrix * vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
|
||||
" vec3 normal = (ModelViewMatrix * "
|
||||
" vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
|
||||
" gl_Position = ProjectionMatrix * ModelViewMatrix * "
|
||||
" vec4(position, 1);\n"
|
||||
" if (DrawMode == 0) {\n" // UV
|
||||
" fragColor = vec4(patchCoord.x, patchCoord.y, 0, 1);\n"
|
||||
" } else if (DrawMode == 2) {\n"
|
||||
" fragColor = vec4(normal*0.5+vec3(0.5), 1);\n"
|
||||
" } else if (DrawMode == 3) {\n"
|
||||
" fragColor = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
|
||||
" } else {\n" // varying
|
||||
" fragColor = vec4(color, 1);\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
|
||||
static const char *fsSrc =
|
||||
GLSL_VERSION_DEFINE
|
||||
"in vec4 fragColor;\n"
|
||||
"in vec3 normal;\n"
|
||||
"uniform int DrawMode;\n"
|
||||
"out vec4 color;\n"
|
||||
"void main() {\n"
|
||||
" if (DrawMode == 3) {\n"
|
||||
" color = vec4(normal*0.5+vec3(0.5), 1);\n"
|
||||
" } else if (DrawMode == 4) {\n"
|
||||
" color = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
|
||||
" } else {\n"
|
||||
" color = fragColor;\n"
|
||||
" }\n"
|
||||
" color = fragColor;\n"
|
||||
"}\n";
|
||||
|
||||
GLuint program = glCreateProgram();
|
||||
@ -544,6 +759,7 @@ linkDefaultProgram() {
|
||||
glBindAttribLocation(program, 1, "color");
|
||||
glBindAttribLocation(program, 2, "tangentU");
|
||||
glBindAttribLocation(program, 3, "tangentV");
|
||||
glBindAttribLocation(program, 4, "patchCoord");
|
||||
glBindFragDataLocation(program, 0, "color");
|
||||
|
||||
glLinkProgram(program);
|
||||
@ -571,6 +787,7 @@ linkDefaultProgram() {
|
||||
g_defaultProgram.attrColor = glGetAttribLocation(program, "color");
|
||||
g_defaultProgram.attrTangentU = glGetAttribLocation(program, "tangentU");
|
||||
g_defaultProgram.attrTangentV = glGetAttribLocation(program, "tangentV");
|
||||
g_defaultProgram.attrPatchCoord = glGetAttribLocation(program, "patchCoord");
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -621,6 +838,7 @@ drawCageEdges() {
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
glVertexAttribPointer(g_defaultProgram.attrPosition,
|
||||
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(g_defaultProgram.attrColor,
|
||||
@ -680,6 +898,7 @@ drawCageVertices() {
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
glVertexAttribPointer(g_defaultProgram.attrPosition,
|
||||
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(g_defaultProgram.attrColor,
|
||||
@ -711,27 +930,33 @@ drawSamples() {
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_outVertexData->BindVBO());
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindVertexData());
|
||||
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_outDerivatives->BindVBO());
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindDerivatives());
|
||||
glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(3, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindPatchCoords());
|
||||
glVertexAttribPointer(4, 2, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 5, (float*)12);
|
||||
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrPosition);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
|
||||
glPointSize(2.0f);
|
||||
glDrawArrays(GL_POINTS, 0, g_nparticles);
|
||||
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
|
||||
glDrawArrays(GL_POINTS, 0, nPatchCoords);
|
||||
glPointSize(1.0f);
|
||||
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPosition);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
|
||||
glBindVertexArray(0);
|
||||
|
||||
@ -789,9 +1014,10 @@ display() {
|
||||
double fps = 1.0/g_fpsTimer.GetElapsed();
|
||||
g_fpsTimer.Start();
|
||||
|
||||
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
|
||||
|
||||
g_hud.DrawString(10, -150, "Particle Speed ([) (]): %.1f", g_particles->GetSpeed());
|
||||
g_hud.DrawString(10, -120, "# Samples : (%d/%d)",
|
||||
g_nsamplesFound, g_outVertexData->GetNumVertices());
|
||||
g_hud.DrawString(10, -120, "# Samples : (%d / %d)", nPatchCoords, g_nParticles);
|
||||
g_hud.DrawString(10, -100, "Compute : %.3f ms", g_computeTime);
|
||||
g_hud.DrawString(10, -80, "Eval : %.3f ms", g_evalTime * 1000.f);
|
||||
g_hud.DrawString(10, -60, "GPU Draw : %.3f ms", drawGpuTime);
|
||||
@ -884,9 +1110,11 @@ void windowClose(GLFWwindow*) {
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
setSamples(bool add) {
|
||||
g_nsamples += add ? 50 : -50;
|
||||
|
||||
g_nsamples = std::max(0, g_nsamples);
|
||||
if (add) {
|
||||
g_nParticles = g_nParticles * 2;
|
||||
} else {
|
||||
g_nParticles = std::max(1, g_nParticles / 2);
|
||||
}
|
||||
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
}
|
||||
@ -936,6 +1164,40 @@ callbackModel(int m) {
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
callbackEndCap(int endCap) {
|
||||
g_endCap = endCap;
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
callbackKernel(int k) {
|
||||
|
||||
g_kernel = k;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
|
||||
if (g_clDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing OpenCL\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
|
||||
if (g_cudaDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing Cuda\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
callbackLevel(int l) {
|
||||
@ -1001,10 +1263,40 @@ initHUD() {
|
||||
g_hud.AddCheckBox("Animate vertices (M)", g_moveScale != 0, 10, 50, callbackAnimate, 0, 'm');
|
||||
g_hud.AddCheckBox("Freeze (spc)", false, 10, 70, callbackFreeze, 0, ' ');
|
||||
|
||||
g_hud.AddCheckBox("Random Start", false, 10, 120, callbackCentered, g_randomStart);
|
||||
g_hud.AddCheckBox("Random Start", g_randomStart, 10, 120, callbackCentered, 0);
|
||||
|
||||
int compute_pulldown = g_hud.AddPullDown("Compute (K)", 475, 10, 300,
|
||||
callbackKernel, 'k');
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CPU", kCPU);
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
g_hud.AddPullDownButton(compute_pulldown, "OPENMP", kOPENMP);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
g_hud.AddPullDownButton(compute_pulldown, "GL XFB", kGLXFB);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
g_hud.AddPullDownButton(compute_pulldown, "GL Compute", kGLCompute);
|
||||
#endif
|
||||
|
||||
int endcap_pulldown = g_hud.AddPullDown("End cap (E)", 10, 140, 200,
|
||||
callbackEndCap, 'e');
|
||||
g_hud.AddPullDownButton(endcap_pulldown, "BSpline",
|
||||
kEndCapBSplineBasis,
|
||||
g_endCap == kEndCapBSplineBasis);
|
||||
g_hud.AddPullDownButton(endcap_pulldown, "GregoryBasis",
|
||||
kEndCapGregoryBasis,
|
||||
g_endCap == kEndCapGregoryBasis);
|
||||
|
||||
int shading_pulldown = g_hud.AddPullDown("Shading (W)", 250, 10, 250, callbackDisplayVaryingColors, 'w');
|
||||
g_hud.AddPullDownButton(shading_pulldown, "Random", kRANDOM, g_drawMode==kRANDOM);
|
||||
g_hud.AddPullDownButton(shading_pulldown, "(u,v)", kUV, g_drawMode==kUV);
|
||||
g_hud.AddPullDownButton(shading_pulldown, "Varying", kVARYING, g_drawMode==kVARYING);
|
||||
g_hud.AddPullDownButton(shading_pulldown, "Normal", kNORMAL, g_drawMode==kNORMAL);
|
||||
|
@ -25,38 +25,104 @@
|
||||
#include "particles.h"
|
||||
|
||||
#include <far/ptexIndices.h>
|
||||
#include <far/patchMap.h>
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <tbb/atomic.h>
|
||||
tbb::atomic<int> g_tbbCounter;
|
||||
class TbbUpdateKernel {
|
||||
public:
|
||||
TbbUpdateKernel(float speed,
|
||||
STParticles::Position *positions,
|
||||
float *velocities,
|
||||
std::vector<STParticles::FaceInfo> const &adjacency,
|
||||
OpenSubdiv::Osd::PatchCoord *patchCoords,
|
||||
OpenSubdiv::Far::PatchMap const *patchMap) :
|
||||
_speed(speed), _positions(positions), _velocities(velocities),
|
||||
_adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) {
|
||||
}
|
||||
|
||||
void operator () (tbb::blocked_range<int> const &r) const {
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
STParticles::Position * p = _positions + i;
|
||||
float *dp = _velocities + i*2;
|
||||
|
||||
// apply velocity
|
||||
p->s += dp[0] * _speed;
|
||||
p->t += dp[1] * _speed;
|
||||
|
||||
// make sure particles can't skip more than 1 face boundary at a time
|
||||
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
|
||||
|
||||
// check if the particle is jumping a boundary
|
||||
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
|
||||
// this is not treated here.
|
||||
int edge = -1;
|
||||
if (p->s >= 1.0f) edge = 1;
|
||||
if (p->s <= 0.0f) edge = 3;
|
||||
if (p->t >= 1.0f) edge = 2;
|
||||
if (p->t <= 0.0f) edge = 0;
|
||||
|
||||
if (edge>=0) {
|
||||
// warp the particle to the other side of the boundary
|
||||
STParticles::WarpParticle(_adjacency, edge, p, dp);
|
||||
}
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
|
||||
// resolve particle positions into patch handles
|
||||
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
|
||||
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
|
||||
if (handle) {
|
||||
int index = g_tbbCounter.fetch_and_add(1);
|
||||
_patchCoords[index] =
|
||||
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t);
|
||||
}
|
||||
}
|
||||
}
|
||||
private:
|
||||
float _speed;
|
||||
STParticles::Position *_positions;
|
||||
float *_velocities;
|
||||
std::vector<STParticles::FaceInfo> const &_adjacency;
|
||||
OpenSubdiv::Osd::PatchCoord *_patchCoords;
|
||||
OpenSubdiv::Far::PatchMap const *_patchMap;
|
||||
};
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
|
||||
STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered) :
|
||||
STParticles::STParticles(Refiner const & refiner,
|
||||
PatchTable const *patchTable,
|
||||
int nParticles, bool centered) :
|
||||
_speed(1.0f) {
|
||||
|
||||
OpenSubdiv::Far::PtexIndices ptexIndices(refiner);
|
||||
|
||||
int nptexfaces = ptexIndices.GetNumFaces(),
|
||||
nsamples = nptexfaces * nparticles;
|
||||
// Create a far patch map
|
||||
_patchMap = new OpenSubdiv::Far::PatchMap(*patchTable);
|
||||
|
||||
int nPtexFaces = ptexIndices.GetNumFaces();
|
||||
|
||||
srand(static_cast<int>(2147483647));
|
||||
|
||||
{ // initialize positions
|
||||
|
||||
_positions.resize(nsamples);
|
||||
_positions.resize(nParticles);
|
||||
Position * pos = &_positions[0];
|
||||
|
||||
for (int i=0; i<nptexfaces; ++i) {
|
||||
for (int j=0; j<nparticles; ++j) {
|
||||
pos->ptexIndex = i;
|
||||
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
++pos;
|
||||
}
|
||||
for (int i = 0; i < nParticles; ++i) {
|
||||
pos->ptexIndex = (int)(((float)rand()/(float)RAND_MAX) * nPtexFaces);
|
||||
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
|
||||
{ // initialize velocities
|
||||
_velocities.resize(nsamples*2);
|
||||
_velocities.resize(nParticles * 2);
|
||||
|
||||
for (int i=0; i<nsamples; ++i) {
|
||||
for (int i = 0; i < nParticles; ++i) {
|
||||
// initialize normalized random directions
|
||||
float s = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
|
||||
t = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
|
||||
@ -68,7 +134,7 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
|
||||
}
|
||||
|
||||
{ // initialize topology adjacency
|
||||
_adjacency.resize(nptexfaces);
|
||||
_adjacency.resize(nPtexFaces);
|
||||
|
||||
OpenSubdiv::Far::TopologyLevel const & refBaseLevel = refiner.GetLevel(0);
|
||||
|
||||
@ -97,42 +163,6 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
|
||||
//std::cout << *this;
|
||||
}
|
||||
|
||||
void
|
||||
STParticles::Update(float deltaTime) {
|
||||
|
||||
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
|
||||
|
||||
Position * p = &_positions[0];
|
||||
float * dp = &_velocities[0];
|
||||
|
||||
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
|
||||
|
||||
// apply velocity
|
||||
p->s += dp[0] * speed;
|
||||
p->t += dp[1] * speed;
|
||||
|
||||
// make sure particles can't skip more than 1 face boundary at a time
|
||||
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
|
||||
|
||||
// check if the particle is jumping a boundary
|
||||
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
|
||||
// this is not treated here.
|
||||
int edge = -1;
|
||||
|
||||
if (p->s >= 1.0f) edge = 1;
|
||||
if (p->s <= 0.0f) edge = 3;
|
||||
if (p->t >= 1.0f) edge = 2;
|
||||
if (p->t <= 0.0f) edge = 0;
|
||||
|
||||
if (edge>=0) {
|
||||
// warp the particle to the other side of the boundary
|
||||
warpParticle(edge, p, dp);
|
||||
}
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline void
|
||||
FlipS(STParticles::Position * p, float * dp) {
|
||||
p->s = 1.0f-p->s;
|
||||
@ -163,6 +193,15 @@ Rotate(int rot, STParticles::Position * p, float * dp) {
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
inline void
|
||||
Trim(STParticles::Position * p) {
|
||||
if (p->s <0.0f) p->s = 1.0f + p->s;
|
||||
if (p->s>=1.0f) p->s = p->s - 1.0f;
|
||||
if (p->t <0.0f) p->t = 1.0f + p->t;
|
||||
if (p->t>=1.0f) p->t = p->t - 1.0f;
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
inline void
|
||||
Clamp(STParticles::Position * p) {
|
||||
if (p->s<0.0f) {
|
||||
@ -192,21 +231,12 @@ Bounce(int edge, STParticles::Position * p, float * dp) {
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
inline void
|
||||
Trim(STParticles::Position * p) {
|
||||
if (p->s <0.0f) p->s = 1.0f + p->s;
|
||||
if (p->s>=1.0f) p->s = p->s - 1.0f;
|
||||
if (p->t <0.0f) p->t = 1.0f + p->t;
|
||||
if (p->t>=1.0f) p->t = p->t - 1.0f;
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
void
|
||||
STParticles::warpParticle(int edge, Position * p, float * dp) {
|
||||
|
||||
assert(p->ptexIndex<(int)_adjacency.size() and (edge>=0 and edge<4));
|
||||
STParticles::WarpParticle(std::vector<FaceInfo> const &adjacency,
|
||||
int edge, Position * p, float * dp) {
|
||||
assert(p->ptexIndex<(int)adjacency.size() and (edge>=0 and edge<4));
|
||||
|
||||
FaceInfo const & f = _adjacency[p->ptexIndex];
|
||||
FaceInfo const & f = adjacency[p->ptexIndex];
|
||||
|
||||
int afid = f.adjface(edge),
|
||||
aeid = f.adjedge(edge);
|
||||
@ -215,7 +245,7 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
|
||||
// boundary detected: bounce the particle
|
||||
Bounce(edge, p, dp);
|
||||
} else {
|
||||
FaceInfo const & af = _adjacency[afid];
|
||||
FaceInfo const & af = adjacency[afid];
|
||||
int rot = edge - aeid + 2;
|
||||
|
||||
bool fIsSubface = f.isSubface(),
|
||||
@ -233,6 +263,66 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
STParticles::~STParticles() {
|
||||
delete _patchMap;
|
||||
}
|
||||
|
||||
void
|
||||
STParticles::Update(float deltaTime) {
|
||||
|
||||
if (fabs(GetSpeed()) < 0.001f) return;
|
||||
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
|
||||
|
||||
_patchCoords.clear();
|
||||
|
||||
// XXX: this process should be parallelized.
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
|
||||
_patchCoords.resize((int)GetNumParticles());
|
||||
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
|
||||
_adjacency, &_patchCoords[0], _patchMap);;
|
||||
g_tbbCounter = 0;
|
||||
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
|
||||
tbb::parallel_for(range, kernel);
|
||||
_patchCoords.resize(g_tbbCounter);
|
||||
#else
|
||||
Position * p = &_positions[0];
|
||||
float * dp = &_velocities[0];
|
||||
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
|
||||
// apply velocity
|
||||
p->s += dp[0] * speed;
|
||||
p->t += dp[1] * speed;
|
||||
|
||||
// make sure particles can't skip more than 1 face boundary at a time
|
||||
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
|
||||
|
||||
// check if the particle is jumping a boundary
|
||||
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
|
||||
// this is not treated here.
|
||||
int edge = -1;
|
||||
|
||||
if (p->s >= 1.0f) edge = 1;
|
||||
if (p->s <= 0.0f) edge = 3;
|
||||
if (p->t >= 1.0f) edge = 2;
|
||||
if (p->t <= 0.0f) edge = 0;
|
||||
|
||||
if (edge>=0) {
|
||||
// warp the particle to the other side of the boundary
|
||||
WarpParticle(_adjacency, edge, p, dp);
|
||||
}
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
|
||||
// resolve particle positions into patch handles
|
||||
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
|
||||
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
|
||||
if (handle) {
|
||||
_patchCoords.push_back(
|
||||
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Dump adjacency info
|
||||
std::ostream & operator << (std::ostream & os,
|
||||
STParticles::FaceInfo const & f) {
|
||||
|
@ -26,7 +26,8 @@
|
||||
#define ST_PARTICLES_H
|
||||
|
||||
#include <far/topologyRefiner.h>
|
||||
|
||||
#include <far/patchMap.h>
|
||||
#include <osd/types.h>
|
||||
#include <iostream>
|
||||
|
||||
//
|
||||
@ -72,47 +73,6 @@ public:
|
||||
float s, t; ///< parametric location on face
|
||||
};
|
||||
|
||||
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
|
||||
|
||||
STParticles(Refiner const & refiner, int nparticles, bool centered=false);
|
||||
|
||||
void Update(float deltaTime);
|
||||
|
||||
int GetNumParticles() const {
|
||||
return (int)_positions.size();
|
||||
}
|
||||
|
||||
void SetSpeed(float speed) {
|
||||
_speed = std::max(-1.0f, std::min(1.0f, speed));
|
||||
}
|
||||
|
||||
float GetSpeed() const {
|
||||
return _speed;
|
||||
}
|
||||
|
||||
std::vector<Position> & GetPositions() {
|
||||
return _positions;
|
||||
}
|
||||
|
||||
std::vector<float> & GetVelocities() {
|
||||
return _velocities;
|
||||
}
|
||||
|
||||
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
|
||||
|
||||
private:
|
||||
|
||||
//
|
||||
// Particle "Dynamics"
|
||||
//
|
||||
std::vector<Position> _positions;
|
||||
|
||||
std::vector<float> _velocities;
|
||||
|
||||
float _speed; // velocity multiplier
|
||||
|
||||
private:
|
||||
|
||||
//
|
||||
// Topology adjacency (borrowed from Ptexture.h)
|
||||
//
|
||||
@ -152,11 +112,63 @@ private:
|
||||
int adjfaces[4];
|
||||
};
|
||||
|
||||
void warpParticle(int edge, Position * p, float * dp);
|
||||
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
|
||||
typedef OpenSubdiv::Far::PatchTable PatchTable;
|
||||
|
||||
STParticles(Refiner const & refiner, PatchTable const *patchTable,
|
||||
int nparticles, bool centered=false);
|
||||
|
||||
~STParticles();
|
||||
|
||||
void Update(float deltaTime);
|
||||
|
||||
int GetNumParticles() const {
|
||||
return (int)_positions.size();
|
||||
}
|
||||
|
||||
void SetSpeed(float speed) {
|
||||
_speed = std::max(-1.0f, std::min(1.0f, speed));
|
||||
}
|
||||
|
||||
float GetSpeed() const {
|
||||
return _speed;
|
||||
}
|
||||
|
||||
std::vector<Position> & GetPositions() {
|
||||
return _positions;
|
||||
}
|
||||
|
||||
std::vector<float> & GetVelocities() {
|
||||
return _velocities;
|
||||
}
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> GetPatchCoords() const {
|
||||
return _patchCoords;
|
||||
}
|
||||
|
||||
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
|
||||
|
||||
static void WarpParticle(std::vector<FaceInfo> const &adjacency,
|
||||
int edge, Position * p, float * dp);
|
||||
|
||||
private:
|
||||
|
||||
//
|
||||
// Particle "Dynamics"
|
||||
//
|
||||
std::vector<Position> _positions;
|
||||
|
||||
std::vector<float> _velocities;
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;
|
||||
|
||||
float _speed; // velocity multiplier
|
||||
|
||||
friend std::ostream & operator << (std::ostream & os, FaceInfo const & f);
|
||||
|
||||
|
||||
std::vector<FaceInfo> _adjacency;
|
||||
OpenSubdiv::Far::PatchMap const *_patchMap;
|
||||
};
|
||||
|
||||
#endif // ST_PARTICLES_H
|
||||
|
@ -786,7 +786,7 @@ bindTextures() {
|
||||
}
|
||||
|
||||
static GLenum
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
@ -868,7 +868,7 @@ display() {
|
||||
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
if (g_displayStyle == kWire)
|
||||
@ -879,7 +879,7 @@ display() {
|
||||
|
||||
// patch drawing
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
GLenum primType = bindProgram(GetEffect(), patch);
|
||||
|
||||
@ -909,7 +909,7 @@ display() {
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
|
||||
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
GLenum primType = bindProgram(GetEffect(/*uvDraw=*/ true), patch);
|
||||
|
||||
|
4
examples/glImaging/glImaging.cpp
Executable file → Normal file
4
examples/glImaging/glImaging.cpp
Executable file → Normal file
@ -422,11 +422,11 @@ void runTest(ShapeDesc const &shapeDesc, std::string const &kernel,
|
||||
mesh->GetPatchTable()->GetPatchParamTextureBuffer());
|
||||
}
|
||||
|
||||
Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
Osd::PatchArrayVector const & patches =
|
||||
mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
Osd::PatchArray const & patch = patches[i];
|
||||
Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
||||
|
@ -597,7 +597,7 @@ static void bindTextures(Effect effect) {
|
||||
}
|
||||
|
||||
static GLuint
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
@ -670,12 +670,12 @@ display() {
|
||||
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
// patch drawing
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
|
||||
GLenum primType = GL_PATCHES;
|
||||
@ -807,13 +807,13 @@ drawStroke(int x, int y) {
|
||||
effect.paint = 1;
|
||||
bindTextures(effect);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
// patch drawing
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
|
||||
GLenum primType = GL_PATCHES;
|
||||
|
@ -1249,7 +1249,7 @@ bindTextures() {
|
||||
//------------------------------------------------------------------------------
|
||||
static GLenum
|
||||
bindProgram(Effect effect,
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
GLDrawConfig *config = g_shaderCache.GetDrawConfig(effectDesc);
|
||||
@ -1296,10 +1296,10 @@ drawModel() {
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
// patch drawing
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
Effect effect;
|
||||
effect.value = 0;
|
||||
|
@ -1181,7 +1181,7 @@ bindTextures() {
|
||||
|
||||
static GLenum
|
||||
bindProgram(Effect effect,
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
// only legacy gregory needs maxValence and numElements
|
||||
@ -1299,7 +1299,7 @@ display() {
|
||||
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
// patch drawing
|
||||
@ -1316,7 +1316,7 @@ display() {
|
||||
|
||||
// core draw-calls
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
@ -29,6 +29,7 @@
|
||||
set(CPU_SOURCE_FILES
|
||||
cpuEvaluator.cpp
|
||||
cpuKernel.cpp
|
||||
cpuPatchTable.cpp
|
||||
cpuVertexBuffer.cpp
|
||||
)
|
||||
|
||||
@ -43,10 +44,12 @@ set(PRIVATE_HEADER_FILES
|
||||
|
||||
set(PUBLIC_HEADER_FILES
|
||||
cpuEvaluator.h
|
||||
cpuPatchTable.h
|
||||
cpuVertexBuffer.h
|
||||
mesh.h
|
||||
nonCopyable.h
|
||||
opengl.h
|
||||
types.h
|
||||
vertexDescriptor.h
|
||||
)
|
||||
|
||||
@ -216,6 +219,7 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS})
|
||||
# OpenCL code & dependencies
|
||||
set(OPENCL_PUBLIC_HEADERS
|
||||
clEvaluator.h
|
||||
clPatchTable.h
|
||||
clVertexBuffer.h
|
||||
opencl.h
|
||||
)
|
||||
@ -223,6 +227,7 @@ set(OPENCL_PUBLIC_HEADERS
|
||||
if ( OPENCL_FOUND )
|
||||
list(APPEND GPU_SOURCE_FILES
|
||||
clEvaluator.cpp
|
||||
clPatchTable.cpp
|
||||
clVertexBuffer.cpp
|
||||
)
|
||||
list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
|
||||
@ -254,12 +259,14 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
|
||||
# CUDA code & dependencies
|
||||
set(CUDA_PUBLIC_HEADERS
|
||||
cudaEvaluator.h
|
||||
cudaPatchTable.h
|
||||
cudaVertexBuffer.h
|
||||
)
|
||||
|
||||
if( CUDA_FOUND )
|
||||
list(APPEND GPU_SOURCE_FILES
|
||||
cudaEvaluator.cpp
|
||||
cudaPatchTable.cpp
|
||||
cudaVertexBuffer.cpp
|
||||
)
|
||||
list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS})
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
|
||||
#include "../osd/opencl.h"
|
||||
#include "../far/error.h"
|
||||
@ -87,11 +88,12 @@ CLStencilTable::~CLStencilTable() {
|
||||
|
||||
CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue)
|
||||
: _clContext(context), _clCommandQueue(queue),
|
||||
_program(NULL), _stencilsKernel(NULL) {
|
||||
_program(NULL), _stencilKernel(NULL), _patchKernel(NULL) {
|
||||
}
|
||||
|
||||
CLEvaluator::~CLEvaluator() {
|
||||
if (_stencilsKernel) clReleaseKernel(_stencilsKernel);
|
||||
if (_stencilKernel) clReleaseKernel(_stencilKernel);
|
||||
if (_patchKernel) clReleaseKernel(_patchKernel);
|
||||
if (_program) clReleaseProgram(_program);
|
||||
}
|
||||
|
||||
@ -145,7 +147,13 @@ CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
return false;
|
||||
}
|
||||
|
||||
_stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum);
|
||||
_stencilKernel = clCreateKernel(_program, "computeStencils", &errNum);
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
_patchKernel = clCreateKernel(_program, "computePatches", &errNum);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
|
||||
@ -169,24 +177,24 @@ CLEvaluator::EvalStencils(cl_mem src,
|
||||
|
||||
size_t globalWorkSize = (size_t)(end - start);
|
||||
|
||||
clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes);
|
||||
clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets);
|
||||
clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights);
|
||||
clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start);
|
||||
clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end);
|
||||
clSetKernelArg(_stencilKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_stencilKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_stencilKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_stencilKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_stencilKernel, 4, sizeof(cl_mem), &sizes);
|
||||
clSetKernelArg(_stencilKernel, 5, sizeof(cl_mem), &offsets);
|
||||
clSetKernelArg(_stencilKernel, 6, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(_stencilKernel, 7, sizeof(cl_mem), &weights);
|
||||
clSetKernelArg(_stencilKernel, 8, sizeof(int), &start);
|
||||
clSetKernelArg(_stencilKernel, 9, sizeof(int), &end);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _stencilsKernel, 1, NULL,
|
||||
_clCommandQueue, _stencilKernel, 1, NULL,
|
||||
&globalWorkSize, NULL, 0, NULL, NULL);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"ApplyStencilTableKernel (%d) ", errNum);
|
||||
"ApplyStencilKernel (%d) ", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -194,6 +202,51 @@ CLEvaluator::EvalStencils(cl_mem src,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CLEvaluator::EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst, VertexBufferDescriptor const &dstDesc,
|
||||
cl_mem du, VertexBufferDescriptor const &duDesc,
|
||||
cl_mem dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
cl_mem patchCoordsBuffer,
|
||||
cl_mem patchArrayBuffer,
|
||||
cl_mem patchIndexBuffer,
|
||||
cl_mem patchParamBuffer) const {
|
||||
|
||||
size_t globalWorkSize = (size_t)(numPatchCoords);
|
||||
|
||||
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 4, sizeof(int), &numPatchCoords);
|
||||
// clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
|
||||
// clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
|
||||
// clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
|
||||
// clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
|
||||
// clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
|
||||
// clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 5, sizeof(cl_mem), &patchCoordsBuffer);
|
||||
clSetKernelArg(_patchKernel, 6, sizeof(cl_mem), &patchArrayBuffer);
|
||||
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &patchIndexBuffer);
|
||||
clSetKernelArg(_patchKernel, 8, sizeof(cl_mem), &patchParamBuffer);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _patchKernel, 1, NULL,
|
||||
&globalWorkSize, NULL, 0, NULL, NULL);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"ApplyPatchKernel (%d) ", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
clFinish(_clCommandQueue);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* static */
|
||||
void
|
||||
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opencl.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -75,9 +76,6 @@ private:
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// \brief OpenCL stencil kernel
|
||||
///
|
||||
///
|
||||
class CLEvaluator {
|
||||
public:
|
||||
typedef bool Instantiatable;
|
||||
@ -107,6 +105,12 @@ public:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
@ -124,7 +128,7 @@ public:
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied. The table must have
|
||||
/// OpenCL memory interfaces.
|
||||
/// SSBO interfaces.
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
@ -137,25 +141,25 @@ public:
|
||||
/// cl_command_queue GetCommandQueue()
|
||||
/// methods.
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE,
|
||||
typename DEVICE_CONTEXT>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename STENCIL_TABLE, typename DEVICE_CONTEXT>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
return instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
instance = Create(srcDesc, dstDesc, deviceContext);
|
||||
if (instance) {
|
||||
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
bool r = instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
delete instance;
|
||||
return r;
|
||||
@ -167,15 +171,14 @@ public:
|
||||
/// Generic compute function.
|
||||
/// Dispatch the CL compute kernel asynchronously.
|
||||
/// Returns false if the kernel hasn't been compiled yet.
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue),
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCLBuffer(_clCommandQueue),
|
||||
dstBuffer->BindCLBuffer(_clCommandQueue),
|
||||
dstDesc,
|
||||
stencilTable->GetSizesBuffer(),
|
||||
stencilTable->GetOffsetsBuffer(),
|
||||
@ -187,10 +190,8 @@ public:
|
||||
|
||||
/// Dispatch the CL compute kernel asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
bool EvalStencils(cl_mem src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
bool EvalStencils(cl_mem src, VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst, VertexBufferDescriptor const &dstDesc,
|
||||
cl_mem sizes,
|
||||
cl_mem offsets,
|
||||
cl_mem indices,
|
||||
@ -198,6 +199,278 @@ public:
|
||||
int start,
|
||||
int end) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCLBuffer() method returning an
|
||||
/// array of PatchCoord struct.
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext client providing context class which supports
|
||||
/// cL_context GetContext()
|
||||
/// cl_command_queue GetCommandQueue()
|
||||
/// methods.
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
|
||||
typename DEVICE_CONTEXT>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc, deviceContext);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer
|
||||
///
|
||||
/// @param duDesc
|
||||
///
|
||||
/// @param dvBuffer
|
||||
///
|
||||
/// @param dvDesc
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCLBuffer() method returning an
|
||||
/// array of PatchCoord struct
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext client providing context class which supports
|
||||
/// cL_context GetContext()
|
||||
/// cl_command_queue GetCommandQueue()
|
||||
/// methods.
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
|
||||
typename DEVICE_CONTEXT>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc, deviceContext);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCLBuffer() method returning an
|
||||
/// array of PatchCoord struct.
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
|
||||
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
|
||||
0, VertexBufferDescriptor(),
|
||||
0, VertexBufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoords->BindCLBuffer(_clCommandQueue),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data of Du
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data of Dv
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
|
||||
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
|
||||
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
|
||||
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords->BindCLBuffer(_clCommandQueue),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
bool EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst, VertexBufferDescriptor const &dstDesc,
|
||||
cl_mem du, VertexBufferDescriptor const &duDesc,
|
||||
cl_mem dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
cl_mem patchCoordsBuffer,
|
||||
cl_mem patchArrayBuffer,
|
||||
cl_mem patchIndexBuffer,
|
||||
cl_mem patchParamsBuffer) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// Configure OpenCL kernel.
|
||||
/// Returns false if it fails to compile the kernel.
|
||||
bool Compile(VertexBufferDescriptor const &srcDesc,
|
||||
@ -215,7 +488,8 @@ private:
|
||||
cl_context _clContext;
|
||||
cl_command_queue _clCommandQueue;
|
||||
cl_program _program;
|
||||
cl_kernel _stencilsKernel;
|
||||
cl_kernel _stencilKernel;
|
||||
cl_kernel _patchKernel;
|
||||
};
|
||||
|
||||
|
||||
|
@ -85,3 +85,118 @@ __kernel void computeStencils(__global float * src,
|
||||
|
||||
writeVertex(dst, current, &v);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
struct PatchArray {
|
||||
int patchType;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
|
||||
struct PatchCoord {
|
||||
int arrayIndex;
|
||||
int patchIndex;
|
||||
int vertIndex;
|
||||
float s;
|
||||
float t;
|
||||
};
|
||||
|
||||
struct PatchParam {
|
||||
int faceIndex;
|
||||
uint patchBits;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
static void getBSplineWeights(float t, float *point, float *deriv) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point[3] = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
deriv[0] = -0.5f*t2 + t - 0.5f;
|
||||
deriv[1] = 1.5f*t2 - 2.0f*t;
|
||||
deriv[2] = -1.5f*t2 + t + 0.5f;
|
||||
deriv[3] = 0.5f*t2;
|
||||
}
|
||||
|
||||
__kernel void computePatches(__global float *src, int srcOffset,
|
||||
__global float *dst, int dstOffset,
|
||||
// __global float *du, int duOffset, int duStride,
|
||||
// __global float *dv, int dvOffset, int dvStride,
|
||||
int numPatchCoords,
|
||||
__global struct PatchCoord *patchCoords,
|
||||
__global struct PatchArray *patchArrayBuffer,
|
||||
__global int *patchIndexBuffer,
|
||||
__global struct PatchParam *patchParamBuffer) {
|
||||
int current = get_global_id(0);
|
||||
|
||||
if (current > numPatchCoords) return;
|
||||
|
||||
src += srcOffset;
|
||||
dst += dstOffset;
|
||||
// du += duOffset;
|
||||
// dv += dvOffset;
|
||||
|
||||
struct PatchCoord coord = patchCoords[current];
|
||||
int patchIndex = coord.patchIndex;
|
||||
// struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
|
||||
struct PatchArray array = patchArrayBuffer[0];
|
||||
|
||||
int patchType = 6; // array.x XXX: REGULAR only for now.
|
||||
int numControlVertices = 16;
|
||||
|
||||
uint patchBits = patchParamBuffer[patchIndex].patchBits;
|
||||
// vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t));
|
||||
float dScale = 1.0f;//float(1 << getDepth(patchBits));
|
||||
|
||||
float uv[2] = {coord.s, coord.t};
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
if (patchType == 6) { // REGULAR
|
||||
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
|
||||
getBSplineWeights(uv[0], sWeights, dsWeights);
|
||||
getBSplineWeights(uv[1], tWeights, dtWeights);
|
||||
|
||||
// adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
// adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
|
||||
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// TODO: GREGORY BASIS
|
||||
}
|
||||
|
||||
struct Vertex v;
|
||||
clear(&v);
|
||||
|
||||
#if 1
|
||||
// debug
|
||||
v.v[0] = uv[0];
|
||||
v.v[1] = uv[1];
|
||||
v.v[2] = patchIndexBuffer[current] * 0.1;
|
||||
writeVertex(dst, current, &v);
|
||||
return;
|
||||
#endif
|
||||
|
||||
int indexBase = array.indexBase + coord.vertIndex;
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
if (index < 0) index = 0;
|
||||
addWithWeight(&v, src, index, wP[i]);
|
||||
}
|
||||
writeVertex(dst, current, &v);
|
||||
}
|
||||
|
99
opensubdiv/osd/clPatchTable.cpp
Normal file
99
opensubdiv/osd/clPatchTable.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "../osd/clPatchTable.h"
|
||||
|
||||
#include "../far/error.h"
|
||||
#include "../far/patchTable.h"
|
||||
#include "../osd/opencl.h"
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
CLPatchTable::CLPatchTable() :
|
||||
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
|
||||
}
|
||||
|
||||
CLPatchTable::~CLPatchTable() {
|
||||
if (_patchArrays) clReleaseMemObject(_patchArrays);
|
||||
if (_indexBuffer) clReleaseMemObject(_indexBuffer);
|
||||
if (_patchParamBuffer) clReleaseMemObject(_patchParamBuffer);
|
||||
}
|
||||
|
||||
CLPatchTable *
|
||||
CLPatchTable::Create(Far::PatchTable const *farPatchTable,
|
||||
cl_context clContext) {
|
||||
CLPatchTable *instance = new CLPatchTable();
|
||||
if (instance->allocate(farPatchTable, clContext)) return instance;
|
||||
delete instance;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContext) {
|
||||
CpuPatchTable patchTable(farPatchTable);
|
||||
|
||||
size_t numPatchArrays = patchTable.GetNumPatchArrays();
|
||||
size_t indexSize = patchTable.GetPatchIndexSize();
|
||||
size_t patchParamSize = patchTable.GetPatchParamSize();
|
||||
|
||||
cl_int err = 0;
|
||||
_patchArrays = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
|
||||
numPatchArrays * sizeof(Osd::PatchArray),
|
||||
(void*)patchTable.GetPatchArrayBuffer(),
|
||||
&err);
|
||||
if (err != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
_indexBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
|
||||
indexSize * sizeof(int),
|
||||
(void*)patchTable.GetPatchIndexBuffer(),
|
||||
&err);
|
||||
if (err != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
_patchParamBuffer = clCreateBuffer(clContext, CL_MEM_READ_WRITE,
|
||||
patchParamSize * sizeof(Osd::PatchParam),
|
||||
(void*)patchTable.GetPatchParamBuffer(),
|
||||
&err);
|
||||
if (err != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
91
opensubdiv/osd/clPatchTable.h
Normal file
91
opensubdiv/osd/clPatchTable.h
Normal file
@ -0,0 +1,91 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
|
||||
#define OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opencl.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far{
|
||||
class PatchTable;
|
||||
};
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief CL patch table
|
||||
///
|
||||
/// This class is a CL buffer representation of Far::PatchTable.
|
||||
///
|
||||
/// CLEvaluator consumes this table to evaluate on the patches.
|
||||
///
|
||||
///
|
||||
class CLPatchTable : private NonCopyable<CLPatchTable> {
|
||||
public:
|
||||
/// Creator. Returns NULL if error
|
||||
static CLPatchTable *Create(Far::PatchTable const *patchTable,
|
||||
cl_context clContext);
|
||||
|
||||
template <typename DEVICE_CONTEXT>
|
||||
static CLPatchTable * Create(Far::PatchTable const *patchTable,
|
||||
DEVICE_CONTEXT context) {
|
||||
return Create(patchTable, context->GetContext());
|
||||
}
|
||||
|
||||
/// Destructor
|
||||
~CLPatchTable();
|
||||
|
||||
/// Returns the CL memory of the array of Osd::PatchArray buffer
|
||||
cl_mem GetPatchArrayBuffer() const { return _patchArrays; }
|
||||
|
||||
/// Returns the CL memory of the patch control vertices
|
||||
cl_mem GetPatchIndexBuffer() const { return _indexBuffer; }
|
||||
|
||||
/// Returns the CL memory of the array of Osd::PatchParam buffer
|
||||
cl_mem GetPatchParamBuffer() const { return _patchParamBuffer; }
|
||||
|
||||
protected:
|
||||
CLPatchTable();
|
||||
|
||||
bool allocate(Far::PatchTable const *patchTable, cl_context clContext);
|
||||
|
||||
cl_mem _patchArrays;
|
||||
cl_mem _indexBuffer;
|
||||
cl_mem _patchParamBuffer;
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
|
@ -45,6 +45,12 @@ public:
|
||||
/// Creator. Returns NULL if error.
|
||||
static CLVertexBuffer * Create(int numElements, int numVertices, cl_context clContext);
|
||||
|
||||
template <typename DEVICE_CONTEXT>
|
||||
static CLVertexBuffer * Create(int numElements, int numVertices,
|
||||
DEVICE_CONTEXT context) {
|
||||
return Create(numElements, numVertices, context->GetContext());
|
||||
}
|
||||
|
||||
/// Destructor.
|
||||
~CLVertexBuffer();
|
||||
|
||||
@ -52,6 +58,12 @@ public:
|
||||
/// vertices data to Osd.
|
||||
void UpdateData(const float *src, int startVertex, int numVertices, cl_command_queue clQueue);
|
||||
|
||||
template<typename DEVICE_CONTEXT>
|
||||
void UpdateData(const float *src, int startVertex, int numVertices,
|
||||
DEVICE_CONTEXT context) {
|
||||
UpdateData(src, startVertex, numVertices, context->GetCommandQueue());
|
||||
}
|
||||
|
||||
/// Returns how many elements defined in this vertex buffer.
|
||||
int GetNumElements() const;
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include "../osd/cpuEvaluator.h"
|
||||
#include "../osd/cpuKernel.h"
|
||||
#include "../far/patchBasis.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
@ -34,15 +35,15 @@ namespace Osd {
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
CpuEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
@ -55,30 +56,28 @@ CpuEvaluator::EvalStencils(const float *src,
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
CpuEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != dstDsDesc.length) return false;
|
||||
if (srcDesc.length != dstDtDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
|
||||
CpuEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
dstDs, dstDsDesc,
|
||||
dstDt, dstDtDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
start, end);
|
||||
@ -123,10 +122,13 @@ CpuEvaluator::EvalPatches(const float *src,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable) {
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer){
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
else return false;
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
|
||||
@ -134,14 +136,38 @@ CpuEvaluator::EvalPatches(const float *src,
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
PatchCoord const &coords = patchCoords[i];
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
|
||||
int patchType = array.GetPatchType();
|
||||
// XXX: patchIndex is absolute. not sure it's consistent.
|
||||
// (should be offsetted by array.primitiveIdBase?)
|
||||
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < cvs.size(); ++j) {
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
++dstT;
|
||||
@ -151,47 +177,67 @@ CpuEvaluator::EvalPatches(const float *src,
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable) {
|
||||
CpuEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer) {
|
||||
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (dstDs) dstDs += dstDsDesc.offset;
|
||||
if (dstDt) dstDt += dstDtDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> dstDsT(dstDs, dstDsDesc.length, dstDsDesc.stride);
|
||||
BufferAdapter<float> dstDtT(dstDt, dstDtDesc.length, dstDtDesc.stride);
|
||||
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> duT (du, duDesc.length, duDesc.stride);
|
||||
BufferAdapter<float> dvT (dv, dvDesc.length, dvDesc.stride);
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
PatchCoord const &coords = patchCoords[i];
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
dstDsT.Clear();
|
||||
dstDtT.Clear();
|
||||
for (int j = 0; j < cvs.size(); ++j) {
|
||||
duT.Clear();
|
||||
dvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
dstDsT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dstDtT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
duT.AddWithWeight (srcT[cvs[j]], wDs[j]);
|
||||
dvT.AddWithWeight (srcT[cvs[j]], wDt[j]);
|
||||
}
|
||||
++dstT;
|
||||
++dstDsT;
|
||||
++dstDtT;
|
||||
++duT;
|
||||
++dvT;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -29,33 +29,14 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../far/patchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief Coordinates set on a patch table
|
||||
/// XXX: this is a temporary structure, exists during Osd refactoring work.
|
||||
///
|
||||
struct PatchCoord {
|
||||
/// \brief Constructor
|
||||
///
|
||||
/// @param p patch handle
|
||||
///
|
||||
/// @param s parametric location on the patch
|
||||
///
|
||||
/// @param t parametric location on the patch
|
||||
///
|
||||
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
|
||||
handle(handle), s(s), t(t) { }
|
||||
|
||||
Far::PatchTable::PatchHandle handle; ///< patch handle
|
||||
float s, t; ///< parametric location on patch
|
||||
};
|
||||
|
||||
class CpuEvaluator {
|
||||
public:
|
||||
/// ----------------------------------------------------------------------
|
||||
@ -80,7 +61,7 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
@ -89,20 +70,18 @@ public:
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -125,24 +104,27 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
@ -161,19 +143,19 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDsBuffer Output s-derivative buffer
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the output buffer
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDtBuffer Output t-derivative buffer
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the output buffer
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
@ -182,28 +164,22 @@ public:
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *dstDsBuffer,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
DST_BUFFER *dstDtBuffer,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *evaluator = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)evaluator; // unused
|
||||
(void)deviceContext; // unused
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
dstDsBuffer->BindCpuBuffer(),
|
||||
dstDsDesc,
|
||||
dstDtBuffer->BindCpuBuffer(),
|
||||
dstDtDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -228,40 +204,44 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDs Output s-derivatives pointer. An offset of
|
||||
/// dstDsDesc will be applied internally.
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the output buffer
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDt Output t-derivatives pointer. An offset of
|
||||
/// dstDtDesc will be applied internally.
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the output buffer
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start,
|
||||
int end);
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
@ -289,32 +269,35 @@ public:
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER>
|
||||
static bool EvalPatches(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable,
|
||||
CpuEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
patchCoords,
|
||||
patchTable);
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
@ -333,56 +316,59 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDsBuffer Output s-derivatives buffer
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the dstDsBuffer
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dstDtBuffer Output t-derivatives buffer
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the dstDtBuffer
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER>
|
||||
static bool EvalPatches(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *dstDsBuffer,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
DST_BUFFER *dstDtBuffer,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable,
|
||||
CpuEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
dstDsBuffer->BindCpuBuffer(),
|
||||
dstDsDesc,
|
||||
dstDtBuffer->BindCpuBuffer(),
|
||||
dstDtDesc,
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords,
|
||||
patchTable);
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
@ -403,20 +389,23 @@ public:
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable on which primvars are evaluated
|
||||
/// for the patchCoords
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable);
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
@ -432,38 +421,45 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDs Output s-derivatives pointer. An offset of
|
||||
/// dstDsDesc will be applied internally.
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the dstDs buffer
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dstDt Output t-derivatives pointer. An offset of
|
||||
/// dstDtDesc will be applied internally.
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the dstDt buffer
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable on which primvars are evaluated
|
||||
/// for the patchCoords
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable);
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief synchronize all asynchronous computation invoked on this device.
|
||||
static void Synchronize(void * /*deviceContext = NULL*/) {
|
||||
|
106
opensubdiv/osd/cpuPatchTable.cpp
Normal file
106
opensubdiv/osd/cpuPatchTable.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
CpuPatchTable::CpuPatchTable(const Far::PatchTable *farPatchTable) {
|
||||
int nPatchArrays = farPatchTable->GetNumPatchArrays();
|
||||
|
||||
// count
|
||||
int numPatches = 0;
|
||||
int numIndices = 0;
|
||||
for (int j = 0; j < nPatchArrays; ++j) {
|
||||
int nPatch = farPatchTable->GetNumPatches(j);
|
||||
int nCV = farPatchTable->GetPatchArrayDescriptor(j).GetNumControlVertices();
|
||||
numPatches += nPatch;
|
||||
numIndices += nPatch * nCV;
|
||||
}
|
||||
_patchArrays.reserve(nPatchArrays);
|
||||
_indexBuffer.reserve(numIndices);
|
||||
_patchParamBuffer.reserve(numPatches);
|
||||
|
||||
// for each patchArray
|
||||
for (int j = 0; j < nPatchArrays; ++j) {
|
||||
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
|
||||
farPatchTable->GetNumPatches(j),
|
||||
(int)_indexBuffer.size(),
|
||||
(int)_patchParamBuffer.size());
|
||||
_patchArrays.push_back(patchArray);
|
||||
|
||||
// indices
|
||||
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
|
||||
for (int k = 0; k < indices.size(); ++k) {
|
||||
_indexBuffer.push_back(indices[k]);
|
||||
}
|
||||
|
||||
// patchParams bundling
|
||||
// XXX: this process won't be needed if Far::PatchParam includes
|
||||
// sharpness.
|
||||
#if 0
|
||||
// XXX: we need sharpness interface for patcharray or put sharpness
|
||||
// into patchParam.
|
||||
Far::ConstPatchParamArray patchParams =
|
||||
farPatchTable->GetPatchParams(j);
|
||||
for (int k = 0; k < patchParams.size(); ++k) {
|
||||
float sharpness = 0.0;
|
||||
_patchParamBuffer.push_back(patchParams[k].faceIndex);
|
||||
_patchParamBuffer.push_back(patchParams[k].bitField.field);
|
||||
_patchParamBuffer.push_back(*((unsigned int *)&sharpness));
|
||||
}
|
||||
#else
|
||||
// XXX: workaround. GetPatchParamTable() will be deprecated though.
|
||||
Far::PatchParamTable const & patchParamTable =
|
||||
farPatchTable->GetPatchParamTable();
|
||||
std::vector<Far::Index> const &sharpnessIndexTable =
|
||||
farPatchTable->GetSharpnessIndexTable();
|
||||
int numPatches = farPatchTable->GetNumPatches(j);
|
||||
for (int k = 0; k < numPatches; ++k) {
|
||||
float sharpness = 0.0;
|
||||
int patchIndex = (int)_patchParamBuffer.size();
|
||||
if (patchIndex < (int)sharpnessIndexTable.size()) {
|
||||
int sharpnessIndex = sharpnessIndexTable[patchIndex];
|
||||
if (sharpnessIndex >= 0)
|
||||
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
|
||||
}
|
||||
PatchParam param;
|
||||
//param.patchParam = patchParamTable[patchIndex];
|
||||
param.faceIndex = patchParamTable[patchIndex].faceIndex;
|
||||
param.patchBits = patchParamTable[patchIndex].bitField.field;
|
||||
param.sharpness = sharpness;
|
||||
_patchParamBuffer.push_back(param);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
102
opensubdiv/osd/cpuPatchTable.h
Normal file
102
opensubdiv/osd/cpuPatchTable.h
Normal file
@ -0,0 +1,102 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
|
||||
#define OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include <vector>
|
||||
#include "../far/patchDescriptor.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far{
|
||||
class PatchTable;
|
||||
};
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief Cpu patch table
|
||||
///
|
||||
/// XXX: We can use just Far::PatchTable for typical CpuEval use cases.
|
||||
///
|
||||
/// Currently this class exists because of the template resolution
|
||||
/// for the CpuEvaluator's generic interface functions
|
||||
/// (glEvalLimit example uses), and
|
||||
/// device-specific patch tables such as GLPatchTables internally use
|
||||
/// as a staging buffer to splice patcharray and interleave sharpnesses.
|
||||
///
|
||||
/// Ideally Far::PatchTables should have the same data representation
|
||||
/// and accessors so that we don't have to copy data unnecessarily.
|
||||
///
|
||||
class CpuPatchTable {
|
||||
public:
|
||||
static CpuPatchTable *Create(const Far::PatchTable *patchTable,
|
||||
void *deviceContext = NULL) {
|
||||
(void)deviceContext; // unused
|
||||
return new CpuPatchTable(patchTable);
|
||||
}
|
||||
|
||||
explicit CpuPatchTable(const Far::PatchTable *patchTable);
|
||||
~CpuPatchTable() {}
|
||||
|
||||
const PatchArray *GetPatchArrayBuffer() const {
|
||||
return &_patchArrays[0];
|
||||
}
|
||||
const int *GetPatchIndexBuffer() const {
|
||||
return &_indexBuffer[0];
|
||||
}
|
||||
const PatchParam *GetPatchParamBuffer() const {
|
||||
return &_patchParamBuffer[0];
|
||||
}
|
||||
|
||||
size_t GetNumPatchArrays() const {
|
||||
return _patchArrays.size();
|
||||
}
|
||||
size_t GetPatchIndexSize() const {
|
||||
return _indexBuffer.size();
|
||||
}
|
||||
size_t GetPatchParamSize() const {
|
||||
return _patchParamBuffer.size();
|
||||
}
|
||||
|
||||
protected:
|
||||
PatchArrayVector _patchArrays;
|
||||
std::vector<int> _indexBuffer;
|
||||
PatchParamVector _patchParamBuffer;
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
|
@ -52,7 +52,8 @@ CpuVertexBuffer::Create(int numElements, int numVertices,
|
||||
}
|
||||
|
||||
void
|
||||
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
|
||||
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void * /*deviceContext*/) {
|
||||
|
||||
memcpy(_cpuBuffer + startVertex * _numElements,
|
||||
src, GetNumElements() * numVertices * sizeof(float));
|
||||
|
@ -50,7 +50,8 @@ public:
|
||||
|
||||
/// This method is meant to be used in client code in order to provide
|
||||
/// coarse vertices data to Osd.
|
||||
void UpdateData(const float *src, int startVertex, int numVertices);
|
||||
void UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void *deviceContext = NULL);
|
||||
|
||||
/// Returns how many elements defined in this vertex buffer.
|
||||
int GetNumElements() const;
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../far/stencilTable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
extern "C" {
|
||||
void CudaEvalStencils(const float *src,
|
||||
@ -41,6 +42,25 @@ extern "C" {
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
|
||||
void CudaEvalPatches(
|
||||
const float *src, float *dst,
|
||||
int length, int srcStride, int dstStride,
|
||||
int numPatchCoords,
|
||||
const void *patchCoords,
|
||||
const void *patchArrays,
|
||||
const int *patchIndices,
|
||||
const void *patchParams);
|
||||
|
||||
void CudaEvalPatchesWithDerivatives(
|
||||
const float *src, float *dst, float *du, float *dv,
|
||||
int length,
|
||||
int srcStride, int dstStride, int dvStride, int duStride,
|
||||
int numPatchCoords,
|
||||
const void *patchCoords,
|
||||
const void *patchArrays,
|
||||
const int *patchIndices,
|
||||
const void *patchParams);
|
||||
}
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -102,6 +122,8 @@ CudaEvaluator::EvalStencils(const float *src,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end) {
|
||||
if (dst == NULL) return false;
|
||||
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dst + dstDesc.offset,
|
||||
srcDesc.length,
|
||||
@ -112,6 +134,105 @@ CudaEvaluator::EvalStencils(const float *src,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start,
|
||||
int end) {
|
||||
// PERFORMANCE: need to combine 3 launches together
|
||||
if (dst) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dst + dstDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dstDesc.stride,
|
||||
sizes, offsets, indices, weights,
|
||||
start, end);
|
||||
}
|
||||
if (dstDu) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dstDu + dstDuDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dstDuDesc.stride,
|
||||
sizes, offsets, indices, duWeights,
|
||||
start, end);
|
||||
}
|
||||
if (dstDv) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dstDv + dstDvDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dstDvDesc.stride,
|
||||
sizes, offsets, indices, dvWeights,
|
||||
start, end);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams) {
|
||||
src += srcDesc.offset;
|
||||
if (dst)
|
||||
dst += dstDesc.offset;
|
||||
|
||||
CudaEvalPatches(src, dst,
|
||||
srcDesc.length, srcDesc.stride, dstDesc.stride,
|
||||
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams) {
|
||||
|
||||
if (src) src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
CudaEvalPatchesWithDerivatives(
|
||||
src, dst, du, dv,
|
||||
srcDesc.length, srcDesc.stride,
|
||||
dstDesc.stride, duDesc.stride, dvDesc.stride,
|
||||
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* static */
|
||||
void
|
||||
CudaEvaluator::Synchronize(void * /*deviceContext*/) {
|
||||
|
@ -29,11 +29,13 @@
|
||||
|
||||
#include <vector>
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far {
|
||||
class PatchTable;
|
||||
class StencilTable;
|
||||
}
|
||||
|
||||
@ -43,7 +45,7 @@ namespace Osd {
|
||||
///
|
||||
/// This class is a cuda buffer representation of Far::StencilTable.
|
||||
///
|
||||
/// CudaComputeKernel consumes this table to apply stencils
|
||||
/// CudaEvaluator consumes this table to apply stencils
|
||||
///
|
||||
///
|
||||
class CudaStencilTable {
|
||||
@ -72,10 +74,14 @@ private:
|
||||
int _numStencils;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
class CudaEvaluator {
|
||||
public:
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
@ -99,21 +105,18 @@ public:
|
||||
///
|
||||
/// @param deviceContext not used in the CudaEvaluator
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const void *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const void *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
return EvalStencils(srcVertexBuffer->BindCudaBuffer(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCudaBuffer(),
|
||||
dstDesc,
|
||||
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
(int const *)stencilTable->GetSizesBuffer(),
|
||||
(int const *)stencilTable->GetOffsetsBuffer(),
|
||||
(int const *)stencilTable->GetIndicesBuffer(),
|
||||
@ -122,17 +125,369 @@ public:
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
/// \brief Static eval stencils function which takes raw cuda buffers for
|
||||
/// input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
/// @param instance not used in the cuda kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the cuda kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CudaEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw cuda pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
|
||||
(const int *)patchTable->GetPatchIndexBuffer(),
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
|
||||
(const int *)patchTable->GetPatchIndexBuffer(),
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndices an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParams an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndices an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParams an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
static void Synchronize(void *deviceContext = NULL);
|
||||
};
|
||||
|
||||
|
@ -238,6 +238,204 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Osd::PatchCoord osd/types.h
|
||||
struct PatchCoord {
|
||||
int arrayIndex;
|
||||
int patchIndex;
|
||||
int vertIndex;
|
||||
float s;
|
||||
float t;
|
||||
};
|
||||
struct PatchArray {
|
||||
int patchType; // Far::PatchDescriptor::Type
|
||||
int numPatches;
|
||||
int indexBase; // offset in the index buffer
|
||||
int primitiveIdBase; // offset in the patch param buffer
|
||||
};
|
||||
struct PatchParam {
|
||||
int faceIndex;
|
||||
unsigned int bitField;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
__device__ void
|
||||
getBSplineWeights(float t, float point[4], float deriv[4]) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float const one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point[3] = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
if (deriv) {
|
||||
deriv[0] = -0.5f*t2 + t - 0.5f;
|
||||
deriv[1] = 1.5f*t2 - 2.0f*t;
|
||||
deriv[2] = -1.5f*t2 + t + 0.5f;
|
||||
deriv[3] = 0.5f*t2;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void
|
||||
adjustBoundaryWeights(unsigned int bits, float sWeights[4], float tWeights[4]) {
|
||||
int boundary = ((bits >> 4) & 0xf); // far/patchParam.h
|
||||
|
||||
if (boundary & 1) {
|
||||
tWeights[2] -= tWeights[0];
|
||||
tWeights[1] += 2*tWeights[0];
|
||||
tWeights[0] = 0;
|
||||
}
|
||||
if (boundary & 2) {
|
||||
sWeights[1] -= sWeights[3];
|
||||
sWeights[2] += 2*sWeights[3];
|
||||
sWeights[3] = 0;
|
||||
}
|
||||
if (boundary & 4) {
|
||||
tWeights[1] -= tWeights[3];
|
||||
tWeights[2] += 2*tWeights[3];
|
||||
tWeights[3] = 0;
|
||||
}
|
||||
if (boundary & 8) {
|
||||
sWeights[2] -= sWeights[0];
|
||||
sWeights[1] += 2*sWeights[0];
|
||||
sWeights[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__device__
|
||||
int getDepth(unsigned int patchBits) {
|
||||
return (patchBits & 0x7);
|
||||
}
|
||||
|
||||
__device__
|
||||
float getParamFraction(unsigned int patchBits) {
|
||||
bool nonQuadRoot = (patchBits >> 3) & 0x1;
|
||||
int depth = getDepth(patchBits);
|
||||
if (nonQuadRoot) {
|
||||
return 1.0f / float( 1 << (depth-1) );
|
||||
} else {
|
||||
return 1.0f / float( 1 << depth );
|
||||
}
|
||||
}
|
||||
|
||||
__device__
|
||||
void normalizePatchCoord(unsigned int patchBits, float *u, float *v) {
|
||||
float frac = getParamFraction(patchBits);
|
||||
|
||||
int iu = (patchBits >> 22) & 0x3ff;
|
||||
int iv = (patchBits >> 12) & 0x3ff;
|
||||
|
||||
// top left corner
|
||||
float pu = (float)iu*frac;
|
||||
float pv = (float)iv*frac;
|
||||
|
||||
// normalize u,v coordinates
|
||||
*u = (*u - pu) / frac,
|
||||
*v = (*v - pv) / frac;
|
||||
}
|
||||
|
||||
// Far::PatchDescriptor::Type
|
||||
enum Type {
|
||||
NON_PATCH = 0, ///< undefined
|
||||
POINTS, ///< points (useful for cage drawing)
|
||||
LINES, ///< lines (useful for cage drawing)
|
||||
QUADS, ///< bilinear quads-only patches
|
||||
TRIANGLES, ///< bilinear triangles-only mesh
|
||||
LOOP, ///< Loop patch
|
||||
REGULAR, ///< feature-adaptive bicubic patches
|
||||
GREGORY,
|
||||
GREGORY_BOUNDARY,
|
||||
GREGORY_BASIS
|
||||
};
|
||||
|
||||
__global__ void
|
||||
computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
|
||||
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
int first = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
// PERFORMANCE: not yet optimized
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
|
||||
for (int i = first; i < numPatchCoords; i += blockDim.x * gridDim.x) {
|
||||
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrayBuffer[coord.arrayIndex];
|
||||
|
||||
int patchType = array.patchType;
|
||||
int numControlVertices = 16;
|
||||
// note: patchIndex is absolute.
|
||||
unsigned int patchBits = patchParamBuffer[coord.patchIndex].bitField;
|
||||
|
||||
// normalize
|
||||
float s = coord.s;
|
||||
float t = coord.t;
|
||||
normalizePatchCoord(patchBits, &s, &t);
|
||||
float dScale = (float)(1 << getDepth(patchBits));
|
||||
|
||||
if (patchType == REGULAR) {
|
||||
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
|
||||
getBSplineWeights(s, sWeights, dsWeights);
|
||||
getBSplineWeights(t, tWeights, dtWeights);
|
||||
|
||||
// Compute the tensor product weight of the (s,t) basis function
|
||||
// corresponding to each control vertex:
|
||||
adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
|
||||
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
|
||||
}
|
||||
}
|
||||
} else if (patchType == GREGORY_BASIS) {
|
||||
// XXX: not yet implemented.
|
||||
continue;
|
||||
} else {
|
||||
// unknown patchType
|
||||
continue;
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.vertIndex];
|
||||
|
||||
float * dstVert = dst + i * dstStride;
|
||||
clear(dstVert, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(dstVert, srcVert, wP[j], length);
|
||||
}
|
||||
if (dstDu) {
|
||||
float *d = dstDu + i * dstDuStride;
|
||||
clear(d, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(d, srcVert, wDs[j], length);
|
||||
}
|
||||
}
|
||||
if (dstDv) {
|
||||
float *d = dstDv + i * dstDvStride;
|
||||
clear(d, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(d, srcVert, wDt[j], length);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#define OPT_KERNEL(NUM_ELEMENTS, KERNEL, X, Y, ARG) \
|
||||
@ -257,20 +455,12 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
|
||||
|
||||
extern "C" {
|
||||
|
||||
void CudaEvalStencils(const float *src,
|
||||
float *dst,
|
||||
int length,
|
||||
int srcStride,
|
||||
int dstStride,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end)
|
||||
{
|
||||
// assert(cvs and dst and sizes and offsets and indices and weights and (end>=start));
|
||||
|
||||
void CudaEvalStencils(
|
||||
const float *src, float *dst,
|
||||
int length, int srcStride, int dstStride,
|
||||
const int * sizes, const int * offsets, const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
if (length == 0 or srcStride == 0 or dstStride == 0 or (end <= start)) {
|
||||
return;
|
||||
}
|
||||
@ -301,4 +491,36 @@ void CudaEvalStencils(const float *src,
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void CudaEvalPatches(
|
||||
const float *src, float *dst,
|
||||
int length, int srcStride, int dstStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
// PERFORMANCE: not optimized at all
|
||||
|
||||
computePatches <<<512, 32>>>(
|
||||
src, dst, NULL, NULL, length, srcStride, dstStride, 0, 0,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
}
|
||||
|
||||
void CudaEvalPatchesWithDerivatives(
|
||||
const float *src, float *dst, float *dstDu, float *dstDv,
|
||||
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
// PERFORMANCE: not optimized at all
|
||||
|
||||
computePatches <<<512, 32>>>(
|
||||
src, dst, dstDu, dstDv, length, srcStride, dstStride, dstDuStride, dstDvStride,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
}
|
||||
|
||||
} /* extern "C" */
|
||||
|
103
opensubdiv/osd/cudaPatchTable.cpp
Normal file
103
opensubdiv/osd/cudaPatchTable.cpp
Normal file
@ -0,0 +1,103 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "../osd/cudaPatchTable.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "../far/patchTable.h"
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
CudaPatchTable::CudaPatchTable() :
|
||||
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
|
||||
}
|
||||
|
||||
CudaPatchTable::~CudaPatchTable() {
|
||||
if (_patchArrays) cudaFree(_patchArrays);
|
||||
if (_indexBuffer) cudaFree(_indexBuffer);
|
||||
if (_patchParamBuffer) cudaFree(_patchParamBuffer);
|
||||
}
|
||||
|
||||
CudaPatchTable *
|
||||
CudaPatchTable::Create(Far::PatchTable const *farPatchTable,
|
||||
void * /*deviceContext*/) {
|
||||
CudaPatchTable *instance = new CudaPatchTable();
|
||||
if (instance->allocate(farPatchTable)) return instance;
|
||||
delete instance;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
CudaPatchTable::allocate(Far::PatchTable const *farPatchTable) {
|
||||
CpuPatchTable patchTable(farPatchTable);
|
||||
|
||||
size_t numPatchArrays = patchTable.GetNumPatchArrays();
|
||||
size_t indexSize = patchTable.GetPatchIndexSize();
|
||||
size_t patchParamSize = patchTable.GetPatchParamSize();
|
||||
|
||||
cudaError_t err;
|
||||
err = cudaMalloc(&_patchArrays, numPatchArrays * sizeof(Osd::PatchArray));
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
err = cudaMalloc(&_indexBuffer, indexSize * sizeof(int));
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
err = cudaMalloc(&_patchParamBuffer, patchParamSize * sizeof(Osd::PatchParam));
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
// copy patch array
|
||||
err = cudaMemcpy(_patchArrays,
|
||||
patchTable.GetPatchArrayBuffer(),
|
||||
numPatchArrays * sizeof(Osd::PatchArray),
|
||||
cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
// copy index buffer
|
||||
err = cudaMemcpy(_indexBuffer,
|
||||
patchTable.GetPatchIndexBuffer(),
|
||||
indexSize * sizeof(int),
|
||||
cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
// patch param buffer
|
||||
err = cudaMemcpy(_patchParamBuffer,
|
||||
patchTable.GetPatchParamBuffer(),
|
||||
patchParamSize * sizeof(Osd::PatchParam),
|
||||
cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
83
opensubdiv/osd/cudaPatchTable.h
Normal file
83
opensubdiv/osd/cudaPatchTable.h
Normal file
@ -0,0 +1,83 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
|
||||
#define OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far{
|
||||
class PatchTable;
|
||||
};
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief CUDA patch table
|
||||
///
|
||||
/// This class is a cuda buffer representation of Far::PatchTable.
|
||||
///
|
||||
/// CudaEvaluator consumes this table to evaluate on the patches.
|
||||
///
|
||||
///
|
||||
class CudaPatchTable : private NonCopyable<CudaPatchTable> {
|
||||
public:
|
||||
/// Creator. Returns NULL if error
|
||||
static CudaPatchTable *Create(Far::PatchTable const *patchTable,
|
||||
void *deviceContext = NULL);
|
||||
/// Destructor
|
||||
~CudaPatchTable();
|
||||
|
||||
/// Returns the cuda memory of the array of Osd::PatchArray buffer
|
||||
void *GetPatchArrayBuffer() const { return _patchArrays; }
|
||||
|
||||
/// Returns the cuda memory of the patch control vertices
|
||||
void *GetPatchIndexBuffer() const { return _indexBuffer; }
|
||||
|
||||
/// Returns the cuda memory of the array of Osd::PatchParam buffer
|
||||
void *GetPatchParamBuffer() const { return _patchParamBuffer; }
|
||||
|
||||
protected:
|
||||
CudaPatchTable();
|
||||
|
||||
bool allocate(Far::PatchTable const *patchTable);
|
||||
|
||||
void *_patchArrays;
|
||||
void *_indexBuffer;
|
||||
void *_patchParamBuffer;
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
|
@ -43,7 +43,8 @@ CudaVertexBuffer::~CudaVertexBuffer() {
|
||||
}
|
||||
|
||||
CudaVertexBuffer *
|
||||
CudaVertexBuffer::Create(int numElements, int numVertices) {
|
||||
CudaVertexBuffer::Create(int numElements, int numVertices,
|
||||
void * /*deviceContext */) {
|
||||
CudaVertexBuffer *instance =
|
||||
new CudaVertexBuffer(numElements, numVertices);
|
||||
if (instance->allocate()) return instance;
|
||||
@ -52,7 +53,8 @@ CudaVertexBuffer::Create(int numElements, int numVertices) {
|
||||
}
|
||||
|
||||
void
|
||||
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
|
||||
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void * /*deviceContext*/) {
|
||||
|
||||
size_t size = _numElements * numVertices * sizeof(float);
|
||||
|
||||
|
@ -27,6 +27,8 @@
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
@ -41,14 +43,16 @@ class CudaVertexBuffer {
|
||||
|
||||
public:
|
||||
/// Creator. Returns NULL if error.
|
||||
static CudaVertexBuffer * Create(int numElements, int numVertices);
|
||||
static CudaVertexBuffer * Create(int numElements, int numVertices,
|
||||
void *deviceContext = NULL);
|
||||
|
||||
/// Destructor.
|
||||
~CudaVertexBuffer();
|
||||
|
||||
/// This method is meant to be used in client code in order to provide coarse
|
||||
/// vertices data to Osd.
|
||||
void UpdateData(const float *src, int startVertex, int numVertices);
|
||||
void UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void *deviceContext=NULL);
|
||||
|
||||
/// Returns how many elements defined in this vertex buffer.
|
||||
int GetNumElements() const;
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <vector>
|
||||
#include "../far/patchDescriptor.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
struct ID3D11Buffer;
|
||||
struct ID3D11ShaderResourceView;
|
||||
@ -49,33 +50,6 @@ class D3D11PatchTable : private NonCopyable<D3D11PatchTable> {
|
||||
public:
|
||||
typedef ID3D11Buffer * VertexBufferBinding;
|
||||
|
||||
// XXX: this struct will be further refactored.
|
||||
class PatchArray {
|
||||
public:
|
||||
PatchArray(Far::PatchDescriptor desc, int numPatches,
|
||||
int indexBase, int primitiveIdBase) :
|
||||
desc(desc), numPatches(numPatches), indexBase(indexBase),
|
||||
primitiveIdBase(primitiveIdBase) {}
|
||||
Far::PatchDescriptor const &GetDescriptor() const {
|
||||
return desc;
|
||||
}
|
||||
int GetNumPatches() const {
|
||||
return numPatches;
|
||||
}
|
||||
int GetIndexBase() const {
|
||||
return indexBase;
|
||||
}
|
||||
int GetPrimitiveIdBase() const {
|
||||
return primitiveIdBase;
|
||||
}
|
||||
private:
|
||||
Far::PatchDescriptor desc;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
typedef std::vector<PatchArray> PatchArrayVector;
|
||||
|
||||
D3D11PatchTable();
|
||||
~D3D11PatchTable();
|
||||
|
||||
|
@ -88,16 +88,66 @@ GLStencilTableSSBO::~GLStencilTableSSBO() {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
GLComputeEvaluator::GLComputeEvaluator() :
|
||||
_program(0), _workGroupSize(64) {
|
||||
GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64) {
|
||||
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
|
||||
memset (&_patchKernel, 0, sizeof(_patchKernel));
|
||||
}
|
||||
|
||||
GLComputeEvaluator::~GLComputeEvaluator() {
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
}
|
||||
|
||||
static GLuint
|
||||
compileKernel(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const char *kernelDefine,
|
||||
int workGroupSize) {
|
||||
GLuint program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||
|
||||
std::ostringstream defines;
|
||||
defines << "#define LENGTH " << srcDesc.length << "\n"
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
|
||||
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
|
||||
<< "#define WORK_GROUP_SIZE " << workGroupSize << "\n"
|
||||
<< kernelDefine << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3] = {"#version 430\n", 0, 0};
|
||||
shaderSources[1] = defineStr.c_str();
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
glAttachShader(program, shader);
|
||||
|
||||
GLint linked = 0;
|
||||
glLinkProgram(program);
|
||||
glGetProgramiv(program, GL_LINK_STATUS, &linked);
|
||||
|
||||
if (linked == GL_FALSE) {
|
||||
char buffer[1024];
|
||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glGetProgramInfoLog(program, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glDeleteProgram(program);
|
||||
return 0;
|
||||
}
|
||||
|
||||
glDeleteShader(shader);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc) {
|
||||
@ -108,58 +158,55 @@ GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
// create stencil kernel
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
_program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||
|
||||
std::ostringstream defines;
|
||||
defines << "#define LENGTH " << srcDesc.length << "\n"
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
|
||||
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
|
||||
<< "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3] = {"#version 430\n", 0, 0};
|
||||
shaderSources[1] = defineStr.c_str();
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
glAttachShader(_program, shader);
|
||||
|
||||
GLint linked = 0;
|
||||
glLinkProgram(_program);
|
||||
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
|
||||
|
||||
if (linked == GL_FALSE) {
|
||||
char buffer[1024];
|
||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glGetProgramInfoLog(_program, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
glDeleteShader(shader);
|
||||
_stencilKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS",
|
||||
_workGroupSize);
|
||||
if (_stencilKernel.program == 0) return false;
|
||||
|
||||
// store uniform locations for the compute kernel program.
|
||||
_uniformSizes = glGetUniformLocation(_program, "stencilSizes");
|
||||
_uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
|
||||
_uniformIndices = glGetUniformLocation(_program, "stencilIndices");
|
||||
_uniformWeights = glGetUniformLocation(_program, "stencilIWeights");
|
||||
_stencilKernel.uniformSizes =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilSizes");
|
||||
_stencilKernel.uniformOffsets =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilOffsets");
|
||||
_stencilKernel.uniformIndices =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilIndices");
|
||||
_stencilKernel.uniformWeights =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilIWeights");
|
||||
_stencilKernel.uniformStart =
|
||||
glGetUniformLocation(_stencilKernel.program, "batchStart");
|
||||
_stencilKernel.uniformEnd =
|
||||
glGetUniformLocation(_stencilKernel.program, "batchEnd");
|
||||
_stencilKernel.uniformSrcOffset =
|
||||
glGetUniformLocation(_stencilKernel.program, "srcOffset");
|
||||
_stencilKernel.uniformDstOffset =
|
||||
glGetUniformLocation(_stencilKernel.program, "dstOffset");
|
||||
|
||||
_uniformStart = glGetUniformLocation(_program, "batchStart");
|
||||
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
|
||||
// create patch kernel
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
_patchKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES",
|
||||
_workGroupSize);
|
||||
if (_patchKernel.program == 0) return false;
|
||||
|
||||
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
|
||||
_uniformDstOffset = glGetUniformLocation(_program, "dstOffset");
|
||||
// uniform locaitons
|
||||
_patchKernel.uniformSrcOffset =
|
||||
glGetUniformLocation(_patchKernel.program, "srcOffset");
|
||||
_patchKernel.uniformDstOffset =
|
||||
glGetUniformLocation(_patchKernel.program, "dstOffset");
|
||||
_patchKernel.uniformPatchArray =
|
||||
glGetUniformLocation(_patchKernel.program, "patchArray");
|
||||
_patchKernel.uniformDuDesc =
|
||||
glGetUniformLocation(_patchKernel.program, "dstDuDesc");
|
||||
_patchKernel.uniformDvDesc =
|
||||
glGetUniformLocation(_patchKernel.program, "dstDvDesc");
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -183,7 +230,7 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
GLuint weightsBuffer,
|
||||
int start,
|
||||
int end) const {
|
||||
if (!_program) return false;
|
||||
if (!_stencilKernel.program) return false;
|
||||
int count = end - start;
|
||||
if (count <= 0) {
|
||||
return true;
|
||||
@ -196,12 +243,12 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer);
|
||||
|
||||
glUseProgram(_program);
|
||||
glUseProgram(_stencilKernel.program);
|
||||
|
||||
glUniform1i(_uniformStart, start);
|
||||
glUniform1i(_uniformEnd, end);
|
||||
glUniform1i(_uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_uniformDstOffset, dstDesc.offset);
|
||||
glUniform1i(_stencilKernel.uniformStart, start);
|
||||
glUniform1i(_stencilKernel.uniformEnd, end);
|
||||
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset);
|
||||
|
||||
glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
|
||||
|
||||
@ -218,6 +265,52 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GLComputeEvaluator::EvalPatches(
|
||||
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const {
|
||||
|
||||
if (!_patchKernel.program) return false;
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, patchCoordsBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchIndexBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchParamsBuffer);
|
||||
|
||||
glUseProgram(_patchKernel.program);
|
||||
|
||||
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
|
||||
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
|
||||
(const GLint*)&patchArrays[0]);
|
||||
glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
|
||||
glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
|
||||
|
||||
glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1);
|
||||
|
||||
glUseProgram(0);
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -92,26 +93,32 @@ public:
|
||||
/// Destructor. note that the GL context must be made current.
|
||||
~GLComputeEvaluator();
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a
|
||||
/// const float pointer for read
|
||||
/// GL buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
/// GL buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied. The table must have
|
||||
/// SSBO interfaces.
|
||||
///
|
||||
/// @param evaluator cached compiled instance. Clients are supposed to
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
@ -119,25 +126,25 @@ public:
|
||||
///
|
||||
/// @param deviceContext not used in the GLSL kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
return instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
} else {
|
||||
// Create a kernel on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
bool r = instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
delete instance;
|
||||
return r;
|
||||
@ -148,15 +155,14 @@ public:
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcVertexBuffer->BindVBO(),
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcBuffer->BindVBO(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindVBO(),
|
||||
dstBuffer->BindVBO(),
|
||||
dstDesc,
|
||||
stencilTable->GetSizesBuffer(),
|
||||
stencilTable->GetOffsetsBuffer(),
|
||||
@ -168,10 +174,8 @@ public:
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
bool EvalStencils(GLuint srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint sizesBuffer,
|
||||
GLuint offsetsBuffer,
|
||||
GLuint indicesBuffer,
|
||||
@ -179,6 +183,271 @@ public:
|
||||
int start,
|
||||
int end) const;
|
||||
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer
|
||||
///
|
||||
/// @param duDesc
|
||||
///
|
||||
/// @param dvBuffer
|
||||
///
|
||||
/// @param dvDesc
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBOBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
0, VertexBufferDescriptor(),
|
||||
0, VertexBufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
duBuffer->BindVBO(), duDesc,
|
||||
dvBuffer->BindVBO(), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// Configure GLSL kernel. A valid GL context must be made current before
|
||||
/// calling this function. Returns false if it fails to compile the kernel.
|
||||
bool Compile(VertexBufferDescriptor const &srcDesc,
|
||||
@ -188,18 +457,27 @@ public:
|
||||
static void Synchronize(void *deviceContext);
|
||||
|
||||
private:
|
||||
GLuint _program;
|
||||
struct _StencilKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSizes;
|
||||
GLuint uniformOffsets;
|
||||
GLuint uniformIndices;
|
||||
GLuint uniformWeights;
|
||||
GLuint uniformStart;
|
||||
GLuint uniformEnd;
|
||||
GLuint uniformSrcOffset;
|
||||
GLuint uniformDstOffset;
|
||||
} _stencilKernel;
|
||||
|
||||
GLuint _uniformSizes, // stencil table
|
||||
_uniformOffsets,
|
||||
_uniformIndices,
|
||||
_uniformWeights,
|
||||
struct _PatchKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSrcOffset;
|
||||
GLuint uniformDstOffset;
|
||||
GLuint uniformPatchArray;
|
||||
GLuint uniformDuDesc;
|
||||
GLuint uniformDvDesc;
|
||||
|
||||
_uniformStart, // range
|
||||
_uniformEnd,
|
||||
|
||||
_uniformSrcOffset, // src buffer offset (in elements)
|
||||
_uniformDstOffset; // dst buffer offset (in elements)
|
||||
} _patchKernel;
|
||||
|
||||
int _workGroupSize;
|
||||
};
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
#include "../far/patchTable.h"
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -33,11 +34,14 @@ namespace OPENSUBDIV_VERSION {
|
||||
namespace Osd {
|
||||
|
||||
GLPatchTable::GLPatchTable() :
|
||||
_indexBuffer(0), _patchParamTexture(0) {
|
||||
_patchIndexBuffer(0), _patchParamBuffer(0),
|
||||
_patchIndexTexture(0), _patchParamTexture(0) {
|
||||
}
|
||||
|
||||
GLPatchTable::~GLPatchTable() {
|
||||
if (_indexBuffer) glDeleteBuffers(1, &_indexBuffer);
|
||||
if (_patchIndexBuffer) glDeleteBuffers(1, &_patchIndexBuffer);
|
||||
if (_patchParamBuffer) glDeleteBuffers(1, &_patchParamBuffer);
|
||||
if (_patchIndexTexture) glDeleteTextures(1, &_patchIndexTexture);
|
||||
if (_patchParamTexture) glDeleteTextures(1, &_patchParamTexture);
|
||||
}
|
||||
|
||||
@ -52,84 +56,56 @@ GLPatchTable::Create(Far::PatchTable const *farPatchTable,
|
||||
|
||||
bool
|
||||
GLPatchTable::allocate(Far::PatchTable const *farPatchTable) {
|
||||
glGenBuffers(1, &_indexBuffer);
|
||||
glGenBuffers(1, &_patchIndexBuffer);
|
||||
glGenBuffers(1, &_patchParamBuffer);
|
||||
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _indexBuffer);
|
||||
std::vector<int> buffer;
|
||||
std::vector<unsigned int> ppBuffer;
|
||||
CpuPatchTable patchTable(farPatchTable);
|
||||
|
||||
// needs reserve?
|
||||
size_t numPatchArrays = patchTable.GetNumPatchArrays();
|
||||
GLsizei indexSize = (GLsizei)patchTable.GetPatchIndexSize();
|
||||
GLsizei patchParamSize = (GLsizei)patchTable.GetPatchParamSize();
|
||||
|
||||
int nPatchArrays = farPatchTable->GetNumPatchArrays();
|
||||
|
||||
// for each patchArray
|
||||
for (int j = 0; j < nPatchArrays; ++j) {
|
||||
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
|
||||
farPatchTable->GetNumPatches(j),
|
||||
(int)buffer.size(),
|
||||
(int)ppBuffer.size()/3);
|
||||
_patchArrays.push_back(patchArray);
|
||||
|
||||
// indices
|
||||
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
|
||||
for (int k = 0; k < indices.size(); ++k) {
|
||||
buffer.push_back(indices[k]);
|
||||
}
|
||||
|
||||
// patchParams
|
||||
#if 0
|
||||
// XXX: we need sharpness interface for patcharray or put sharpness
|
||||
// into patchParam.
|
||||
Far::ConstPatchParamArray patchParams =
|
||||
farPatchTable->GetPatchParams(j);
|
||||
for (int k = 0; k < patchParams.size(); ++k) {
|
||||
float sharpness = 0.0;
|
||||
ppBuffer.push_back(patchParams[k].faceIndex);
|
||||
ppBuffer.push_back(patchParams[k].bitField.field);
|
||||
ppBuffer.push_back(*((unsigned int *)&sharpness));
|
||||
}
|
||||
#else
|
||||
// XXX: workaround. GetPatchParamTable() will be deprecated though.
|
||||
Far::PatchParamTable const & patchParamTable =
|
||||
farPatchTable->GetPatchParamTable();
|
||||
std::vector<Far::Index> const &sharpnessIndexTable =
|
||||
farPatchTable->GetSharpnessIndexTable();
|
||||
int numPatches = farPatchTable->GetNumPatches(j);
|
||||
for (int k = 0; k < numPatches; ++k) {
|
||||
float sharpness = 0.0;
|
||||
int patchIndex = (int)ppBuffer.size()/3;
|
||||
if (patchIndex < (int)sharpnessIndexTable.size()) {
|
||||
int sharpnessIndex = sharpnessIndexTable[patchIndex];
|
||||
if (sharpnessIndex >= 0)
|
||||
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
|
||||
}
|
||||
ppBuffer.push_back(patchParamTable[patchIndex].faceIndex);
|
||||
ppBuffer.push_back(patchParamTable[patchIndex].bitField.field);
|
||||
ppBuffer.push_back(*((unsigned int *)&sharpness));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// copy patch array
|
||||
_patchArrays.insert(_patchArrays.end(),
|
||||
patchTable.GetPatchArrayBuffer(),
|
||||
patchTable.GetPatchArrayBuffer() + numPatchArrays);
|
||||
|
||||
// copy index buffer
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _patchIndexBuffer);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
|
||||
(int)buffer.size()*sizeof(int), &buffer[0], GL_STATIC_DRAW);
|
||||
|
||||
// patchParam is currently expected to be texture (it can be SSBO)
|
||||
GLuint texBuffer = 0;
|
||||
glGenBuffers(1, &texBuffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, texBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, ppBuffer.size()*sizeof(unsigned int),
|
||||
&ppBuffer[0], GL_STATIC_DRAW);
|
||||
|
||||
glGenTextures(1, &_patchParamTexture);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, texBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
glDeleteBuffers(1, &texBuffer);
|
||||
|
||||
indexSize * sizeof(GLint),
|
||||
patchTable.GetPatchIndexBuffer(),
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
|
||||
// copy patchparam buffer
|
||||
glBindBuffer(GL_ARRAY_BUFFER, _patchParamBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER,
|
||||
patchParamSize * sizeof(PatchParam),
|
||||
patchTable.GetPatchParamBuffer(),
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
|
||||
// make both buffer as texture buffers too.
|
||||
glGenTextures(1, &_patchIndexTexture);
|
||||
glGenTextures(1, &_patchParamTexture);
|
||||
|
||||
GLuint buffer;
|
||||
glGenBuffers(1, &buffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER,
|
||||
indexSize * sizeof(GLint),
|
||||
patchTable.GetPatchIndexBuffer(),
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _patchIndexTexture);
|
||||
// glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, _patchIndexBuffer);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, buffer);
|
||||
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, _patchParamBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -27,10 +27,9 @@
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include <vector>
|
||||
#include "../far/patchDescriptor.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -45,34 +44,6 @@ class GLPatchTable : private NonCopyable<GLPatchTable> {
|
||||
public:
|
||||
typedef GLuint VertexBufferBinding;
|
||||
|
||||
// XXX: this struct will be further refactored.
|
||||
class PatchArray {
|
||||
public:
|
||||
PatchArray(Far::PatchDescriptor desc, int numPatches,
|
||||
int indexBase, int primitiveIdBase) :
|
||||
desc(desc), numPatches(numPatches), indexBase(indexBase),
|
||||
primitiveIdBase(primitiveIdBase) {}
|
||||
Far::PatchDescriptor const &GetDescriptor() const {
|
||||
return desc;
|
||||
}
|
||||
int GetNumPatches() const {
|
||||
return numPatches;
|
||||
}
|
||||
int GetIndexBase() const {
|
||||
return indexBase;
|
||||
}
|
||||
int GetPrimitiveIdBase() const {
|
||||
return primitiveIdBase;
|
||||
}
|
||||
private:
|
||||
Far::PatchDescriptor desc;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
typedef std::vector<PatchArray> PatchArrayVector;
|
||||
|
||||
GLPatchTable();
|
||||
~GLPatchTable();
|
||||
|
||||
static GLPatchTable *Create(Far::PatchTable const *farPatchTable,
|
||||
@ -84,7 +55,17 @@ public:
|
||||
|
||||
/// Returns the GL index buffer containing the patch control vertices
|
||||
GLuint GetPatchIndexBuffer() const {
|
||||
return _indexBuffer;
|
||||
return _patchIndexBuffer;
|
||||
}
|
||||
|
||||
/// Returns the GL index buffer containing the patch parameter
|
||||
GLuint GetPatchParamBuffer() const {
|
||||
return _patchParamBuffer;
|
||||
}
|
||||
|
||||
/// Returns the GL texture buffer containing the patch control vertices
|
||||
GLuint GetPatchIndexTextureBuffer() const {
|
||||
return _patchIndexTexture;
|
||||
}
|
||||
|
||||
/// Returns the GL texture buffer containing the patch parameter
|
||||
@ -93,11 +74,17 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
GLPatchTable();
|
||||
|
||||
// allocate buffers from patchTable
|
||||
bool allocate(Far::PatchTable const *farPatchTable);
|
||||
|
||||
PatchArrayVector _patchArrays;
|
||||
GLuint _indexBuffer;
|
||||
|
||||
GLuint _patchIndexBuffer;
|
||||
GLuint _patchParamBuffer;
|
||||
|
||||
GLuint _patchIndexTexture;
|
||||
GLuint _patchParamTexture;
|
||||
};
|
||||
|
||||
|
@ -108,37 +108,36 @@ GLStencilTableTBO::~GLStencilTableTBO() {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
GLXFBEvaluator::GLXFBEvaluator() :
|
||||
_program(0), _srcBufferTexture(0),
|
||||
_uniformSrcBufferTexture(0), _uniformSizesTexture(0),
|
||||
_uniformOffsetsTexture(0), _uniformIndicesTexture(0),
|
||||
_uniformWeightsTexture(0), _uniformStart(0), _uniformEnd(0),
|
||||
_uniformSrcOffset(0) {
|
||||
GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) {
|
||||
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
|
||||
memset (&_patchKernel, 0, sizeof(_patchKernel));
|
||||
}
|
||||
|
||||
GLXFBEvaluator::~GLXFBEvaluator() {
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
if (_srcBufferTexture) {
|
||||
glDeleteTextures(1, &_srcBufferTexture);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc) {
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
}
|
||||
_program = glCreateProgram();
|
||||
static GLuint
|
||||
compileKernel(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const char *kernelDefine) {
|
||||
|
||||
GLuint program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
|
||||
|
||||
std::ostringstream defines;
|
||||
defines << "#define LENGTH " << srcDesc.length << "\n"
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n";
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
|
||||
<< kernelDefine << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3] = {"#version 410\n", NULL, NULL};
|
||||
@ -147,7 +146,7 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
glAttachShader(_program, shader);
|
||||
glAttachShader(program, shader);
|
||||
|
||||
std::vector<std::string> outputs;
|
||||
std::vector<const char *> pOutputs;
|
||||
@ -183,40 +182,83 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
}
|
||||
}
|
||||
|
||||
glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(),
|
||||
glTransformFeedbackVaryings(program, (GLsizei)outputs.size(),
|
||||
&pOutputs[0], GL_INTERLEAVED_ATTRIBS);
|
||||
|
||||
GLint linked = 0;
|
||||
glLinkProgram(_program);
|
||||
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
|
||||
glLinkProgram(program);
|
||||
glGetProgramiv(program, GL_LINK_STATUS, &linked);
|
||||
|
||||
if (linked == GL_FALSE) {
|
||||
char buffer[1024];
|
||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glGetProgramInfoLog(_program, 1024, NULL, buffer);
|
||||
glGetProgramInfoLog(program, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
return false;
|
||||
glDeleteProgram(program);
|
||||
program = 0;
|
||||
}
|
||||
|
||||
glDeleteShader(shader);
|
||||
|
||||
// set uniform locations for compute kernels
|
||||
_uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer");
|
||||
return program;
|
||||
}
|
||||
|
||||
_uniformSizesTexture = glGetUniformLocation(_program, "sizes");
|
||||
_uniformOffsetsTexture = glGetUniformLocation(_program, "offsets");
|
||||
_uniformIndicesTexture = glGetUniformLocation(_program, "indices");
|
||||
_uniformWeightsTexture = glGetUniformLocation(_program, "weights");
|
||||
bool
|
||||
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc) {
|
||||
|
||||
_uniformStart = glGetUniformLocation(_program, "batchStart");
|
||||
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
|
||||
// create stencil kernel
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
_stencilKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS");
|
||||
if (_stencilKernel.program == 0) return false;
|
||||
|
||||
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
|
||||
// cache uniform locations
|
||||
_stencilKernel.uniformSrcBufferTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "vertexBuffer");
|
||||
_stencilKernel.uniformSrcOffset
|
||||
= glGetUniformLocation(_stencilKernel.program, "srcOffset");
|
||||
|
||||
_stencilKernel.uniformSizesTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "sizes");
|
||||
_stencilKernel.uniformOffsetsTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "offsets");
|
||||
_stencilKernel.uniformIndicesTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "indices");
|
||||
_stencilKernel.uniformWeightsTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "weights");
|
||||
_stencilKernel.uniformStart
|
||||
= glGetUniformLocation(_stencilKernel.program, "batchStart");
|
||||
_stencilKernel.uniformEnd
|
||||
= glGetUniformLocation(_stencilKernel.program, "batchEnd");
|
||||
|
||||
// create patch kernel
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
_patchKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES");
|
||||
if (_patchKernel.program == 0) return false;
|
||||
|
||||
// cache uniform locations
|
||||
_patchKernel.uniformSrcBufferTexture
|
||||
= glGetUniformLocation(_patchKernel.program, "vertexBuffer");
|
||||
_patchKernel.uniformSrcOffset
|
||||
= glGetUniformLocation(_patchKernel.program, "srcOffset");
|
||||
|
||||
_patchKernel.uniformPatchArray
|
||||
= glGetUniformLocation(_patchKernel.program, "patchArray");
|
||||
_patchKernel.uniformPatchParamTexture
|
||||
= glGetUniformLocation(_patchKernel.program, "patchParamBuffer");
|
||||
_patchKernel.uniformPatchIndexTexture
|
||||
= glGetUniformLocation(_patchKernel.program, "patchIndexBuffer");
|
||||
|
||||
// create a texture for input buffer
|
||||
if (!_srcBufferTexture) {
|
||||
@ -255,7 +297,7 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
GLuint weightsTexture,
|
||||
int start,
|
||||
int end) const {
|
||||
if (!_program) return false;
|
||||
if (!_stencilKernel.program) return false;
|
||||
int count = end - start;
|
||||
if (count <= 0) {
|
||||
return true;
|
||||
@ -268,25 +310,25 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
glBindVertexArray(vao);
|
||||
|
||||
glEnable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(_program);
|
||||
glUseProgram(_stencilKernel.program);
|
||||
|
||||
// Set input VBO as a texture buffer.
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0);
|
||||
bindTexture(_stencilKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
|
||||
|
||||
// bind stencil table textures.
|
||||
bindTexture(_uniformSizesTexture, sizesTexture, 1);
|
||||
bindTexture(_uniformOffsetsTexture, offsetsTexture, 2);
|
||||
bindTexture(_uniformIndicesTexture, indicesTexture, 3);
|
||||
bindTexture(_uniformWeightsTexture, weightsTexture, 4);
|
||||
bindTexture(_stencilKernel.uniformSizesTexture, sizesTexture, 1);
|
||||
bindTexture(_stencilKernel.uniformOffsetsTexture, offsetsTexture, 2);
|
||||
bindTexture(_stencilKernel.uniformIndicesTexture, indicesTexture, 3);
|
||||
bindTexture(_stencilKernel.uniformWeightsTexture, weightsTexture, 4);
|
||||
|
||||
// set batch range
|
||||
glUniform1i(_uniformStart, start);
|
||||
glUniform1i(_uniformEnd, end);
|
||||
glUniform1i(_uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_stencilKernel.uniformStart, start);
|
||||
glUniform1i(_stencilKernel.uniformEnd, end);
|
||||
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
|
||||
|
||||
// The destination buffer is bound at vertex boundary.
|
||||
//
|
||||
@ -347,6 +389,94 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GLXFBEvaluator::EvalPatches(
|
||||
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const & /*duDesc*/,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const & /*dvDesc*/,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexTexture,
|
||||
GLuint patchParamTexture) const {
|
||||
if (!_patchKernel.program) return false;
|
||||
|
||||
if (duBuffer != 0 || dvBuffer != 0) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"GLXFBEvaluator doesn't support derivative evaluation yet.\n");
|
||||
}
|
||||
|
||||
// bind vertex array
|
||||
// always create new one, to be safe with multiple contexts (slow though)
|
||||
GLuint vao = 0;
|
||||
glGenVertexArrays(1, &vao);
|
||||
glBindVertexArray(vao);
|
||||
|
||||
glEnable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(_patchKernel.program);
|
||||
|
||||
// Set input VBO as a texture buffer.
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
bindTexture(_patchKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
|
||||
|
||||
// bind patch index and patch param textures.
|
||||
bindTexture(_patchKernel.uniformPatchParamTexture, patchParamTexture, 1);
|
||||
bindTexture(_patchKernel.uniformPatchIndexTexture, patchIndexTexture, 2);
|
||||
|
||||
// set other uniforms
|
||||
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
|
||||
(const GLint*)&patchArrays[0]);
|
||||
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
|
||||
|
||||
// input patchcoords
|
||||
glEnableVertexAttribArray(0);
|
||||
glEnableVertexAttribArray(1);
|
||||
int stride = sizeof(int) * 5; // patchcoord = int*5 struct
|
||||
glBindBuffer(GL_ARRAY_BUFFER, patchCoordsBuffer);
|
||||
glVertexAttribIPointer(0, 3, GL_UNSIGNED_INT, stride, (void*)0);
|
||||
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, stride, (void*)(sizeof(int)*3));
|
||||
|
||||
int dstBufferBindOffset =
|
||||
dstDesc.offset - (dstDesc.offset % dstDesc.stride);
|
||||
|
||||
// bind destination buffer
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
0, dstBuffer,
|
||||
dstBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * dstDesc.stride * sizeof(float));
|
||||
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
glDrawArrays(GL_POINTS, 0, numPatchCoords);
|
||||
glEndTransformFeedback();
|
||||
|
||||
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
|
||||
|
||||
// unbind textures
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
glActiveTexture(GL_TEXTURE0 + i);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
|
||||
glDisable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
|
||||
glDisableVertexAttribArray(0);
|
||||
glDisableVertexAttribArray(1);
|
||||
|
||||
// revert vao
|
||||
glBindVertexArray(0);
|
||||
glDeleteVertexArrays(1, &vao);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -93,6 +94,12 @@ public:
|
||||
/// Destructor. note that the GL context must be made current.
|
||||
~GLXFBEvaluator();
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static stencil function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
@ -120,25 +127,25 @@ public:
|
||||
///
|
||||
/// @param deviceContext not used in the GLSLTransformFeedback kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
return instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
bool r = instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
delete instance;
|
||||
return r;
|
||||
@ -147,18 +154,30 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcVertexBuffer->BindVBO(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindVBO(),
|
||||
dstDesc,
|
||||
/// \brief dispatch eval stencils function.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object for destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
|
||||
return EvalStencils(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
stencilTable->GetSizesTexture(),
|
||||
stencilTable->GetOffsetsTexture(),
|
||||
stencilTable->GetIndicesTexture(),
|
||||
@ -167,12 +186,31 @@ public:
|
||||
/* end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
bool EvalStencils(GLuint srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
/// \brief Static eval stencils function, dispatch the GLSL XFB kernel on
|
||||
/// on GPU asynchronously.
|
||||
///
|
||||
/// @param srcBuffer GL buffer of input primvars.
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer GL buffer of output primvars.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizesBuffer GL buffer of the sizes in the stencil table
|
||||
///
|
||||
/// @param offsetsBuffer GL buffer of the offsets in the stencil table
|
||||
///
|
||||
/// @param indicesBuffer GL buffer of the indices in the stencil table
|
||||
///
|
||||
/// @param weightsBuffer GL buffer of the weifgrs in the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint sizesBuffer,
|
||||
GLuint offsetsBuffer,
|
||||
GLuint indicesBuffer,
|
||||
@ -180,6 +218,270 @@ public:
|
||||
int start,
|
||||
int end) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer
|
||||
///
|
||||
/// @param duDesc
|
||||
///
|
||||
/// @param dvBuffer
|
||||
///
|
||||
/// @param dvDesc
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
0, VertexBufferDescriptor(),
|
||||
0, VertexBufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexTextureBuffer(),
|
||||
patchTable->GetPatchParamTextureBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output s-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output t-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
duBuffer->BindVBO(), duDesc,
|
||||
dvBuffer->BindVBO(), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexTextureBuffer(),
|
||||
patchTable->GetPatchParamTextureBuffer());
|
||||
}
|
||||
|
||||
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// Configure GLSL kernel. A valid GL context must be made current before
|
||||
/// calling this function. Returns false if it fails to compile the kernel.
|
||||
bool Compile(VertexBufferDescriptor const &srcDesc,
|
||||
@ -189,19 +491,31 @@ public:
|
||||
static void Synchronize(void *kernel);
|
||||
|
||||
private:
|
||||
GLuint _program;
|
||||
|
||||
GLuint _srcBufferTexture;
|
||||
|
||||
GLuint _uniformSrcBufferTexture;
|
||||
GLuint _uniformSizesTexture;
|
||||
GLuint _uniformOffsetsTexture;
|
||||
GLuint _uniformIndicesTexture;
|
||||
GLuint _uniformWeightsTexture;
|
||||
struct _StencilKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSrcBufferTexture;
|
||||
GLuint uniformSrcOffset; // src buffer offset (in elements)
|
||||
|
||||
GLuint uniformSizesTexture;
|
||||
GLuint uniformOffsetsTexture;
|
||||
GLuint uniformIndicesTexture;
|
||||
GLuint uniformWeightsTexture;
|
||||
GLuint uniformStart; // range
|
||||
GLuint uniformEnd;
|
||||
} _stencilKernel;
|
||||
|
||||
struct _PatchKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSrcBufferTexture;
|
||||
GLuint uniformSrcOffset; // src buffer offset (in elements)
|
||||
|
||||
GLuint uniformPatchArray;
|
||||
GLuint uniformPatchParamTexture;
|
||||
GLuint uniformPatchIndexTexture;
|
||||
} _patchKernel;
|
||||
|
||||
GLuint _uniformStart; // range
|
||||
GLuint _uniformEnd;
|
||||
GLuint _uniformSrcOffset; // src buffer offset (in elements)
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
@ -24,18 +24,10 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
uniform int srcOffset = 0;
|
||||
uniform int dstOffset = 0;
|
||||
|
||||
layout(binding=0) buffer src_buffer { float srcVertexBuffer[]; };
|
||||
layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; };
|
||||
layout(binding=2) buffer stencilSizes { int _sizes[]; };
|
||||
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
|
||||
layout(binding=4) buffer stencilIndices { int _indices[]; };
|
||||
layout(binding=5) buffer stencilWeights { float _weights[]; };
|
||||
|
||||
layout(local_size_x=WORK_GROUP_SIZE, local_size_y=1, local_size_z=1) in;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -73,6 +65,15 @@ void addWithWeight(inout Vertex v, const Vertex src, float weight) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
|
||||
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
layout(binding=2) buffer stencilSizes { int _sizes[]; };
|
||||
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
|
||||
layout(binding=4) buffer stencilIndices { int _indices[]; };
|
||||
layout(binding=5) buffer stencilWeights { float _weights[]; };
|
||||
|
||||
void main() {
|
||||
|
||||
int current = int(gl_GlobalInvocationID.x) + batchStart;
|
||||
@ -94,4 +95,194 @@ void main() {
|
||||
writeVertex(current, dst);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
|
||||
|
||||
// PERFORMANCE: stride could be constant, but not as significant as length
|
||||
|
||||
//struct PatchArray {
|
||||
// int patchType;
|
||||
// int numPatches;
|
||||
// int indexBase; // an offset within the index buffer
|
||||
// int primitiveIdBase; // an offset within the patch param buffer
|
||||
//};
|
||||
// # of patcharrays is 1 or 2.
|
||||
|
||||
uniform ivec4 patchArray[2];
|
||||
uniform ivec3 dstDuDesc;
|
||||
uniform ivec3 dstDvDesc;
|
||||
layout(binding=2) buffer du_buffer { float dstDuBuffer[]; };
|
||||
layout(binding=3) buffer dv_buffer { float dstDvBuffer[]; };
|
||||
|
||||
struct PatchCoord {
|
||||
int arrayIndex;
|
||||
int patchIndex;
|
||||
int vertIndex;
|
||||
float s;
|
||||
float t;
|
||||
};
|
||||
|
||||
struct PatchParam {
|
||||
int faceIndex;
|
||||
uint patchBits;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
layout(binding=4) buffer patchCoord_buffer { PatchCoord patchCoords[]; };
|
||||
layout(binding=5) buffer patchIndex_buffer { int patchIndexBuffer[]; };
|
||||
layout(binding=6) buffer patchParam_buffer { PatchParam patchParamBuffer[]; };
|
||||
|
||||
void writeDu(int index, Vertex du) {
|
||||
int duIndex = dstDuDesc.x + index * dstDuDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
dstDuBuffer[duIndex + i] = du.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDv(int index, Vertex dv) {
|
||||
int dvIndex = dstDvDesc.x + index * dstDvDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
dstDvBuffer[dvIndex + i] = dv.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void getBSplineWeights(float t, inout vec4 point, inout vec4 deriv) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point.w = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
deriv.x = -0.5f*t2 + t - 0.5f;
|
||||
deriv.y = 1.5f*t2 - 2.0f*t;
|
||||
deriv.z = -1.5f*t2 + t + 0.5f;
|
||||
deriv.w = 0.5f*t2;
|
||||
}
|
||||
|
||||
uint getDepth(uint patchBits) {
|
||||
return (patchBits & 0x7);
|
||||
}
|
||||
|
||||
float getParamFraction(uint patchBits) {
|
||||
uint nonQuadRoot = (patchBits >> 3) & 0x1;
|
||||
uint depth = getDepth(patchBits);
|
||||
if (nonQuadRoot == 1) {
|
||||
return 1.0f / float( 1 << (depth-1) );
|
||||
} else {
|
||||
return 1.0f / float( 1 << depth );
|
||||
}
|
||||
}
|
||||
|
||||
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
|
||||
float frac = getParamFraction(patchBits);
|
||||
|
||||
uint iu = (patchBits >> 22) & 0x3ff;
|
||||
uint iv = (patchBits >> 12) & 0x3ff;
|
||||
|
||||
// top left corner
|
||||
float pu = float(iu*frac);
|
||||
float pv = float(iv*frac);
|
||||
|
||||
// normalize u,v coordinates
|
||||
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
|
||||
}
|
||||
|
||||
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
|
||||
uint boundary = ((bits >> 4) & 0xf);
|
||||
|
||||
if ((boundary & 1) != 0) {
|
||||
tWeights[2] -= tWeights[0];
|
||||
tWeights[1] += 2*tWeights[0];
|
||||
tWeights[0] = 0;
|
||||
}
|
||||
if ((boundary & 2) != 0) {
|
||||
sWeights[1] -= sWeights[3];
|
||||
sWeights[2] += 2*sWeights[3];
|
||||
sWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 4) != 0) {
|
||||
tWeights[1] -= tWeights[3];
|
||||
tWeights[2] += 2*tWeights[3];
|
||||
tWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 8) != 0) {
|
||||
sWeights[2] -= sWeights[0];
|
||||
sWeights[1] += 2*sWeights[0];
|
||||
sWeights[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
|
||||
int current = int(gl_GlobalInvocationID.x);
|
||||
|
||||
PatchCoord coord = patchCoords[current];
|
||||
int patchIndex = coord.patchIndex;
|
||||
|
||||
ivec4 array = patchArray[coord.arrayIndex];
|
||||
int patchType = 6; // array.x XXX: REGULAR only for now.
|
||||
int numControlVertices = 16;
|
||||
|
||||
uint patchBits = patchParamBuffer[patchIndex].patchBits;
|
||||
vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t));
|
||||
float dScale = float(1 << getDepth(patchBits));
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
if (patchType == 6) { // REGULAR
|
||||
vec4 sWeights, tWeights, dsWeights, dtWeights;
|
||||
getBSplineWeights(uv.x, sWeights, dsWeights);
|
||||
getBSplineWeights(uv.y, tWeights, dtWeights);
|
||||
|
||||
adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
|
||||
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// TODO: GREGORY BASIS
|
||||
}
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
int indexBase = array.z + coord.vertIndex;
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(dst, readVertex(index), wP[i]);
|
||||
}
|
||||
writeVertex(current, dst);
|
||||
|
||||
if (dstDuDesc.y > 0) { // length
|
||||
Vertex du;
|
||||
clear(du);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(du, readVertex(index), wDs[i]);
|
||||
}
|
||||
writeDu(current, du);
|
||||
}
|
||||
if (dstDvDesc.y > 0) {
|
||||
Vertex dv;
|
||||
clear(dv);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(dv, readVertex(index), wDt[i]);
|
||||
}
|
||||
writeDv(current, dv);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -25,18 +25,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
uniform samplerBuffer vertexBuffer;
|
||||
|
||||
out float outVertexBuffer[LENGTH];
|
||||
|
||||
uniform usamplerBuffer sizes;
|
||||
uniform isamplerBuffer offsets;
|
||||
uniform isamplerBuffer indices;
|
||||
uniform samplerBuffer weights;
|
||||
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
|
||||
uniform int srcOffset = 0;
|
||||
out float outVertexBuffer[LENGTH];
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@ -72,6 +62,16 @@ void writeVertex(Vertex v) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS)
|
||||
|
||||
uniform usamplerBuffer sizes;
|
||||
uniform isamplerBuffer offsets;
|
||||
uniform isamplerBuffer indices;
|
||||
uniform samplerBuffer weights;
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
|
||||
void main() {
|
||||
|
||||
int current = gl_VertexID + batchStart;
|
||||
@ -97,4 +97,146 @@ void main() {
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES)
|
||||
|
||||
layout (location = 0) in ivec3 patchHandles;
|
||||
layout (location = 1) in vec2 patchCoords;
|
||||
|
||||
//struct PatchArray {
|
||||
// int patchType;
|
||||
// int numPatches;
|
||||
// int indexBase; // an offset within the index buffer
|
||||
// int primitiveIdBase; // an offset within the patch param buffer
|
||||
//};
|
||||
// # of patcharrays is 1 or 2.
|
||||
|
||||
uniform ivec4 patchArray[2];
|
||||
uniform isamplerBuffer patchParamBuffer;
|
||||
uniform isamplerBuffer patchIndexBuffer;
|
||||
|
||||
void getBSplineWeights(float t, inout vec4 point, vec4 deriv) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point.w = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
/* if (deriv) { */
|
||||
/* deriv[0] = -0.5f*t2 + t - 0.5f; */
|
||||
/* deriv[1] = 1.5f*t2 - 2.0f*t; */
|
||||
/* deriv[2] = -1.5f*t2 + t + 0.5f; */
|
||||
/* deriv[3] = 0.5f*t2; */
|
||||
/* } */
|
||||
}
|
||||
|
||||
uint getDepth(uint patchBits) {
|
||||
return (patchBits & 0x7);
|
||||
}
|
||||
|
||||
float getParamFraction(uint patchBits) {
|
||||
uint nonQuadRoot = (patchBits >> 3) & 0x1;
|
||||
uint depth = getDepth(patchBits);
|
||||
if (nonQuadRoot == 1) {
|
||||
return 1.0f / float( 1 << (depth-1) );
|
||||
} else {
|
||||
return 1.0f / float( 1 << depth );
|
||||
}
|
||||
}
|
||||
|
||||
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
|
||||
float frac = getParamFraction(patchBits);
|
||||
|
||||
uint iu = (patchBits >> 22) & 0x3ff;
|
||||
uint iv = (patchBits >> 12) & 0x3ff;
|
||||
|
||||
// top left corner
|
||||
float pu = float(iu*frac);
|
||||
float pv = float(iv*frac);
|
||||
|
||||
// normalize u,v coordinates
|
||||
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
|
||||
}
|
||||
|
||||
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
|
||||
uint boundary = ((bits >> 4) & 0xf);
|
||||
|
||||
if ((boundary & 1) != 0) {
|
||||
tWeights[2] -= tWeights[0];
|
||||
tWeights[1] += 2*tWeights[0];
|
||||
tWeights[0] = 0;
|
||||
}
|
||||
if ((boundary & 2) != 0) {
|
||||
sWeights[1] -= sWeights[3];
|
||||
sWeights[2] += 2*sWeights[3];
|
||||
sWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 4) != 0) {
|
||||
tWeights[1] -= tWeights[3];
|
||||
tWeights[2] += 2*tWeights[3];
|
||||
tWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 8) != 0) {
|
||||
sWeights[2] -= sWeights[0];
|
||||
sWeights[1] += 2*sWeights[0];
|
||||
sWeights[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
int current = gl_VertexID;
|
||||
|
||||
ivec3 handle = patchHandles;
|
||||
int patchIndex = handle.y;
|
||||
|
||||
vec2 coord = patchCoords;
|
||||
ivec4 array = patchArray[handle.x];
|
||||
int patchType = array.x;
|
||||
int numControlVertices = 16;
|
||||
|
||||
uint patchBits = texelFetch(patchParamBuffer, patchIndex).y;
|
||||
|
||||
// normalize
|
||||
coord = normalizePatchCoord(patchBits, coord);
|
||||
|
||||
// XXX: dScale for derivative
|
||||
|
||||
// if regular
|
||||
float wP[20];
|
||||
{
|
||||
vec4 sWeights, tWeights, dsWeights, dtWeights;
|
||||
getBSplineWeights(coord.s, sWeights, dsWeights);
|
||||
getBSplineWeights(coord.t, tWeights, dtWeights);
|
||||
|
||||
adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
int indexBase = array.z + handle.z;
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = texelFetch(patchIndexBuffer, indexBase + i).x;
|
||||
addWithWeight(dst, readVertex(index), wP[i]);
|
||||
}
|
||||
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -171,7 +171,12 @@ public:
|
||||
|
||||
for(typename Evaluators::iterator it = _evaluators.begin();
|
||||
it != _evaluators.end(); ++it) {
|
||||
if (it->srcDesc.length == srcDesc.length and
|
||||
// Note: XFB kernel needs to be configured with the local offset
|
||||
// of the dstDesc to skip preceding primvars.
|
||||
int dstOffset1 = it->dstDesc.offset % it->dstDesc.stride;
|
||||
int dstOffset2 = dstDesc.offset % dstDesc.stride;
|
||||
if (dstOffset1 == dstOffset2 and
|
||||
it->srcDesc.length == srcDesc.length and
|
||||
it->srcDesc.stride == srcDesc.stride and
|
||||
it->dstDesc.length == dstDesc.length and
|
||||
it->dstDesc.stride == dstDesc.stride) {
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include "../osd/ompEvaluator.h"
|
||||
#include "../osd/ompKernel.h"
|
||||
#include "../far/patchBasis.h"
|
||||
#include <omp.h>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -33,24 +34,212 @@ namespace Osd {
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
if (end <= start) return true;
|
||||
OmpEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
|
||||
// we can probably expand cpuKernel.cpp to here.
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
// XXX: we can probably expand cpuKernel.cpp to here.
|
||||
OmpEvalStencils(src, srcDesc, dst, dstDesc,
|
||||
sizes, offsets, indices, weights, start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
|
||||
OmpEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct BufferAdapter {
|
||||
BufferAdapter(T *p, int length, int stride) :
|
||||
_p(p), _length(length), _stride(stride) { }
|
||||
void Clear() {
|
||||
for (int i = 0; i < _length; ++i) _p[i] = 0;
|
||||
}
|
||||
void AddWithWeight(T const *src, float w) {
|
||||
if (_p) {
|
||||
// TODO: derivatives.
|
||||
for (int i = 0; i < _length; ++i) {
|
||||
_p[i] += src[i] * w;
|
||||
}
|
||||
}
|
||||
}
|
||||
const T *operator[] (int index) const {
|
||||
return _p + _stride * index;
|
||||
}
|
||||
BufferAdapter<T> & operator ++() {
|
||||
if (_p) {
|
||||
_p += _stride;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
T *_p;
|
||||
int _length;
|
||||
int _stride;
|
||||
};
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer){
|
||||
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
else return false;
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
// XXX: patchIndex is absolute. not sure it's consistent.
|
||||
// (should be offsetted by array.primitiveIdBase?)
|
||||
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer) {
|
||||
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
|
||||
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
|
||||
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
duT.Clear();
|
||||
dvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
duT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
}
|
||||
++dstT;
|
||||
++duT;
|
||||
++dvT;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
void
|
||||
OmpEvaluator::Synchronize(void * /*deviceContext*/) {
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -38,9 +38,15 @@ namespace Osd {
|
||||
|
||||
class OmpEvaluator {
|
||||
public:
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static eval stencils function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
/// in the same way from OsdMesh template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
@ -54,29 +60,27 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the omp kernel
|
||||
/// @param instance not used in the omp kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the omp kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
OmpEvaluator const * instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused;
|
||||
(void)deviceContext; // unused;
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const OmpEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -85,17 +89,376 @@ public:
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// stencil compute function.
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
/// \brief Static eval stencils function which takes raw CPU pointers for
|
||||
/// input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the omp kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the omp kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const OmpEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
static void Synchronize(void *deviceContext = NULL);
|
||||
|
||||
|
@ -117,10 +117,82 @@ OmpEvalStencils(float const * src,
|
||||
|
||||
copy(dst, i, threadResult, dstDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OmpEvalStencils(float const * src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float * dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float * dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float * dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end) {
|
||||
if (start > 0) {
|
||||
sizes += start;
|
||||
indices += offsets[start];
|
||||
weights += offsets[start];
|
||||
duWeights += offsets[start];
|
||||
dvWeights += offsets[start];
|
||||
}
|
||||
|
||||
src += srcDesc.offset;
|
||||
dst += dstDesc.offset;
|
||||
dstDu += dstDuDesc.offset;
|
||||
dstDv += dstDvDesc.offset;
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
int n = end - start;
|
||||
|
||||
float * result = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < n; ++i) {
|
||||
|
||||
int index = i + (start > 0 ? start : 0); // Stencil index
|
||||
|
||||
// Get thread-local pointers
|
||||
int const * threadIndices = indices + offsets[index];
|
||||
float const * threadWeights = weights + offsets[index];
|
||||
float const * threadWeightsDu = duWeights + offsets[index];
|
||||
float const * threadWeightsDv = dvWeights + offsets[index];
|
||||
|
||||
int threadId = omp_get_thread_num();
|
||||
|
||||
float * threadResult = result + threadId*srcDesc.length;
|
||||
float * threadResultDu = resultDu + threadId*srcDesc.length;
|
||||
float * threadResultDv = resultDv + threadId*srcDesc.length;
|
||||
|
||||
clear(threadResult, dstDesc);
|
||||
clear(threadResultDu, dstDuDesc);
|
||||
clear(threadResultDv, dstDvDesc);
|
||||
|
||||
for (int j=0; j<(int)sizes[index]; ++j) {
|
||||
addWithWeight(threadResult, src,
|
||||
threadIndices[j], threadWeights[j], srcDesc);
|
||||
addWithWeight(threadResultDu, src,
|
||||
threadIndices[j], threadWeightsDu[j], srcDesc);
|
||||
addWithWeight(threadResultDv, src,
|
||||
threadIndices[j], threadWeightsDv[j], srcDesc);
|
||||
}
|
||||
|
||||
copy(dst, i, threadResult, dstDesc);
|
||||
copy(dstDu, i, threadResultDu, dstDuDesc);
|
||||
copy(dstDv, i, threadResultDv, dstDvDesc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
@ -45,6 +45,23 @@ OmpEvalStencils(float const * src,
|
||||
float const * weights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
OmpEvalStencils(float const * src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float * dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float * dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float * dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -34,15 +34,15 @@ namespace Osd {
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
TbbEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
|
||||
TbbEvalStencils(src, srcDesc, dst, dstDesc,
|
||||
@ -51,6 +51,82 @@ TbbEvaluator::EvalStencils(const float *src,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
|
||||
TbbEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
TbbEvalPatches(src, srcDesc, dst, dstDesc,
|
||||
NULL, VertexBufferDescriptor(),
|
||||
NULL, VertexBufferDescriptor(),
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
TbbEvalPatches(src, srcDesc, dst, dstDesc,
|
||||
du, duDesc, dv, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
void
|
||||
TbbEvaluator::Synchronize(void *) {
|
||||
|
@ -26,7 +26,9 @@
|
||||
#define OPENSUBDIV3_OSD_TBB_EVALUATOR_H
|
||||
|
||||
#include "../version.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../far/patchTable.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
@ -37,9 +39,15 @@ namespace Osd {
|
||||
|
||||
class TbbEvaluator {
|
||||
public:
|
||||
/// \brief Generic static stencil eval function. This function has a same
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static eval stencils function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
/// in the same way from OsdMesh template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
@ -55,27 +63,25 @@ public:
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
/// @param instance not used in the tbb kernel
|
||||
/// @param instance not used in the tbb kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the tbb kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void *deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void *deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -84,19 +90,381 @@ public:
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int *sizes,
|
||||
const int *offsets,
|
||||
const int *indices,
|
||||
const float *weights,
|
||||
int start,
|
||||
int end);
|
||||
/// \brief Static eval stencils function which takes raw CPU pointers for
|
||||
/// input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int *sizes,
|
||||
const int *offsets,
|
||||
const int *indices,
|
||||
const float *weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
/// @param instance not used in the tbb kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the tbb kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const TbbEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output s-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dv Output t-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the u-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the v-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output s-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output t-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(
|
||||
srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output s-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dv Output t-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief synchronize all asynchronous computation invoked on this device.
|
||||
static void Synchronize(void *deviceContext = NULL);
|
||||
|
||||
/// \brief initialize tbb task schedular
|
||||
/// (optional: client may use tbb::task_scheduler_init)
|
||||
///
|
||||
/// @param numThreads how many threads
|
||||
///
|
||||
static void SetNumThreads(int numThreads);
|
||||
};
|
||||
|
||||
|
@ -24,7 +24,9 @@
|
||||
|
||||
#include "../osd/cpuKernel.h"
|
||||
#include "../osd/tbbKernel.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../far/patchBasis.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
@ -187,6 +189,274 @@ TbbEvalStencils(float const * src,
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
void
|
||||
TbbEvalStencils(float const * src, VertexBufferDescriptor const &srcDesc,
|
||||
float * dst, VertexBufferDescriptor const &dstDesc,
|
||||
float * du, VertexBufferDescriptor const &duDesc,
|
||||
float * dv, VertexBufferDescriptor const &dvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end) {
|
||||
if (start > 0) {
|
||||
sizes += start;
|
||||
indices += offsets[start];
|
||||
weights += offsets[start];
|
||||
duWeights += offsets[start];
|
||||
dvWeights += offsets[start];
|
||||
}
|
||||
|
||||
if (src) src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
// PERFORMANCE: need to combine 3 launches together
|
||||
if (dst) {
|
||||
TBBStencilKernel kernel(src, srcDesc, dst, dstDesc,
|
||||
sizes, offsets, indices, weights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (du) {
|
||||
TBBStencilKernel kernel(src, srcDesc, du, duDesc,
|
||||
sizes, offsets, indices, duWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (dv) {
|
||||
TBBStencilKernel kernel(src, srcDesc, dv, dvDesc,
|
||||
sizes, offsets, indices, dvWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
struct BufferAdapter {
|
||||
BufferAdapter(T *p, int length, int stride) :
|
||||
_p(p), _length(length), _stride(stride) { }
|
||||
void Clear() {
|
||||
for (int i = 0; i < _length; ++i) _p[i] = 0;
|
||||
}
|
||||
void AddWithWeight(T const *src, float w) {
|
||||
if (_p) {
|
||||
for (int i = 0; i < _length; ++i) {
|
||||
_p[i] += src[i] * w;
|
||||
}
|
||||
}
|
||||
}
|
||||
const T *operator[] (int index) const {
|
||||
return _p + _stride * index;
|
||||
}
|
||||
BufferAdapter<T> & operator ++() {
|
||||
if (_p) {
|
||||
_p += _stride;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
T *_p;
|
||||
int _length;
|
||||
int _stride;
|
||||
};
|
||||
|
||||
class TbbEvalPatchesKernel {
|
||||
VertexBufferDescriptor _srcDesc;
|
||||
VertexBufferDescriptor _dstDesc;
|
||||
VertexBufferDescriptor _dstDuDesc;
|
||||
VertexBufferDescriptor _dstDvDesc;
|
||||
float const * _src;
|
||||
float * _dst;
|
||||
float * _dstDu;
|
||||
float * _dstDv;
|
||||
int _numPatchCoords;
|
||||
const PatchCoord *_patchCoords;
|
||||
const PatchArray *_patchArrayBuffer;
|
||||
const int *_patchIndexBuffer;
|
||||
const PatchParam *_patchParamBuffer;
|
||||
|
||||
public:
|
||||
TbbEvalPatchesKernel(float const *src,
|
||||
VertexBufferDescriptor srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor dstDvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) :
|
||||
_srcDesc(srcDesc), _dstDesc(dstDesc),
|
||||
_dstDuDesc(dstDuDesc), _dstDvDesc(dstDvDesc),
|
||||
_src(src), _dst(dst), _dstDu(dstDu), _dstDv(dstDv),
|
||||
_numPatchCoords(numPatchCoords),
|
||||
_patchCoords(patchCoords),
|
||||
_patchArrayBuffer(patchArrayBuffer),
|
||||
_patchIndexBuffer(patchIndexBuffer),
|
||||
_patchParamBuffer(patchParamBuffer) {
|
||||
}
|
||||
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if (_dstDu == NULL && _dstDv == NULL) {
|
||||
compute(r);
|
||||
} else {
|
||||
computeWithDerivative(r);
|
||||
}
|
||||
}
|
||||
|
||||
void compute(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
BufferAdapter<float> dstT(_dst + _dstDesc.offset
|
||||
+ r.begin() * _dstDesc.stride,
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
|
||||
BufferAdapter<float> dstDuT(_dstDu,
|
||||
_dstDuDesc.length,
|
||||
_dstDuDesc.stride);
|
||||
BufferAdapter<float> dstDvT(_dstDv,
|
||||
_dstDvDesc.length,
|
||||
_dstDvDesc.stride);
|
||||
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
PatchCoord const &coord = _patchCoords[i];
|
||||
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
const int *cvs =
|
||||
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
++dstT;
|
||||
}
|
||||
}
|
||||
|
||||
void computeWithDerivative(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
BufferAdapter<float> dstT(_dst + _dstDesc.offset
|
||||
+ r.begin() * _dstDesc.stride,
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
|
||||
+ r.begin() * _dstDuDesc.stride,
|
||||
_dstDuDesc.length,
|
||||
_dstDuDesc.stride);
|
||||
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
|
||||
+ r.begin() * _dstDvDesc.stride,
|
||||
_dstDvDesc.length,
|
||||
_dstDvDesc.stride);
|
||||
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
PatchCoord const &coord = _patchCoords[i];
|
||||
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
const int *cvs =
|
||||
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
dstDuT.Clear();
|
||||
dstDvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
dstDuT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dstDvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
}
|
||||
++dstT;
|
||||
++dstDuT;
|
||||
++dstDvT;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
TbbEvalPatches(float const *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
|
||||
dstDu, dstDuDesc, dstDv, dstDvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer,
|
||||
patchIndexBuffer,
|
||||
patchParamBuffer);
|
||||
|
||||
tbb::blocked_range<int> range(0, numPatchCoords, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -32,6 +32,9 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
struct PatchArray;
|
||||
struct PatchCoord;
|
||||
struct PatchParam;
|
||||
struct VertexBufferDescriptor;
|
||||
|
||||
void
|
||||
@ -45,6 +48,38 @@ TbbEvalStencils(float const * src,
|
||||
float const * weights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
TbbEvalStencils(float const * src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float * dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float * dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float * dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
TbbEvalPatches(float const *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
111
opensubdiv/osd/types.h
Normal file
111
opensubdiv/osd/types.h
Normal file
@ -0,0 +1,111 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_TYPES_H
|
||||
#define OPENSUBDIV3_OSD_TYPES_H
|
||||
|
||||
#include "../version.h"
|
||||
#include "../far/patchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief Coordinates set on a patch table
|
||||
///
|
||||
/// XXX: this class may be moved into Far
|
||||
///
|
||||
struct PatchCoord {
|
||||
// 5-ints struct.
|
||||
|
||||
/// \brief Constructor
|
||||
///
|
||||
/// @param handle patch handle
|
||||
///
|
||||
/// @param s parametric location on the patch
|
||||
///
|
||||
/// @param t parametric location on the patch
|
||||
///
|
||||
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
|
||||
handle(handle), s(s), t(t) { }
|
||||
|
||||
PatchCoord() : s(0), t(0) {
|
||||
handle.arrayIndex = 0;
|
||||
handle.patchIndex = 0;
|
||||
handle.vertIndex = 0;
|
||||
}
|
||||
|
||||
Far::PatchTable::PatchHandle handle; ///< patch handle
|
||||
float s, t; ///< parametric location on patch
|
||||
};
|
||||
|
||||
struct PatchArray {
|
||||
// 4-ints struct.
|
||||
PatchArray(Far::PatchDescriptor desc, int numPatches,
|
||||
int indexBase, int primitiveIdBase) :
|
||||
desc(desc), numPatches(numPatches), indexBase(indexBase),
|
||||
primitiveIdBase(primitiveIdBase) {}
|
||||
|
||||
Far::PatchDescriptor const &GetDescriptor() const {
|
||||
return desc;
|
||||
}
|
||||
|
||||
int GetPatchType() const {
|
||||
return desc.GetType();
|
||||
}
|
||||
int GetNumPatches() const {
|
||||
return numPatches;
|
||||
}
|
||||
int GetIndexBase() const {
|
||||
return indexBase;
|
||||
}
|
||||
int GetPrimitiveIdBase() const {
|
||||
return primitiveIdBase;
|
||||
}
|
||||
Far::PatchDescriptor desc;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
|
||||
struct PatchParam {
|
||||
// int3 struct.
|
||||
int faceIndex;
|
||||
unsigned int patchBits;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
typedef std::vector<PatchArray> PatchArrayVector;
|
||||
typedef std::vector<PatchParam> PatchParamVector;
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_TYPES_H
|
Loading…
Reference in New Issue
Block a user