mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2025-01-04 06:00:17 +00:00
Merge pull request #526 from takahito-tejima/dev
Osd API refactor: EvalStencils and EvalPatches
This commit is contained in:
commit
f5008a544e
6
examples/dxPtexViewer/dxPtexViewer.cpp
Executable file → Normal file
6
examples/dxPtexViewer/dxPtexViewer.cpp
Executable file → Normal file
@ -819,7 +819,7 @@ createOsdMesh(int level, int kernel) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
@ -984,7 +984,7 @@ drawModel() {
|
||||
UINT hOffsets = 0;
|
||||
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
|
||||
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
g_pd3dDeviceContext->IASetIndexBuffer(
|
||||
@ -993,7 +993,7 @@ drawModel() {
|
||||
|
||||
// patch drawing
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
||||
|
@ -630,7 +630,7 @@ ShaderCache g_shaderCache;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
typedef OpenSubdiv::Far::PatchDescriptor Descriptor;
|
||||
@ -822,7 +822,7 @@ display() {
|
||||
UINT hOffsets = 0;
|
||||
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
|
||||
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
g_pd3dDeviceContext->IASetIndexBuffer(
|
||||
@ -834,7 +834,7 @@ display() {
|
||||
int numDrawCalls = 0;
|
||||
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
631
examples/glEvalLimit/glEvalLimit.cpp
Executable file → Normal file
631
examples/glEvalLimit/glEvalLimit.cpp
Executable file → Normal file
@ -44,15 +44,54 @@ GLFWmonitor* g_primary=0;
|
||||
|
||||
#include <osd/cpuEvaluator.h>
|
||||
#include <osd/cpuVertexBuffer.h>
|
||||
#include <osd/cpuPatchTable.h>
|
||||
#include <osd/cpuGLVertexBuffer.h>
|
||||
#include <osd/mesh.h>
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <osd/tbbEvaluator.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
#include <osd/ompEvaluator.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
#include <osd/cudaEvaluator.h>
|
||||
#include <osd/cudaVertexBuffer.h>
|
||||
#include <osd/cudaGLVertexBuffer.h>
|
||||
#include <osd/cudaPatchTable.h>
|
||||
#include "../common/cudaDeviceContext.h"
|
||||
|
||||
CudaDeviceContext g_cudaDeviceContext;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
#include <osd/clVertexBuffer.h>
|
||||
#include <osd/clGLVertexBuffer.h>
|
||||
#include <osd/clEvaluator.h>
|
||||
#include <osd/clPatchTable.h>
|
||||
#include "../common/clDeviceContext.h"
|
||||
CLDeviceContext g_clDeviceContext;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
#include <osd/glXFBEvaluator.h>
|
||||
#include <osd/glVertexBuffer.h>
|
||||
#include <osd/glPatchTable.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
#include <osd/glComputeEvaluator.h>
|
||||
#include <osd/glVertexBuffer.h>
|
||||
#include <osd/glPatchTable.h>
|
||||
#endif
|
||||
|
||||
#include <far/gregoryBasis.h>
|
||||
#include <far/endCapGregoryBasisPatchFactory.h>
|
||||
#include <far/topologyRefiner.h>
|
||||
#include <far/stencilTableFactory.h>
|
||||
#include <far/patchTableFactory.h>
|
||||
#include <far/patchMap.h>
|
||||
|
||||
#include <far/error.h>
|
||||
|
||||
@ -75,32 +114,44 @@ GLFWmonitor* g_primary=0;
|
||||
using namespace OpenSubdiv;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
enum KernelType { kCPU = 0,
|
||||
kOPENMP = 1,
|
||||
kTBB = 2,
|
||||
kCUDA = 3,
|
||||
kCL = 4,
|
||||
kGLXFB = 5,
|
||||
kGLCompute = 6 };
|
||||
|
||||
enum EndCap { kEndCapBSplineBasis,
|
||||
kEndCapGregoryBasis };
|
||||
|
||||
enum DrawMode { kUV,
|
||||
kVARYING,
|
||||
kNORMAL,
|
||||
kSHADE,
|
||||
kFACEVARYING };
|
||||
|
||||
std::vector<float> g_orgPositions,
|
||||
g_positions,
|
||||
g_varyingColors;
|
||||
|
||||
int g_currentShape = 0,
|
||||
g_level = 3,
|
||||
g_kernel = kCPU,
|
||||
g_endCap = kEndCapBSplineBasis,
|
||||
g_numElements = 3;
|
||||
|
||||
std::vector<int> g_coarseEdges;
|
||||
std::vector<float> g_coarseEdgeSharpness;
|
||||
std::vector<float> g_coarseVertexSharpness;
|
||||
|
||||
enum DrawMode { kRANDOM=0,
|
||||
kUV,
|
||||
kVARYING,
|
||||
kNORMAL,
|
||||
kSHADE,
|
||||
kFACEVARYING };
|
||||
|
||||
int g_running = 1,
|
||||
g_width = 1024,
|
||||
g_height = 1024,
|
||||
g_fullscreen = 0,
|
||||
g_drawCageEdges = 1,
|
||||
g_drawCageVertices = 1,
|
||||
g_drawMode = kVARYING,
|
||||
g_drawMode = kUV,
|
||||
g_prev_x = 0,
|
||||
g_prev_y = 0,
|
||||
g_mbutton[3] = {0, 0, 0},
|
||||
@ -131,11 +182,9 @@ float g_computeTime = 0;
|
||||
Stopwatch g_fpsTimer;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
int g_nparticles=0,
|
||||
g_nsamples=101,
|
||||
g_nsamplesFound=0;
|
||||
int g_nParticles = 65536;
|
||||
|
||||
bool g_randomStart=false;
|
||||
bool g_randomStart = true;//false;
|
||||
|
||||
GLuint g_cageEdgeVAO = 0,
|
||||
g_cageEdgeVBO = 0,
|
||||
@ -155,6 +204,7 @@ struct Program {
|
||||
GLuint attrColor;
|
||||
GLuint attrTangentU;
|
||||
GLuint attrTangentV;
|
||||
GLuint attrPatchCoord;
|
||||
} g_defaultProgram;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -204,37 +254,189 @@ createCoarseMesh(OpenSubdiv::Far::TopologyRefiner const & refiner) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
Far::TopologyRefiner * g_topologyRefiner = 0;
|
||||
|
||||
Far::StencilTable const * g_vertexStencils = NULL;
|
||||
Far::StencilTable const * g_varyingStencils = NULL;
|
||||
|
||||
Far::PatchTable const * g_patchTable = NULL;
|
||||
Far::PatchMap const * g_patchMap = NULL;
|
||||
std::vector<Osd::PatchCoord> g_patchCoords;
|
||||
|
||||
Osd::VertexBufferDescriptor g_idesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 3),
|
||||
g_odesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_vdesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_duDesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_dvDesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
g_fvidesc(/*offset*/ 0, /*legnth*/ 2, /*stride*/ 2),
|
||||
g_fvodesc(/*offset*/ 3, /*legnth*/ 2, /*stride*/ 6);
|
||||
// input and output vertex data
|
||||
class EvalOutputBase {
|
||||
public:
|
||||
virtual ~EvalOutputBase() {}
|
||||
virtual GLuint BindVertexData() const = 0;
|
||||
virtual GLuint BindDerivatives() const = 0;
|
||||
virtual GLuint BindPatchCoords() const = 0;
|
||||
virtual void UpdateData(const float *src, int startVertex, int numVertices) = 0;
|
||||
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) = 0;
|
||||
virtual void Refine() = 0;
|
||||
virtual void EvalPatches() = 0;
|
||||
virtual void EvalPatchesWithDerivatives() = 0;
|
||||
virtual void EvalPatchesVarying() = 0;
|
||||
virtual void UpdatePatchCoords(
|
||||
std::vector<Osd::PatchCoord> const &patchCoords) = 0;
|
||||
};
|
||||
|
||||
// input vertex data (coarse + refined)
|
||||
Osd::CpuVertexBuffer * g_vertexData = 0;
|
||||
Osd::CpuVertexBuffer * g_varyingData = 0;
|
||||
// note: Since we don't have a class for device-patchcoord container in osd,
|
||||
// we cheat to use vertexbuffer as a patch-coord (5int) container.
|
||||
//
|
||||
// Please don't follow the pattern in your actual application.
|
||||
//
|
||||
template<typename SRC_VERTEX_BUFFER, typename EVAL_VERTEX_BUFFER,
|
||||
typename STENCIL_TABLE, typename PATCH_TABLE, typename EVALUATOR,
|
||||
typename DEVICE_CONTEXT = void>
|
||||
class EvalOutput : public EvalOutputBase {
|
||||
public:
|
||||
typedef OpenSubdiv::Osd::EvaluatorCacheT<EVALUATOR> EvaluatorCache;
|
||||
|
||||
// output vertex data (limit locations)
|
||||
Osd::CpuGLVertexBuffer * g_outVertexData = NULL;
|
||||
Osd::CpuGLVertexBuffer * g_outDerivatives = NULL;
|
||||
EvalOutput(Far::StencilTable const *vertexStencils,
|
||||
Far::StencilTable const *varyingStencils,
|
||||
int numCoarseVerts, int numTotalVerts, int numParticles,
|
||||
Far::PatchTable const *patchTable,
|
||||
EvaluatorCache *evaluatorCache = NULL,
|
||||
DEVICE_CONTEXT *deviceContext = NULL)
|
||||
: _srcDesc( /*offset*/ 0, /*length*/ 3, /*stride*/ 3),
|
||||
_srcVaryingDesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 3),
|
||||
_vertexDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
_varyingDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
_duDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
|
||||
_dvDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
|
||||
_deviceContext(deviceContext) {
|
||||
_srcData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
|
||||
_srcVaryingData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
|
||||
_vertexData = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
|
||||
_derivatives = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
|
||||
_patchTable = PATCH_TABLE::Create(patchTable, _deviceContext);
|
||||
_patchCoords = NULL;
|
||||
_numCoarseVerts = numCoarseVerts;
|
||||
_vertexStencils =
|
||||
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(vertexStencils, _deviceContext);
|
||||
_varyingStencils =
|
||||
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(varyingStencils, _deviceContext);
|
||||
_evaluatorCache = evaluatorCache;
|
||||
}
|
||||
~EvalOutput() {
|
||||
delete _srcData;
|
||||
delete _srcVaryingData;
|
||||
delete _vertexData;
|
||||
delete _derivatives;
|
||||
delete _patchTable;
|
||||
delete _patchCoords;
|
||||
}
|
||||
virtual GLuint BindVertexData() const {
|
||||
return _vertexData->BindVBO();
|
||||
}
|
||||
virtual GLuint BindDerivatives() const {
|
||||
return _derivatives->BindVBO();
|
||||
}
|
||||
virtual GLuint BindPatchCoords() const {
|
||||
return _patchCoords->BindVBO();
|
||||
}
|
||||
virtual void UpdateData(const float *src, int startVertex, int numVertices) {
|
||||
_srcData->UpdateData(src, startVertex, numVertices, _deviceContext);
|
||||
}
|
||||
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) {
|
||||
_srcVaryingData->UpdateData(src, startVertex, numVertices, _deviceContext);
|
||||
}
|
||||
virtual void Refine() {
|
||||
Osd::VertexBufferDescriptor dstDesc = _srcDesc;
|
||||
dstDesc.offset += _numCoarseVerts * _srcDesc.stride;
|
||||
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcDesc, dstDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalStencils(_srcData, _srcDesc,
|
||||
_srcData, dstDesc,
|
||||
_vertexStencils,
|
||||
evalInstance,
|
||||
_deviceContext);
|
||||
|
||||
dstDesc = _srcVaryingDesc;
|
||||
dstDesc.offset += _numCoarseVerts * _srcVaryingDesc.stride;
|
||||
evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcVaryingDesc, dstDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalStencils(_srcVaryingData, _srcVaryingDesc,
|
||||
_srcVaryingData, dstDesc,
|
||||
_varyingStencils,
|
||||
evalInstance,
|
||||
_deviceContext);
|
||||
}
|
||||
virtual void EvalPatches() {
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalPatches(
|
||||
_srcData, _srcDesc,
|
||||
_vertexData, _vertexDesc,
|
||||
_patchCoords->GetNumVertices(),
|
||||
_patchCoords,
|
||||
_patchTable, evalInstance, _deviceContext);
|
||||
}
|
||||
virtual void EvalPatchesWithDerivatives() {
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
|
||||
EVALUATOR::EvalPatches(
|
||||
_srcData, _srcDesc,
|
||||
_vertexData, _vertexDesc,
|
||||
_derivatives, _duDesc,
|
||||
_derivatives, _dvDesc,
|
||||
_patchCoords->GetNumVertices(),
|
||||
_patchCoords,
|
||||
_patchTable, evalInstance, _deviceContext);
|
||||
}
|
||||
virtual void EvalPatchesVarying() {
|
||||
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
|
||||
_evaluatorCache, _srcVaryingDesc, _varyingDesc, _deviceContext);
|
||||
|
||||
EVALUATOR::EvalPatches(
|
||||
_srcVaryingData, _srcVaryingDesc,
|
||||
// varyingdata is interleved in vertexData.
|
||||
_vertexData, _varyingDesc,
|
||||
_patchCoords->GetNumVertices(),
|
||||
_patchCoords,
|
||||
_patchTable, evalInstance, _deviceContext);
|
||||
}
|
||||
virtual void UpdatePatchCoords(
|
||||
std::vector<Osd::PatchCoord> const &patchCoords) {
|
||||
if (_patchCoords and
|
||||
_patchCoords->GetNumVertices() != (int)patchCoords.size()) {
|
||||
delete _patchCoords;
|
||||
_patchCoords = NULL;
|
||||
}
|
||||
if (not _patchCoords) {
|
||||
_patchCoords = EVAL_VERTEX_BUFFER::Create(5,
|
||||
(int)patchCoords.size(),
|
||||
_deviceContext);
|
||||
}
|
||||
_patchCoords->UpdateData((float*)&patchCoords[0], 0, (int)patchCoords.size(), _deviceContext);
|
||||
}
|
||||
private:
|
||||
SRC_VERTEX_BUFFER *_srcData;
|
||||
SRC_VERTEX_BUFFER *_srcVaryingData;
|
||||
EVAL_VERTEX_BUFFER *_vertexData;
|
||||
EVAL_VERTEX_BUFFER *_derivatives;
|
||||
EVAL_VERTEX_BUFFER *_varyingData;
|
||||
EVAL_VERTEX_BUFFER *_patchCoords;
|
||||
PATCH_TABLE *_patchTable;
|
||||
Osd::VertexBufferDescriptor _srcDesc;
|
||||
Osd::VertexBufferDescriptor _srcVaryingDesc;
|
||||
Osd::VertexBufferDescriptor _vertexDesc;
|
||||
Osd::VertexBufferDescriptor _varyingDesc;
|
||||
Osd::VertexBufferDescriptor _duDesc;
|
||||
Osd::VertexBufferDescriptor _dvDesc;
|
||||
int _numCoarseVerts;
|
||||
|
||||
STENCIL_TABLE const *_vertexStencils;
|
||||
STENCIL_TABLE const *_varyingStencils;
|
||||
|
||||
EvaluatorCache *_evaluatorCache;
|
||||
DEVICE_CONTEXT *_deviceContext;
|
||||
};
|
||||
|
||||
EvalOutputBase *g_evalOutput = NULL;
|
||||
|
||||
STParticles * g_particles=0;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
updateGeom() {
|
||||
|
||||
int nverts = (int)g_orgPositions.size() / 3;
|
||||
|
||||
const float *p = &g_orgPositions[0];
|
||||
@ -255,28 +457,18 @@ updateGeom() {
|
||||
Stopwatch s;
|
||||
s.Start();
|
||||
|
||||
g_vertexData->UpdateData( &g_positions[0], 0, nverts);
|
||||
// update coarse vertices
|
||||
g_evalOutput->UpdateData(&g_positions[0], 0, nverts);
|
||||
|
||||
if (! g_topologyRefiner) return;
|
||||
// update coarse varying
|
||||
if (g_drawMode == kVARYING) {
|
||||
g_evalOutput->UpdateVaryingData(&g_varyingColors[0], 0, nverts);
|
||||
|
||||
// note that for patch eval we need coarse+refined combined buffer.
|
||||
int nCoarseVertices = g_topologyRefiner->GetLevel(0).GetNumVertices();
|
||||
Osd::CpuEvaluator::EvalStencils(g_vertexData,
|
||||
Osd::VertexBufferDescriptor(0, 3, 3),
|
||||
g_vertexData,
|
||||
Osd::VertexBufferDescriptor(
|
||||
nCoarseVertices*3, 3, 3),
|
||||
g_vertexStencils);
|
||||
|
||||
if (g_varyingData) {
|
||||
Osd::CpuEvaluator::EvalStencils(g_varyingData,
|
||||
Osd::VertexBufferDescriptor(0, 3, 3),
|
||||
g_varyingData,
|
||||
Osd::VertexBufferDescriptor(
|
||||
nCoarseVertices*3, 3, 3),
|
||||
g_varyingStencils);
|
||||
}
|
||||
|
||||
// Refine
|
||||
g_evalOutput->Refine();
|
||||
|
||||
s.Stop();
|
||||
g_computeTime = float(s.GetElapsed() * 1000.0f);
|
||||
|
||||
@ -287,60 +479,28 @@ updateGeom() {
|
||||
|
||||
// Apply 'dynamics' update
|
||||
assert(g_particles);
|
||||
|
||||
g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time...
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
|
||||
= g_particles->GetPatchCoords();
|
||||
|
||||
// resolve particle positions into patch handles
|
||||
// XXX: this process should be handled by OsdKernel in parallel
|
||||
g_patchCoords.clear();
|
||||
for (int i = 0; i < g_particles->GetNumParticles(); ++i) {
|
||||
STParticles::Position const &position = g_particles->GetPositions()[i];
|
||||
Far::PatchTable::PatchHandle const *handle =
|
||||
g_patchMap->FindPatch(position.ptexIndex, position.s, position.t);
|
||||
if (handle) {
|
||||
g_patchCoords.push_back(Osd::PatchCoord(
|
||||
*handle, position.s, position.t));
|
||||
}
|
||||
}
|
||||
// update patchcoord to be evaluated
|
||||
g_evalOutput->UpdatePatchCoords(patchCoords);
|
||||
|
||||
// Evaluate the positions of the samples on the limit surface
|
||||
if (g_drawMode == kNORMAL || g_drawMode == kSHADE) {
|
||||
// evaluate positions and derivatives
|
||||
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
|
||||
g_vertexData, g_idesc,
|
||||
g_outVertexData, g_odesc,
|
||||
g_outDerivatives, g_duDesc,
|
||||
g_outDerivatives, g_dvDesc,
|
||||
(int)g_patchCoords.size(),
|
||||
&g_patchCoords[0],
|
||||
g_patchTable, NULL);
|
||||
g_evalOutput->EvalPatchesWithDerivatives();
|
||||
} else {
|
||||
// evaluate positions
|
||||
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
|
||||
g_vertexData, g_idesc,
|
||||
g_outVertexData, g_odesc,
|
||||
(int)g_patchCoords.size(),
|
||||
&g_patchCoords[0],
|
||||
g_patchTable, NULL);
|
||||
g_evalOutput->EvalPatches();
|
||||
}
|
||||
|
||||
// color
|
||||
if (g_drawMode == kUV) {
|
||||
// store patchCoords as colors
|
||||
float *p = g_outVertexData->BindCpuBuffer() + g_vdesc.offset;
|
||||
for (int i = 0; i < (int)g_patchCoords.size(); ++i) {
|
||||
p[0] = g_patchCoords[i].s;
|
||||
p[1] = g_patchCoords[i].t;
|
||||
p[2] = 0;
|
||||
p += g_vdesc.stride;
|
||||
}
|
||||
} else if (g_drawMode == kVARYING) {
|
||||
if (g_drawMode == kVARYING) {
|
||||
// XXX: is this really varying?
|
||||
Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc,
|
||||
g_outVertexData, g_vdesc,
|
||||
(int)g_patchCoords.size(),
|
||||
&g_patchCoords[0],
|
||||
g_patchTable, NULL);
|
||||
g_evalOutput->EvalPatchesVarying();
|
||||
}
|
||||
|
||||
s.Stop();
|
||||
@ -359,8 +519,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape);
|
||||
OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape);
|
||||
|
||||
delete g_topologyRefiner;
|
||||
g_topologyRefiner =
|
||||
Far::TopologyRefiner *topologyRefiner =
|
||||
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Create(*shape,
|
||||
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
|
||||
|
||||
@ -371,22 +530,16 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
|
||||
float speed = g_particles ? g_particles->GetSpeed() : 0.2f;
|
||||
|
||||
// Create the 'uv particles' manager - this class manages the limit
|
||||
// location samples (ptex face index, (s,t) and updates them between frames.
|
||||
// Note: the number of limit locations can be entirely arbitrary
|
||||
delete g_particles;
|
||||
g_particles = new STParticles(*g_topologyRefiner, g_nsamples, !g_randomStart);
|
||||
g_nparticles = g_particles->GetNumParticles();
|
||||
g_particles->SetSpeed(speed);
|
||||
|
||||
createCoarseMesh(*g_topologyRefiner);
|
||||
createCoarseMesh(*topologyRefiner);
|
||||
|
||||
Far::StencilTable const * vertexStencils = NULL;
|
||||
Far::StencilTable const * varyingStencils = NULL;
|
||||
int nverts=0;
|
||||
{
|
||||
// Apply feature adaptive refinement to the mesh so that we can use the
|
||||
// limit evaluation API features.
|
||||
Far::TopologyRefiner::AdaptiveOptions options(level);
|
||||
g_topologyRefiner->RefineAdaptive(options);
|
||||
topologyRefiner->RefineAdaptive(options);
|
||||
|
||||
// Generate stencil table to update the bi-cubic patches control
|
||||
// vertices after they have been re-posed (both for vertex & varying
|
||||
@ -395,27 +548,33 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
soptions.generateOffsets=true;
|
||||
soptions.generateIntermediateLevels=true;
|
||||
|
||||
Far::StencilTable const * vertexStencils =
|
||||
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
|
||||
vertexStencils =
|
||||
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
|
||||
|
||||
soptions.interpolationMode = Far::StencilTableFactory::INTERPOLATE_VARYING;
|
||||
Far::StencilTable const * varyingStencils =
|
||||
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
|
||||
|
||||
varyingStencils =
|
||||
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
|
||||
|
||||
// Generate bi-cubic patch table for the limit surface
|
||||
Far::PatchTableFactory::Options poptions;
|
||||
poptions.SetEndCapType(
|
||||
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
|
||||
if (g_endCap == kEndCapBSplineBasis) {
|
||||
poptions.SetEndCapType(
|
||||
Far::PatchTableFactory::Options::ENDCAP_BSPLINE_BASIS);
|
||||
} else {
|
||||
poptions.SetEndCapType(
|
||||
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
|
||||
}
|
||||
|
||||
Far::PatchTable const * patchTable =
|
||||
Far::PatchTableFactory::Create(*g_topologyRefiner, poptions);
|
||||
Far::PatchTableFactory::Create(*topologyRefiner, poptions);
|
||||
|
||||
// append endcap stencils
|
||||
if (Far::StencilTable const *endCapVertexStencilTable =
|
||||
patchTable->GetEndCapVertexStencilTable()) {
|
||||
Far::StencilTable const *table =
|
||||
Far::StencilTableFactory::AppendEndCapStencilTable(
|
||||
*g_topologyRefiner,
|
||||
*topologyRefiner,
|
||||
vertexStencils, endCapVertexStencilTable);
|
||||
delete vertexStencils;
|
||||
vertexStencils = table;
|
||||
@ -424,7 +583,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
patchTable->GetEndCapVaryingStencilTable()) {
|
||||
Far::StencilTable const *table =
|
||||
Far::StencilTableFactory::AppendEndCapStencilTable(
|
||||
*g_topologyRefiner,
|
||||
*topologyRefiner,
|
||||
varyingStencils, endCapVaryingStencilTable);
|
||||
delete varyingStencils;
|
||||
varyingStencils = table;
|
||||
@ -434,47 +593,103 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
nverts = vertexStencils->GetNumControlVertices() +
|
||||
vertexStencils->GetNumStencils();
|
||||
|
||||
if (g_vertexStencils) delete g_vertexStencils;
|
||||
g_vertexStencils = vertexStencils;
|
||||
if (g_varyingStencils) delete g_varyingStencils;
|
||||
g_varyingStencils = varyingStencils;
|
||||
|
||||
if (g_patchTable) delete g_patchTable;
|
||||
g_patchTable = patchTable;
|
||||
|
||||
// Create a far patch map
|
||||
if (g_patchMap) delete g_patchMap;
|
||||
g_patchMap = new Far::PatchMap(*g_patchTable);
|
||||
}
|
||||
|
||||
{ // Create vertex primvar buffer for the CVs
|
||||
delete g_vertexData;
|
||||
g_vertexData = Osd::CpuVertexBuffer::Create(3, nverts);
|
||||
// note that for patch eval we need coarse+refined combined buffer.
|
||||
int nCoarseVertices = topologyRefiner->GetLevel(0).GetNumVertices();
|
||||
|
||||
// Create varying primvar buffer for the CVs with random colors.
|
||||
// These are immediately interpolated (once) and saved for display.
|
||||
delete g_varyingData; g_varyingData = 0;
|
||||
if (g_drawMode==kVARYING) {
|
||||
g_varyingData = Osd::CpuVertexBuffer::Create(3, nverts);
|
||||
g_varyingData->UpdateData(
|
||||
&g_varyingColors[0], 0, (int)g_varyingColors.size()/3 );
|
||||
}
|
||||
|
||||
// Create output buffers for the limit samples (position & tangents)
|
||||
delete g_outVertexData;
|
||||
g_outVertexData = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
|
||||
memset(g_outVertexData->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
|
||||
if (g_drawMode==kRANDOM) {
|
||||
createRandomColors(g_nparticles, 6, g_outVertexData->BindCpuBuffer()+3);
|
||||
}
|
||||
|
||||
delete g_outDerivatives;
|
||||
g_outDerivatives = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
|
||||
memset(g_outDerivatives->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
|
||||
delete g_evalOutput;
|
||||
if (g_kernel == kCPU) {
|
||||
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
|
||||
Osd::CpuGLVertexBuffer,
|
||||
Far::StencilTable,
|
||||
Osd::CpuPatchTable,
|
||||
Osd::CpuEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
} else if (g_kernel == kOPENMP) {
|
||||
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
|
||||
Osd::CpuGLVertexBuffer,
|
||||
Far::StencilTable,
|
||||
Osd::CpuPatchTable,
|
||||
Osd::OmpEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
} else if (g_kernel == kTBB) {
|
||||
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
|
||||
Osd::CpuGLVertexBuffer,
|
||||
Far::StencilTable,
|
||||
Osd::CpuPatchTable,
|
||||
Osd::TbbEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
} else if (g_kernel == kCUDA) {
|
||||
g_evalOutput = new EvalOutput<Osd::CudaVertexBuffer,
|
||||
Osd::CudaGLVertexBuffer,
|
||||
Osd::CudaStencilTable,
|
||||
Osd::CudaPatchTable,
|
||||
Osd::CudaEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
} else if (g_kernel == kCL) {
|
||||
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
|
||||
g_evalOutput = new EvalOutput<Osd::CLVertexBuffer,
|
||||
Osd::CLGLVertexBuffer,
|
||||
Osd::CLStencilTable,
|
||||
Osd::CLPatchTable,
|
||||
Osd::CLEvaluator,
|
||||
CLDeviceContext>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||
&clEvaluatorCache, &g_clDeviceContext);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
} else if (g_kernel == kGLXFB) {
|
||||
static Osd::EvaluatorCacheT<Osd::GLXFBEvaluator> glXFBEvaluatorCache;
|
||||
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
|
||||
Osd::GLVertexBuffer,
|
||||
Osd::GLStencilTableTBO,
|
||||
Osd::GLPatchTable,
|
||||
Osd::GLXFBEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||
&glXFBEvaluatorCache);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
} else if (g_kernel == kGLCompute) {
|
||||
static Osd::EvaluatorCacheT<Osd::GLComputeEvaluator> glComputeEvaluatorCache;
|
||||
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
|
||||
Osd::GLVertexBuffer,
|
||||
Osd::GLStencilTableSSBO,
|
||||
Osd::GLPatchTable,
|
||||
Osd::GLComputeEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||
&glComputeEvaluatorCache);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Create the 'uv particles' manager - this class manages the limit
|
||||
// location samples (ptex face index, (s,t) and updates them between frames.
|
||||
// Note: the number of limit locations can be entirely arbitrary
|
||||
delete g_particles;
|
||||
g_particles = new STParticles(*topologyRefiner, g_patchTable,
|
||||
g_nParticles, !g_randomStart);
|
||||
g_nParticles = g_particles->GetNumParticles();
|
||||
g_particles->SetSpeed(speed);
|
||||
|
||||
updateGeom();
|
||||
|
||||
delete topologyRefiner;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -505,32 +720,33 @@ linkDefaultProgram() {
|
||||
"in vec3 color;\n"
|
||||
"in vec3 tangentU;\n"
|
||||
"in vec3 tangentV;\n"
|
||||
"in vec2 patchCoord;\n"
|
||||
"out vec4 fragColor;\n"
|
||||
"out vec3 normal;\n"
|
||||
"uniform mat4 ModelViewMatrix;\n"
|
||||
"uniform mat4 ProjectionMatrix;\n"
|
||||
"uniform int DrawMode;\n"
|
||||
"void main() {\n"
|
||||
" fragColor = vec4(color, 1);\n"
|
||||
// XXX: fix the normal transform
|
||||
" normal = (ModelViewMatrix * vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
|
||||
" vec3 normal = (ModelViewMatrix * "
|
||||
" vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
|
||||
" gl_Position = ProjectionMatrix * ModelViewMatrix * "
|
||||
" vec4(position, 1);\n"
|
||||
" if (DrawMode == 0) {\n" // UV
|
||||
" fragColor = vec4(patchCoord.x, patchCoord.y, 0, 1);\n"
|
||||
" } else if (DrawMode == 2) {\n"
|
||||
" fragColor = vec4(normal*0.5+vec3(0.5), 1);\n"
|
||||
" } else if (DrawMode == 3) {\n"
|
||||
" fragColor = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
|
||||
" } else {\n" // varying
|
||||
" fragColor = vec4(color, 1);\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
|
||||
static const char *fsSrc =
|
||||
GLSL_VERSION_DEFINE
|
||||
"in vec4 fragColor;\n"
|
||||
"in vec3 normal;\n"
|
||||
"uniform int DrawMode;\n"
|
||||
"out vec4 color;\n"
|
||||
"void main() {\n"
|
||||
" if (DrawMode == 3) {\n"
|
||||
" color = vec4(normal*0.5+vec3(0.5), 1);\n"
|
||||
" } else if (DrawMode == 4) {\n"
|
||||
" color = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
|
||||
" } else {\n"
|
||||
" color = fragColor;\n"
|
||||
" }\n"
|
||||
" color = fragColor;\n"
|
||||
"}\n";
|
||||
|
||||
GLuint program = glCreateProgram();
|
||||
@ -544,6 +760,7 @@ linkDefaultProgram() {
|
||||
glBindAttribLocation(program, 1, "color");
|
||||
glBindAttribLocation(program, 2, "tangentU");
|
||||
glBindAttribLocation(program, 3, "tangentV");
|
||||
glBindAttribLocation(program, 4, "patchCoord");
|
||||
glBindFragDataLocation(program, 0, "color");
|
||||
|
||||
glLinkProgram(program);
|
||||
@ -571,6 +788,7 @@ linkDefaultProgram() {
|
||||
g_defaultProgram.attrColor = glGetAttribLocation(program, "color");
|
||||
g_defaultProgram.attrTangentU = glGetAttribLocation(program, "tangentU");
|
||||
g_defaultProgram.attrTangentV = glGetAttribLocation(program, "tangentV");
|
||||
g_defaultProgram.attrPatchCoord = glGetAttribLocation(program, "patchCoord");
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -621,6 +839,7 @@ drawCageEdges() {
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
glVertexAttribPointer(g_defaultProgram.attrPosition,
|
||||
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(g_defaultProgram.attrColor,
|
||||
@ -680,6 +899,7 @@ drawCageVertices() {
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
glVertexAttribPointer(g_defaultProgram.attrPosition,
|
||||
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(g_defaultProgram.attrColor,
|
||||
@ -711,27 +931,33 @@ drawSamples() {
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_outVertexData->BindVBO());
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindVertexData());
|
||||
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_outDerivatives->BindVBO());
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindDerivatives());
|
||||
glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
|
||||
glVertexAttribPointer(3, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindPatchCoords());
|
||||
glVertexAttribPointer(4, 2, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 5, (float*)12);
|
||||
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrPosition);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glEnableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
|
||||
glPointSize(2.0f);
|
||||
glDrawArrays(GL_POINTS, 0, g_nparticles);
|
||||
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
|
||||
glDrawArrays(GL_POINTS, 0, nPatchCoords);
|
||||
glPointSize(1.0f);
|
||||
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPosition);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrColor);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
|
||||
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
|
||||
|
||||
glBindVertexArray(0);
|
||||
|
||||
@ -789,9 +1015,10 @@ display() {
|
||||
double fps = 1.0/g_fpsTimer.GetElapsed();
|
||||
g_fpsTimer.Start();
|
||||
|
||||
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
|
||||
|
||||
g_hud.DrawString(10, -150, "Particle Speed ([) (]): %.1f", g_particles->GetSpeed());
|
||||
g_hud.DrawString(10, -120, "# Samples : (%d/%d)",
|
||||
g_nsamplesFound, g_outVertexData->GetNumVertices());
|
||||
g_hud.DrawString(10, -120, "# Samples : (%d / %d)", nPatchCoords, g_nParticles);
|
||||
g_hud.DrawString(10, -100, "Compute : %.3f ms", g_computeTime);
|
||||
g_hud.DrawString(10, -80, "Eval : %.3f ms", g_evalTime * 1000.f);
|
||||
g_hud.DrawString(10, -60, "GPU Draw : %.3f ms", drawGpuTime);
|
||||
@ -884,9 +1111,11 @@ void windowClose(GLFWwindow*) {
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
setSamples(bool add) {
|
||||
g_nsamples += add ? 50 : -50;
|
||||
|
||||
g_nsamples = std::max(0, g_nsamples);
|
||||
if (add) {
|
||||
g_nParticles = g_nParticles * 2;
|
||||
} else {
|
||||
g_nParticles = std::max(1, g_nParticles / 2);
|
||||
}
|
||||
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
}
|
||||
@ -936,6 +1165,40 @@ callbackModel(int m) {
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
callbackEndCap(int endCap) {
|
||||
g_endCap = endCap;
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
callbackKernel(int k) {
|
||||
|
||||
g_kernel = k;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
|
||||
if (g_clDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing OpenCL\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
|
||||
if (g_cudaDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing Cuda\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
|
||||
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static void
|
||||
callbackLevel(int l) {
|
||||
@ -1001,10 +1264,40 @@ initHUD() {
|
||||
g_hud.AddCheckBox("Animate vertices (M)", g_moveScale != 0, 10, 50, callbackAnimate, 0, 'm');
|
||||
g_hud.AddCheckBox("Freeze (spc)", false, 10, 70, callbackFreeze, 0, ' ');
|
||||
|
||||
g_hud.AddCheckBox("Random Start", false, 10, 120, callbackCentered, g_randomStart);
|
||||
g_hud.AddCheckBox("Random Start", g_randomStart, 10, 120, callbackCentered, 0);
|
||||
|
||||
int compute_pulldown = g_hud.AddPullDown("Compute (K)", 475, 10, 300,
|
||||
callbackKernel, 'k');
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CPU", kCPU);
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
g_hud.AddPullDownButton(compute_pulldown, "OPENMP", kOPENMP);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
g_hud.AddPullDownButton(compute_pulldown, "GL XFB", kGLXFB);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
g_hud.AddPullDownButton(compute_pulldown, "GL Compute", kGLCompute);
|
||||
#endif
|
||||
|
||||
int endcap_pulldown = g_hud.AddPullDown("End cap (E)", 10, 140, 200,
|
||||
callbackEndCap, 'e');
|
||||
g_hud.AddPullDownButton(endcap_pulldown, "BSpline",
|
||||
kEndCapBSplineBasis,
|
||||
g_endCap == kEndCapBSplineBasis);
|
||||
g_hud.AddPullDownButton(endcap_pulldown, "GregoryBasis",
|
||||
kEndCapGregoryBasis,
|
||||
g_endCap == kEndCapGregoryBasis);
|
||||
|
||||
int shading_pulldown = g_hud.AddPullDown("Shading (W)", 250, 10, 250, callbackDisplayVaryingColors, 'w');
|
||||
g_hud.AddPullDownButton(shading_pulldown, "Random", kRANDOM, g_drawMode==kRANDOM);
|
||||
g_hud.AddPullDownButton(shading_pulldown, "(u,v)", kUV, g_drawMode==kUV);
|
||||
g_hud.AddPullDownButton(shading_pulldown, "Varying", kVARYING, g_drawMode==kVARYING);
|
||||
g_hud.AddPullDownButton(shading_pulldown, "Normal", kNORMAL, g_drawMode==kNORMAL);
|
||||
|
@ -25,38 +25,104 @@
|
||||
#include "particles.h"
|
||||
|
||||
#include <far/ptexIndices.h>
|
||||
#include <far/patchMap.h>
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <tbb/atomic.h>
|
||||
tbb::atomic<int> g_tbbCounter;
|
||||
class TbbUpdateKernel {
|
||||
public:
|
||||
TbbUpdateKernel(float speed,
|
||||
STParticles::Position *positions,
|
||||
float *velocities,
|
||||
std::vector<STParticles::FaceInfo> const &adjacency,
|
||||
OpenSubdiv::Osd::PatchCoord *patchCoords,
|
||||
OpenSubdiv::Far::PatchMap const *patchMap) :
|
||||
_speed(speed), _positions(positions), _velocities(velocities),
|
||||
_adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) {
|
||||
}
|
||||
|
||||
void operator () (tbb::blocked_range<int> const &r) const {
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
STParticles::Position * p = _positions + i;
|
||||
float *dp = _velocities + i*2;
|
||||
|
||||
// apply velocity
|
||||
p->s += dp[0] * _speed;
|
||||
p->t += dp[1] * _speed;
|
||||
|
||||
// make sure particles can't skip more than 1 face boundary at a time
|
||||
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
|
||||
|
||||
// check if the particle is jumping a boundary
|
||||
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
|
||||
// this is not treated here.
|
||||
int edge = -1;
|
||||
if (p->s >= 1.0f) edge = 1;
|
||||
if (p->s <= 0.0f) edge = 3;
|
||||
if (p->t >= 1.0f) edge = 2;
|
||||
if (p->t <= 0.0f) edge = 0;
|
||||
|
||||
if (edge>=0) {
|
||||
// warp the particle to the other side of the boundary
|
||||
STParticles::WarpParticle(_adjacency, edge, p, dp);
|
||||
}
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
|
||||
// resolve particle positions into patch handles
|
||||
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
|
||||
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
|
||||
if (handle) {
|
||||
int index = g_tbbCounter.fetch_and_add(1);
|
||||
_patchCoords[index] =
|
||||
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t);
|
||||
}
|
||||
}
|
||||
}
|
||||
private:
|
||||
float _speed;
|
||||
STParticles::Position *_positions;
|
||||
float *_velocities;
|
||||
std::vector<STParticles::FaceInfo> const &_adjacency;
|
||||
OpenSubdiv::Osd::PatchCoord *_patchCoords;
|
||||
OpenSubdiv::Far::PatchMap const *_patchMap;
|
||||
};
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
|
||||
STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered) :
|
||||
STParticles::STParticles(Refiner const & refiner,
|
||||
PatchTable const *patchTable,
|
||||
int nParticles, bool centered) :
|
||||
_speed(1.0f) {
|
||||
|
||||
OpenSubdiv::Far::PtexIndices ptexIndices(refiner);
|
||||
|
||||
int nptexfaces = ptexIndices.GetNumFaces(),
|
||||
nsamples = nptexfaces * nparticles;
|
||||
// Create a far patch map
|
||||
_patchMap = new OpenSubdiv::Far::PatchMap(*patchTable);
|
||||
|
||||
int nPtexFaces = ptexIndices.GetNumFaces();
|
||||
|
||||
srand(static_cast<int>(2147483647));
|
||||
|
||||
{ // initialize positions
|
||||
|
||||
_positions.resize(nsamples);
|
||||
_positions.resize(nParticles);
|
||||
Position * pos = &_positions[0];
|
||||
|
||||
for (int i=0; i<nptexfaces; ++i) {
|
||||
for (int j=0; j<nparticles; ++j) {
|
||||
pos->ptexIndex = i;
|
||||
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
++pos;
|
||||
}
|
||||
for (int i = 0; i < nParticles; ++i) {
|
||||
pos->ptexIndex = (int)(((float)rand()/(float)RAND_MAX) * nPtexFaces);
|
||||
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
|
||||
{ // initialize velocities
|
||||
_velocities.resize(nsamples*2);
|
||||
_velocities.resize(nParticles * 2);
|
||||
|
||||
for (int i=0; i<nsamples; ++i) {
|
||||
for (int i = 0; i < nParticles; ++i) {
|
||||
// initialize normalized random directions
|
||||
float s = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
|
||||
t = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
|
||||
@ -68,7 +134,7 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
|
||||
}
|
||||
|
||||
{ // initialize topology adjacency
|
||||
_adjacency.resize(nptexfaces);
|
||||
_adjacency.resize(nPtexFaces);
|
||||
|
||||
OpenSubdiv::Far::TopologyLevel const & refBaseLevel = refiner.GetLevel(0);
|
||||
|
||||
@ -97,42 +163,6 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
|
||||
//std::cout << *this;
|
||||
}
|
||||
|
||||
void
|
||||
STParticles::Update(float deltaTime) {
|
||||
|
||||
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
|
||||
|
||||
Position * p = &_positions[0];
|
||||
float * dp = &_velocities[0];
|
||||
|
||||
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
|
||||
|
||||
// apply velocity
|
||||
p->s += dp[0] * speed;
|
||||
p->t += dp[1] * speed;
|
||||
|
||||
// make sure particles can't skip more than 1 face boundary at a time
|
||||
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
|
||||
|
||||
// check if the particle is jumping a boundary
|
||||
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
|
||||
// this is not treated here.
|
||||
int edge = -1;
|
||||
|
||||
if (p->s >= 1.0f) edge = 1;
|
||||
if (p->s <= 0.0f) edge = 3;
|
||||
if (p->t >= 1.0f) edge = 2;
|
||||
if (p->t <= 0.0f) edge = 0;
|
||||
|
||||
if (edge>=0) {
|
||||
// warp the particle to the other side of the boundary
|
||||
warpParticle(edge, p, dp);
|
||||
}
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline void
|
||||
FlipS(STParticles::Position * p, float * dp) {
|
||||
p->s = 1.0f-p->s;
|
||||
@ -163,6 +193,15 @@ Rotate(int rot, STParticles::Position * p, float * dp) {
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
inline void
|
||||
Trim(STParticles::Position * p) {
|
||||
if (p->s <0.0f) p->s = 1.0f + p->s;
|
||||
if (p->s>=1.0f) p->s = p->s - 1.0f;
|
||||
if (p->t <0.0f) p->t = 1.0f + p->t;
|
||||
if (p->t>=1.0f) p->t = p->t - 1.0f;
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
inline void
|
||||
Clamp(STParticles::Position * p) {
|
||||
if (p->s<0.0f) {
|
||||
@ -192,21 +231,12 @@ Bounce(int edge, STParticles::Position * p, float * dp) {
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
inline void
|
||||
Trim(STParticles::Position * p) {
|
||||
if (p->s <0.0f) p->s = 1.0f + p->s;
|
||||
if (p->s>=1.0f) p->s = p->s - 1.0f;
|
||||
if (p->t <0.0f) p->t = 1.0f + p->t;
|
||||
if (p->t>=1.0f) p->t = p->t - 1.0f;
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
void
|
||||
STParticles::warpParticle(int edge, Position * p, float * dp) {
|
||||
|
||||
assert(p->ptexIndex<(int)_adjacency.size() and (edge>=0 and edge<4));
|
||||
STParticles::WarpParticle(std::vector<FaceInfo> const &adjacency,
|
||||
int edge, Position * p, float * dp) {
|
||||
assert(p->ptexIndex<(int)adjacency.size() and (edge>=0 and edge<4));
|
||||
|
||||
FaceInfo const & f = _adjacency[p->ptexIndex];
|
||||
FaceInfo const & f = adjacency[p->ptexIndex];
|
||||
|
||||
int afid = f.adjface(edge),
|
||||
aeid = f.adjedge(edge);
|
||||
@ -215,7 +245,7 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
|
||||
// boundary detected: bounce the particle
|
||||
Bounce(edge, p, dp);
|
||||
} else {
|
||||
FaceInfo const & af = _adjacency[afid];
|
||||
FaceInfo const & af = adjacency[afid];
|
||||
int rot = edge - aeid + 2;
|
||||
|
||||
bool fIsSubface = f.isSubface(),
|
||||
@ -233,6 +263,66 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
}
|
||||
|
||||
STParticles::~STParticles() {
|
||||
delete _patchMap;
|
||||
}
|
||||
|
||||
void
|
||||
STParticles::Update(float deltaTime) {
|
||||
|
||||
if (fabs(GetSpeed()) < 0.001f) return;
|
||||
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
|
||||
|
||||
_patchCoords.clear();
|
||||
|
||||
// XXX: this process should be parallelized.
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
|
||||
_patchCoords.resize((int)GetNumParticles());
|
||||
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
|
||||
_adjacency, &_patchCoords[0], _patchMap);;
|
||||
g_tbbCounter = 0;
|
||||
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
|
||||
tbb::parallel_for(range, kernel);
|
||||
_patchCoords.resize(g_tbbCounter);
|
||||
#else
|
||||
Position * p = &_positions[0];
|
||||
float * dp = &_velocities[0];
|
||||
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
|
||||
// apply velocity
|
||||
p->s += dp[0] * speed;
|
||||
p->t += dp[1] * speed;
|
||||
|
||||
// make sure particles can't skip more than 1 face boundary at a time
|
||||
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
|
||||
|
||||
// check if the particle is jumping a boundary
|
||||
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
|
||||
// this is not treated here.
|
||||
int edge = -1;
|
||||
|
||||
if (p->s >= 1.0f) edge = 1;
|
||||
if (p->s <= 0.0f) edge = 3;
|
||||
if (p->t >= 1.0f) edge = 2;
|
||||
if (p->t <= 0.0f) edge = 0;
|
||||
|
||||
if (edge>=0) {
|
||||
// warp the particle to the other side of the boundary
|
||||
WarpParticle(_adjacency, edge, p, dp);
|
||||
}
|
||||
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
|
||||
|
||||
// resolve particle positions into patch handles
|
||||
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
|
||||
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
|
||||
if (handle) {
|
||||
_patchCoords.push_back(
|
||||
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Dump adjacency info
|
||||
std::ostream & operator << (std::ostream & os,
|
||||
STParticles::FaceInfo const & f) {
|
||||
|
@ -26,7 +26,8 @@
|
||||
#define ST_PARTICLES_H
|
||||
|
||||
#include <far/topologyRefiner.h>
|
||||
|
||||
#include <far/patchMap.h>
|
||||
#include <osd/types.h>
|
||||
#include <iostream>
|
||||
|
||||
//
|
||||
@ -72,47 +73,6 @@ public:
|
||||
float s, t; ///< parametric location on face
|
||||
};
|
||||
|
||||
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
|
||||
|
||||
STParticles(Refiner const & refiner, int nparticles, bool centered=false);
|
||||
|
||||
void Update(float deltaTime);
|
||||
|
||||
int GetNumParticles() const {
|
||||
return (int)_positions.size();
|
||||
}
|
||||
|
||||
void SetSpeed(float speed) {
|
||||
_speed = std::max(-1.0f, std::min(1.0f, speed));
|
||||
}
|
||||
|
||||
float GetSpeed() const {
|
||||
return _speed;
|
||||
}
|
||||
|
||||
std::vector<Position> & GetPositions() {
|
||||
return _positions;
|
||||
}
|
||||
|
||||
std::vector<float> & GetVelocities() {
|
||||
return _velocities;
|
||||
}
|
||||
|
||||
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
|
||||
|
||||
private:
|
||||
|
||||
//
|
||||
// Particle "Dynamics"
|
||||
//
|
||||
std::vector<Position> _positions;
|
||||
|
||||
std::vector<float> _velocities;
|
||||
|
||||
float _speed; // velocity multiplier
|
||||
|
||||
private:
|
||||
|
||||
//
|
||||
// Topology adjacency (borrowed from Ptexture.h)
|
||||
//
|
||||
@ -152,11 +112,63 @@ private:
|
||||
int adjfaces[4];
|
||||
};
|
||||
|
||||
void warpParticle(int edge, Position * p, float * dp);
|
||||
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
|
||||
typedef OpenSubdiv::Far::PatchTable PatchTable;
|
||||
|
||||
STParticles(Refiner const & refiner, PatchTable const *patchTable,
|
||||
int nparticles, bool centered=false);
|
||||
|
||||
~STParticles();
|
||||
|
||||
void Update(float deltaTime);
|
||||
|
||||
int GetNumParticles() const {
|
||||
return (int)_positions.size();
|
||||
}
|
||||
|
||||
void SetSpeed(float speed) {
|
||||
_speed = std::max(-1.0f, std::min(1.0f, speed));
|
||||
}
|
||||
|
||||
float GetSpeed() const {
|
||||
return _speed;
|
||||
}
|
||||
|
||||
std::vector<Position> & GetPositions() {
|
||||
return _positions;
|
||||
}
|
||||
|
||||
std::vector<float> & GetVelocities() {
|
||||
return _velocities;
|
||||
}
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> GetPatchCoords() const {
|
||||
return _patchCoords;
|
||||
}
|
||||
|
||||
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
|
||||
|
||||
static void WarpParticle(std::vector<FaceInfo> const &adjacency,
|
||||
int edge, Position * p, float * dp);
|
||||
|
||||
private:
|
||||
|
||||
//
|
||||
// Particle "Dynamics"
|
||||
//
|
||||
std::vector<Position> _positions;
|
||||
|
||||
std::vector<float> _velocities;
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;
|
||||
|
||||
float _speed; // velocity multiplier
|
||||
|
||||
friend std::ostream & operator << (std::ostream & os, FaceInfo const & f);
|
||||
|
||||
|
||||
std::vector<FaceInfo> _adjacency;
|
||||
OpenSubdiv::Far::PatchMap const *_patchMap;
|
||||
};
|
||||
|
||||
#endif // ST_PARTICLES_H
|
||||
|
@ -786,7 +786,7 @@ bindTextures() {
|
||||
}
|
||||
|
||||
static GLenum
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
@ -868,7 +868,7 @@ display() {
|
||||
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
if (g_displayStyle == kWire)
|
||||
@ -879,7 +879,7 @@ display() {
|
||||
|
||||
// patch drawing
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
GLenum primType = bindProgram(GetEffect(), patch);
|
||||
|
||||
@ -909,7 +909,7 @@ display() {
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
|
||||
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
GLenum primType = bindProgram(GetEffect(/*uvDraw=*/ true), patch);
|
||||
|
||||
|
4
examples/glImaging/glImaging.cpp
Executable file → Normal file
4
examples/glImaging/glImaging.cpp
Executable file → Normal file
@ -422,11 +422,11 @@ void runTest(ShapeDesc const &shapeDesc, std::string const &kernel,
|
||||
mesh->GetPatchTable()->GetPatchParamTextureBuffer());
|
||||
}
|
||||
|
||||
Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
Osd::PatchArrayVector const & patches =
|
||||
mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
Osd::PatchArray const & patch = patches[i];
|
||||
Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
||||
|
@ -597,7 +597,7 @@ static void bindTextures(Effect effect) {
|
||||
}
|
||||
|
||||
static GLuint
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
@ -670,12 +670,12 @@ display() {
|
||||
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
// patch drawing
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
|
||||
GLenum primType = GL_PATCHES;
|
||||
@ -807,13 +807,13 @@ drawStroke(int x, int y) {
|
||||
effect.paint = 1;
|
||||
bindTextures(effect);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
// patch drawing
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
|
||||
GLenum primType = GL_PATCHES;
|
||||
|
@ -1249,7 +1249,7 @@ bindTextures() {
|
||||
//------------------------------------------------------------------------------
|
||||
static GLenum
|
||||
bindProgram(Effect effect,
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
GLDrawConfig *config = g_shaderCache.GetDrawConfig(effectDesc);
|
||||
@ -1296,10 +1296,10 @@ drawModel() {
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
// patch drawing
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
for (int i = 0; i < (int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
Effect effect;
|
||||
effect.value = 0;
|
||||
|
@ -1181,7 +1181,7 @@ bindTextures() {
|
||||
|
||||
static GLenum
|
||||
bindProgram(Effect effect,
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
|
||||
OpenSubdiv::Osd::PatchArray const & patch) {
|
||||
EffectDesc effectDesc(patch.GetDescriptor(), effect);
|
||||
|
||||
// only legacy gregory needs maxValence and numElements
|
||||
@ -1299,7 +1299,7 @@ display() {
|
||||
|
||||
glBindVertexArray(g_vao);
|
||||
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
|
||||
OpenSubdiv::Osd::PatchArrayVector const & patches =
|
||||
g_mesh->GetPatchTable()->GetPatchArrays();
|
||||
|
||||
// patch drawing
|
||||
@ -1316,7 +1316,7 @@ display() {
|
||||
|
||||
// core draw-calls
|
||||
for (int i=0; i<(int)patches.size(); ++i) {
|
||||
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
|
||||
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
|
||||
|
||||
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
|
||||
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();
|
||||
|
@ -29,6 +29,7 @@
|
||||
set(CPU_SOURCE_FILES
|
||||
cpuEvaluator.cpp
|
||||
cpuKernel.cpp
|
||||
cpuPatchTable.cpp
|
||||
cpuVertexBuffer.cpp
|
||||
)
|
||||
|
||||
@ -43,10 +44,12 @@ set(PRIVATE_HEADER_FILES
|
||||
|
||||
set(PUBLIC_HEADER_FILES
|
||||
cpuEvaluator.h
|
||||
cpuPatchTable.h
|
||||
cpuVertexBuffer.h
|
||||
mesh.h
|
||||
nonCopyable.h
|
||||
opengl.h
|
||||
types.h
|
||||
vertexDescriptor.h
|
||||
)
|
||||
|
||||
@ -216,6 +219,7 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS})
|
||||
# OpenCL code & dependencies
|
||||
set(OPENCL_PUBLIC_HEADERS
|
||||
clEvaluator.h
|
||||
clPatchTable.h
|
||||
clVertexBuffer.h
|
||||
opencl.h
|
||||
)
|
||||
@ -223,6 +227,7 @@ set(OPENCL_PUBLIC_HEADERS
|
||||
if ( OPENCL_FOUND )
|
||||
list(APPEND GPU_SOURCE_FILES
|
||||
clEvaluator.cpp
|
||||
clPatchTable.cpp
|
||||
clVertexBuffer.cpp
|
||||
)
|
||||
list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
|
||||
@ -254,12 +259,14 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
|
||||
# CUDA code & dependencies
|
||||
set(CUDA_PUBLIC_HEADERS
|
||||
cudaEvaluator.h
|
||||
cudaPatchTable.h
|
||||
cudaVertexBuffer.h
|
||||
)
|
||||
|
||||
if( CUDA_FOUND )
|
||||
list(APPEND GPU_SOURCE_FILES
|
||||
cudaEvaluator.cpp
|
||||
cudaPatchTable.cpp
|
||||
cudaVertexBuffer.cpp
|
||||
)
|
||||
list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS})
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
|
||||
#include "../osd/opencl.h"
|
||||
#include "../far/error.h"
|
||||
@ -87,11 +88,12 @@ CLStencilTable::~CLStencilTable() {
|
||||
|
||||
CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue)
|
||||
: _clContext(context), _clCommandQueue(queue),
|
||||
_program(NULL), _stencilsKernel(NULL) {
|
||||
_program(NULL), _stencilKernel(NULL), _patchKernel(NULL) {
|
||||
}
|
||||
|
||||
CLEvaluator::~CLEvaluator() {
|
||||
if (_stencilsKernel) clReleaseKernel(_stencilsKernel);
|
||||
if (_stencilKernel) clReleaseKernel(_stencilKernel);
|
||||
if (_patchKernel) clReleaseKernel(_patchKernel);
|
||||
if (_program) clReleaseProgram(_program);
|
||||
}
|
||||
|
||||
@ -145,7 +147,13 @@ CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
return false;
|
||||
}
|
||||
|
||||
_stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum);
|
||||
_stencilKernel = clCreateKernel(_program, "computeStencils", &errNum);
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
_patchKernel = clCreateKernel(_program, "computePatches", &errNum);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
|
||||
@ -169,24 +177,24 @@ CLEvaluator::EvalStencils(cl_mem src,
|
||||
|
||||
size_t globalWorkSize = (size_t)(end - start);
|
||||
|
||||
clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes);
|
||||
clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets);
|
||||
clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights);
|
||||
clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start);
|
||||
clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end);
|
||||
clSetKernelArg(_stencilKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_stencilKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_stencilKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_stencilKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_stencilKernel, 4, sizeof(cl_mem), &sizes);
|
||||
clSetKernelArg(_stencilKernel, 5, sizeof(cl_mem), &offsets);
|
||||
clSetKernelArg(_stencilKernel, 6, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(_stencilKernel, 7, sizeof(cl_mem), &weights);
|
||||
clSetKernelArg(_stencilKernel, 8, sizeof(int), &start);
|
||||
clSetKernelArg(_stencilKernel, 9, sizeof(int), &end);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _stencilsKernel, 1, NULL,
|
||||
_clCommandQueue, _stencilKernel, 1, NULL,
|
||||
&globalWorkSize, NULL, 0, NULL, NULL);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"ApplyStencilTableKernel (%d) ", errNum);
|
||||
"ApplyStencilKernel (%d) ", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -194,6 +202,50 @@ CLEvaluator::EvalStencils(cl_mem src,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CLEvaluator::EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst, VertexBufferDescriptor const &dstDesc,
|
||||
cl_mem du, VertexBufferDescriptor const &duDesc,
|
||||
cl_mem dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
cl_mem patchCoordsBuffer,
|
||||
cl_mem patchArrayBuffer,
|
||||
cl_mem patchIndexBuffer,
|
||||
cl_mem patchParamBuffer) const {
|
||||
|
||||
size_t globalWorkSize = (size_t)(numPatchCoords);
|
||||
|
||||
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
|
||||
clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
|
||||
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &patchCoordsBuffer);
|
||||
clSetKernelArg(_patchKernel, 11, sizeof(cl_mem), &patchArrayBuffer);
|
||||
clSetKernelArg(_patchKernel, 12, sizeof(cl_mem), &patchIndexBuffer);
|
||||
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &patchParamBuffer);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _patchKernel, 1, NULL,
|
||||
&globalWorkSize, NULL, 0, NULL, NULL);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"ApplyPatchKernel (%d) ", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
clFinish(_clCommandQueue);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* static */
|
||||
void
|
||||
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opencl.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -75,9 +76,6 @@ private:
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// \brief OpenCL stencil kernel
|
||||
///
|
||||
///
|
||||
class CLEvaluator {
|
||||
public:
|
||||
typedef bool Instantiatable;
|
||||
@ -107,6 +105,12 @@ public:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
@ -124,7 +128,7 @@ public:
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied. The table must have
|
||||
/// OpenCL memory interfaces.
|
||||
/// SSBO interfaces.
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
@ -137,25 +141,25 @@ public:
|
||||
/// cl_command_queue GetCommandQueue()
|
||||
/// methods.
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE,
|
||||
typename DEVICE_CONTEXT>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename STENCIL_TABLE, typename DEVICE_CONTEXT>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
return instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
instance = Create(srcDesc, dstDesc, deviceContext);
|
||||
if (instance) {
|
||||
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
bool r = instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
delete instance;
|
||||
return r;
|
||||
@ -167,15 +171,14 @@ public:
|
||||
/// Generic compute function.
|
||||
/// Dispatch the CL compute kernel asynchronously.
|
||||
/// Returns false if the kernel hasn't been compiled yet.
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue),
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCLBuffer(_clCommandQueue),
|
||||
dstBuffer->BindCLBuffer(_clCommandQueue),
|
||||
dstDesc,
|
||||
stencilTable->GetSizesBuffer(),
|
||||
stencilTable->GetOffsetsBuffer(),
|
||||
@ -187,10 +190,8 @@ public:
|
||||
|
||||
/// Dispatch the CL compute kernel asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
bool EvalStencils(cl_mem src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
bool EvalStencils(cl_mem src, VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst, VertexBufferDescriptor const &dstDesc,
|
||||
cl_mem sizes,
|
||||
cl_mem offsets,
|
||||
cl_mem indices,
|
||||
@ -198,6 +199,278 @@ public:
|
||||
int start,
|
||||
int end) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCLBuffer() method returning an
|
||||
/// array of PatchCoord struct.
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext client providing context class which supports
|
||||
/// cL_context GetContext()
|
||||
/// cl_command_queue GetCommandQueue()
|
||||
/// methods.
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
|
||||
typename DEVICE_CONTEXT>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc, deviceContext);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer
|
||||
///
|
||||
/// @param duDesc
|
||||
///
|
||||
/// @param dvBuffer
|
||||
///
|
||||
/// @param dvDesc
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCLBuffer() method returning an
|
||||
/// array of PatchCoord struct
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext client providing context class which supports
|
||||
/// cL_context GetContext()
|
||||
/// cl_command_queue GetCommandQueue()
|
||||
/// methods.
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
|
||||
typename DEVICE_CONTEXT>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CLEvaluator const *instance,
|
||||
DEVICE_CONTEXT deviceContext) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc, deviceContext);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCLBuffer() method returning an
|
||||
/// array of PatchCoord struct.
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
|
||||
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
|
||||
0, VertexBufferDescriptor(),
|
||||
0, VertexBufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoords->BindCLBuffer(_clCommandQueue),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data of Du
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCLBuffer() method returning a CL
|
||||
/// buffer object of destination data of Dv
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
|
||||
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
|
||||
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
|
||||
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords->BindCLBuffer(_clCommandQueue),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
bool EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
|
||||
cl_mem dst, VertexBufferDescriptor const &dstDesc,
|
||||
cl_mem du, VertexBufferDescriptor const &duDesc,
|
||||
cl_mem dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
cl_mem patchCoordsBuffer,
|
||||
cl_mem patchArrayBuffer,
|
||||
cl_mem patchIndexBuffer,
|
||||
cl_mem patchParamsBuffer) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// Configure OpenCL kernel.
|
||||
/// Returns false if it fails to compile the kernel.
|
||||
bool Compile(VertexBufferDescriptor const &srcDesc,
|
||||
@ -215,7 +488,8 @@ private:
|
||||
cl_context _clContext;
|
||||
cl_command_queue _clCommandQueue;
|
||||
cl_program _program;
|
||||
cl_kernel _stencilsKernel;
|
||||
cl_kernel _stencilKernel;
|
||||
cl_kernel _patchKernel;
|
||||
};
|
||||
|
||||
|
||||
|
@ -85,3 +85,176 @@ __kernel void computeStencils(__global float * src,
|
||||
|
||||
writeVertex(dst, current, &v);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
struct PatchArray {
|
||||
int patchType;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
|
||||
struct PatchCoord {
|
||||
int arrayIndex;
|
||||
int patchIndex;
|
||||
int vertIndex;
|
||||
float s;
|
||||
float t;
|
||||
};
|
||||
|
||||
struct PatchParam {
|
||||
int faceIndex;
|
||||
uint patchBits;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
static void getBSplineWeights(float t, float *point, float *deriv) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point[3] = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
deriv[0] = -0.5f*t2 + t - 0.5f;
|
||||
deriv[1] = 1.5f*t2 - 2.0f*t;
|
||||
deriv[2] = -1.5f*t2 + t + 0.5f;
|
||||
deriv[3] = 0.5f*t2;
|
||||
}
|
||||
|
||||
static void adjustBoundaryWeights(uint bits, float *sWeights, float *tWeights) {
|
||||
int boundary = ((bits >> 4) & 0xf);
|
||||
|
||||
if (boundary & 1) {
|
||||
tWeights[2] -= tWeights[0];
|
||||
tWeights[1] += 2*tWeights[0];
|
||||
tWeights[0] = 0;
|
||||
}
|
||||
if (boundary & 2) {
|
||||
sWeights[1] -= sWeights[3];
|
||||
sWeights[2] += 2*sWeights[3];
|
||||
sWeights[3] = 0;
|
||||
}
|
||||
if (boundary & 4) {
|
||||
tWeights[1] -= tWeights[3];
|
||||
tWeights[2] += 2*tWeights[3];
|
||||
tWeights[3] = 0;
|
||||
}
|
||||
if (boundary & 8) {
|
||||
sWeights[2] -= sWeights[0];
|
||||
sWeights[1] += 2*sWeights[0];
|
||||
sWeights[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int getDepth(uint patchBits) {
|
||||
return (patchBits & 0x7);
|
||||
}
|
||||
|
||||
static float getParamFraction(uint patchBits) {
|
||||
bool nonQuadRoot = (patchBits >> 3) & 0x1;
|
||||
int depth = getDepth(patchBits);
|
||||
if (nonQuadRoot) {
|
||||
return 1.0f / (float)( 1 << (depth-1) );
|
||||
} else {
|
||||
return 1.0f / (float)( 1 << depth );
|
||||
}
|
||||
}
|
||||
|
||||
static void normalizePatchCoord(uint patchBits, float *uv) {
|
||||
float frac = getParamFraction(patchBits);
|
||||
|
||||
int iu = (patchBits >> 22) & 0x3ff;
|
||||
int iv = (patchBits >> 12) & 0x3ff;
|
||||
|
||||
// top left corner
|
||||
float pu = (float)iu*frac;
|
||||
float pv = (float)iv*frac;
|
||||
|
||||
// normalize u,v coordinates
|
||||
uv[0] = (uv[0] - pu) / frac;
|
||||
uv[1] = (uv[1] - pv) / frac;
|
||||
}
|
||||
|
||||
__kernel void computePatches(__global float *src, int srcOffset,
|
||||
__global float *dst, int dstOffset,
|
||||
__global float *du, int duOffset, int duStride,
|
||||
__global float *dv, int dvOffset, int dvStride,
|
||||
__global struct PatchCoord *patchCoords,
|
||||
__global struct PatchArray *patchArrayBuffer,
|
||||
__global int *patchIndexBuffer,
|
||||
__global struct PatchParam *patchParamBuffer) {
|
||||
int current = get_global_id(0);
|
||||
|
||||
if (src) src += srcOffset;
|
||||
if (dst) dst += dstOffset;
|
||||
if (du) du += duOffset;
|
||||
if (dv) dv += dvOffset;
|
||||
|
||||
struct PatchCoord coord = patchCoords[current];
|
||||
struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
|
||||
|
||||
int patchType = 6; // array.patchType XXX: REGULAR only for now.
|
||||
int numControlVertices = 16;
|
||||
uint patchBits = patchParamBuffer[coord.patchIndex].patchBits;
|
||||
|
||||
float uv[2] = {coord.s, coord.t};
|
||||
normalizePatchCoord(patchBits, uv);
|
||||
float dScale = (float)(1 << getDepth(patchBits));
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
if (patchType == 6) { // REGULAR
|
||||
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
|
||||
getBSplineWeights(uv[0], sWeights, dsWeights);
|
||||
getBSplineWeights(uv[1], tWeights, dtWeights);
|
||||
|
||||
adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
|
||||
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// TODO: GREGORY BASIS
|
||||
}
|
||||
|
||||
int indexBase = array.indexBase + coord.vertIndex;
|
||||
|
||||
struct Vertex v;
|
||||
clear(&v);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(&v, src, index, wP[i]);
|
||||
}
|
||||
writeVertex(dst, current, &v);
|
||||
|
||||
if (du) {
|
||||
struct Vertex vdu;
|
||||
clear(&vdu);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(&vdu, src, index, wDs[i]);
|
||||
}
|
||||
writeVertex(du, current, &vdu);
|
||||
}
|
||||
if (dv) {
|
||||
struct Vertex vdv;
|
||||
clear(&vdv);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(&vdv, src, index, wDt[i]);
|
||||
}
|
||||
writeVertex(dv, current, &vdv);
|
||||
}
|
||||
|
||||
}
|
||||
|
102
opensubdiv/osd/clPatchTable.cpp
Normal file
102
opensubdiv/osd/clPatchTable.cpp
Normal file
@ -0,0 +1,102 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "../osd/clPatchTable.h"
|
||||
|
||||
#include "../far/error.h"
|
||||
#include "../far/patchTable.h"
|
||||
#include "../osd/opencl.h"
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
CLPatchTable::CLPatchTable() :
|
||||
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
|
||||
}
|
||||
|
||||
CLPatchTable::~CLPatchTable() {
|
||||
if (_patchArrays) clReleaseMemObject(_patchArrays);
|
||||
if (_indexBuffer) clReleaseMemObject(_indexBuffer);
|
||||
if (_patchParamBuffer) clReleaseMemObject(_patchParamBuffer);
|
||||
}
|
||||
|
||||
CLPatchTable *
|
||||
CLPatchTable::Create(Far::PatchTable const *farPatchTable,
|
||||
cl_context clContext) {
|
||||
CLPatchTable *instance = new CLPatchTable();
|
||||
if (instance->allocate(farPatchTable, clContext)) return instance;
|
||||
delete instance;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContext) {
|
||||
CpuPatchTable patchTable(farPatchTable);
|
||||
|
||||
size_t numPatchArrays = patchTable.GetNumPatchArrays();
|
||||
size_t indexSize = patchTable.GetPatchIndexSize();
|
||||
size_t patchParamSize = patchTable.GetPatchParamSize();
|
||||
|
||||
cl_int err = 0;
|
||||
_patchArrays = clCreateBuffer(clContext,
|
||||
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
|
||||
numPatchArrays * sizeof(Osd::PatchArray),
|
||||
(void*)patchTable.GetPatchArrayBuffer(),
|
||||
&err);
|
||||
if (err != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
_indexBuffer = clCreateBuffer(clContext,
|
||||
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
|
||||
indexSize * sizeof(int),
|
||||
(void*)patchTable.GetPatchIndexBuffer(),
|
||||
&err);
|
||||
if (err != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
|
||||
return false;
|
||||
}
|
||||
|
||||
_patchParamBuffer = clCreateBuffer(clContext,
|
||||
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
|
||||
patchParamSize * sizeof(Osd::PatchParam),
|
||||
(void*)patchTable.GetPatchParamBuffer(),
|
||||
&err);
|
||||
if (err != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
91
opensubdiv/osd/clPatchTable.h
Normal file
91
opensubdiv/osd/clPatchTable.h
Normal file
@ -0,0 +1,91 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
|
||||
#define OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opencl.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far{
|
||||
class PatchTable;
|
||||
};
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief CL patch table
|
||||
///
|
||||
/// This class is a CL buffer representation of Far::PatchTable.
|
||||
///
|
||||
/// CLEvaluator consumes this table to evaluate on the patches.
|
||||
///
|
||||
///
|
||||
class CLPatchTable : private NonCopyable<CLPatchTable> {
|
||||
public:
|
||||
/// Creator. Returns NULL if error
|
||||
static CLPatchTable *Create(Far::PatchTable const *patchTable,
|
||||
cl_context clContext);
|
||||
|
||||
template <typename DEVICE_CONTEXT>
|
||||
static CLPatchTable * Create(Far::PatchTable const *patchTable,
|
||||
DEVICE_CONTEXT context) {
|
||||
return Create(patchTable, context->GetContext());
|
||||
}
|
||||
|
||||
/// Destructor
|
||||
~CLPatchTable();
|
||||
|
||||
/// Returns the CL memory of the array of Osd::PatchArray buffer
|
||||
cl_mem GetPatchArrayBuffer() const { return _patchArrays; }
|
||||
|
||||
/// Returns the CL memory of the patch control vertices
|
||||
cl_mem GetPatchIndexBuffer() const { return _indexBuffer; }
|
||||
|
||||
/// Returns the CL memory of the array of Osd::PatchParam buffer
|
||||
cl_mem GetPatchParamBuffer() const { return _patchParamBuffer; }
|
||||
|
||||
protected:
|
||||
CLPatchTable();
|
||||
|
||||
bool allocate(Far::PatchTable const *patchTable, cl_context clContext);
|
||||
|
||||
cl_mem _patchArrays;
|
||||
cl_mem _indexBuffer;
|
||||
cl_mem _patchParamBuffer;
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
|
@ -45,6 +45,12 @@ public:
|
||||
/// Creator. Returns NULL if error.
|
||||
static CLVertexBuffer * Create(int numElements, int numVertices, cl_context clContext);
|
||||
|
||||
template <typename DEVICE_CONTEXT>
|
||||
static CLVertexBuffer * Create(int numElements, int numVertices,
|
||||
DEVICE_CONTEXT context) {
|
||||
return Create(numElements, numVertices, context->GetContext());
|
||||
}
|
||||
|
||||
/// Destructor.
|
||||
~CLVertexBuffer();
|
||||
|
||||
@ -52,6 +58,12 @@ public:
|
||||
/// vertices data to Osd.
|
||||
void UpdateData(const float *src, int startVertex, int numVertices, cl_command_queue clQueue);
|
||||
|
||||
template<typename DEVICE_CONTEXT>
|
||||
void UpdateData(const float *src, int startVertex, int numVertices,
|
||||
DEVICE_CONTEXT context) {
|
||||
UpdateData(src, startVertex, numVertices, context->GetCommandQueue());
|
||||
}
|
||||
|
||||
/// Returns how many elements defined in this vertex buffer.
|
||||
int GetNumElements() const;
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include "../osd/cpuEvaluator.h"
|
||||
#include "../osd/cpuKernel.h"
|
||||
#include "../far/patchBasis.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
@ -34,15 +35,15 @@ namespace Osd {
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
CpuEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
@ -55,30 +56,28 @@ CpuEvaluator::EvalStencils(const float *src,
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
CpuEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != dstDsDesc.length) return false;
|
||||
if (srcDesc.length != dstDtDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
|
||||
CpuEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
dstDs, dstDsDesc,
|
||||
dstDt, dstDtDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
start, end);
|
||||
@ -123,10 +122,13 @@ CpuEvaluator::EvalPatches(const float *src,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable) {
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer){
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
else return false;
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
|
||||
@ -134,14 +136,38 @@ CpuEvaluator::EvalPatches(const float *src,
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
PatchCoord const &coords = patchCoords[i];
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
|
||||
int patchType = array.GetPatchType();
|
||||
// XXX: patchIndex is absolute. not sure it's consistent.
|
||||
// (should be offsetted by array.primitiveIdBase?)
|
||||
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
return false;
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < cvs.size(); ++j) {
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
++dstT;
|
||||
@ -151,47 +177,67 @@ CpuEvaluator::EvalPatches(const float *src,
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable) {
|
||||
CpuEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer) {
|
||||
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (dstDs) dstDs += dstDsDesc.offset;
|
||||
if (dstDt) dstDt += dstDtDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> dstDsT(dstDs, dstDsDesc.length, dstDsDesc.stride);
|
||||
BufferAdapter<float> dstDtT(dstDt, dstDtDesc.length, dstDtDesc.stride);
|
||||
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> duT (du, duDesc.length, duDesc.stride);
|
||||
BufferAdapter<float> dvT (dv, dvDesc.length, dvDesc.stride);
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
PatchCoord const &coords = patchCoords[i];
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
dstDsT.Clear();
|
||||
dstDtT.Clear();
|
||||
for (int j = 0; j < cvs.size(); ++j) {
|
||||
duT.Clear();
|
||||
dvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
dstDsT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dstDtT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
duT.AddWithWeight (srcT[cvs[j]], wDs[j]);
|
||||
dvT.AddWithWeight (srcT[cvs[j]], wDt[j]);
|
||||
}
|
||||
++dstT;
|
||||
++dstDsT;
|
||||
++dstDtT;
|
||||
++duT;
|
||||
++dvT;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -29,33 +29,14 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../far/patchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief Coordinates set on a patch table
|
||||
/// XXX: this is a temporary structure, exists during Osd refactoring work.
|
||||
///
|
||||
struct PatchCoord {
|
||||
/// \brief Constructor
|
||||
///
|
||||
/// @param p patch handle
|
||||
///
|
||||
/// @param s parametric location on the patch
|
||||
///
|
||||
/// @param t parametric location on the patch
|
||||
///
|
||||
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
|
||||
handle(handle), s(s), t(t) { }
|
||||
|
||||
Far::PatchTable::PatchHandle handle; ///< patch handle
|
||||
float s, t; ///< parametric location on patch
|
||||
};
|
||||
|
||||
class CpuEvaluator {
|
||||
public:
|
||||
/// ----------------------------------------------------------------------
|
||||
@ -80,7 +61,7 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
@ -89,20 +70,18 @@ public:
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -125,24 +104,27 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
@ -161,19 +143,19 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDsBuffer Output s-derivative buffer
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the output buffer
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDtBuffer Output t-derivative buffer
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the output buffer
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
@ -182,28 +164,22 @@ public:
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *dstDsBuffer,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
DST_BUFFER *dstDtBuffer,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *evaluator = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)evaluator; // unused
|
||||
(void)deviceContext; // unused
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
dstDsBuffer->BindCpuBuffer(),
|
||||
dstDsDesc,
|
||||
dstDtBuffer->BindCpuBuffer(),
|
||||
dstDtDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -228,40 +204,44 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDs Output s-derivatives pointer. An offset of
|
||||
/// dstDsDesc will be applied internally.
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the output buffer
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDt Output t-derivatives pointer. An offset of
|
||||
/// dstDtDesc will be applied internally.
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the output buffer
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start,
|
||||
int end);
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
@ -289,32 +269,35 @@ public:
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER>
|
||||
static bool EvalPatches(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable,
|
||||
CpuEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
patchCoords,
|
||||
patchTable);
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
@ -333,56 +316,59 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDsBuffer Output s-derivatives buffer
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the dstDsBuffer
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dstDtBuffer Output t-derivatives buffer
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the dstDtBuffer
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER>
|
||||
static bool EvalPatches(SRC_BUFFER *srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *dstDsBuffer,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
DST_BUFFER *dstDtBuffer,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable,
|
||||
CpuEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
dstDsBuffer->BindCpuBuffer(),
|
||||
dstDsDesc,
|
||||
dstDtBuffer->BindCpuBuffer(),
|
||||
dstDtDesc,
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords,
|
||||
patchTable);
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
@ -403,20 +389,23 @@ public:
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable on which primvars are evaluated
|
||||
/// for the patchCoords
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable);
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
@ -432,38 +421,45 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dstDs Output s-derivatives pointer. An offset of
|
||||
/// dstDsDesc will be applied internally.
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDsDesc vertex buffer descriptor for the dstDs buffer
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dstDt Output t-derivatives pointer. An offset of
|
||||
/// dstDtDesc will be applied internally.
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dstDtDesc vertex buffer descriptor for the dstDt buffer
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable on which primvars are evaluated
|
||||
/// for the patchCoords
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDs,
|
||||
VertexBufferDescriptor const &dstDsDesc,
|
||||
float *dstDt,
|
||||
VertexBufferDescriptor const &dstDtDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
Far::PatchTable const *patchTable);
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief synchronize all asynchronous computation invoked on this device.
|
||||
static void Synchronize(void * /*deviceContext = NULL*/) {
|
||||
|
106
opensubdiv/osd/cpuPatchTable.cpp
Normal file
106
opensubdiv/osd/cpuPatchTable.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
CpuPatchTable::CpuPatchTable(const Far::PatchTable *farPatchTable) {
|
||||
int nPatchArrays = farPatchTable->GetNumPatchArrays();
|
||||
|
||||
// count
|
||||
int numPatches = 0;
|
||||
int numIndices = 0;
|
||||
for (int j = 0; j < nPatchArrays; ++j) {
|
||||
int nPatch = farPatchTable->GetNumPatches(j);
|
||||
int nCV = farPatchTable->GetPatchArrayDescriptor(j).GetNumControlVertices();
|
||||
numPatches += nPatch;
|
||||
numIndices += nPatch * nCV;
|
||||
}
|
||||
_patchArrays.reserve(nPatchArrays);
|
||||
_indexBuffer.reserve(numIndices);
|
||||
_patchParamBuffer.reserve(numPatches);
|
||||
|
||||
// for each patchArray
|
||||
for (int j = 0; j < nPatchArrays; ++j) {
|
||||
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
|
||||
farPatchTable->GetNumPatches(j),
|
||||
(int)_indexBuffer.size(),
|
||||
(int)_patchParamBuffer.size());
|
||||
_patchArrays.push_back(patchArray);
|
||||
|
||||
// indices
|
||||
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
|
||||
for (int k = 0; k < indices.size(); ++k) {
|
||||
_indexBuffer.push_back(indices[k]);
|
||||
}
|
||||
|
||||
// patchParams bundling
|
||||
// XXX: this process won't be needed if Far::PatchParam includes
|
||||
// sharpness.
|
||||
#if 0
|
||||
// XXX: we need sharpness interface for patcharray or put sharpness
|
||||
// into patchParam.
|
||||
Far::ConstPatchParamArray patchParams =
|
||||
farPatchTable->GetPatchParams(j);
|
||||
for (int k = 0; k < patchParams.size(); ++k) {
|
||||
float sharpness = 0.0;
|
||||
_patchParamBuffer.push_back(patchParams[k].faceIndex);
|
||||
_patchParamBuffer.push_back(patchParams[k].bitField.field);
|
||||
_patchParamBuffer.push_back(*((unsigned int *)&sharpness));
|
||||
}
|
||||
#else
|
||||
// XXX: workaround. GetPatchParamTable() will be deprecated though.
|
||||
Far::PatchParamTable const & patchParamTable =
|
||||
farPatchTable->GetPatchParamTable();
|
||||
std::vector<Far::Index> const &sharpnessIndexTable =
|
||||
farPatchTable->GetSharpnessIndexTable();
|
||||
int numPatches = farPatchTable->GetNumPatches(j);
|
||||
for (int k = 0; k < numPatches; ++k) {
|
||||
float sharpness = 0.0;
|
||||
int patchIndex = (int)_patchParamBuffer.size();
|
||||
if (patchIndex < (int)sharpnessIndexTable.size()) {
|
||||
int sharpnessIndex = sharpnessIndexTable[patchIndex];
|
||||
if (sharpnessIndex >= 0)
|
||||
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
|
||||
}
|
||||
PatchParam param;
|
||||
//param.patchParam = patchParamTable[patchIndex];
|
||||
param.faceIndex = patchParamTable[patchIndex].faceIndex;
|
||||
param.patchBits = patchParamTable[patchIndex].bitField.field;
|
||||
param.sharpness = sharpness;
|
||||
_patchParamBuffer.push_back(param);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
102
opensubdiv/osd/cpuPatchTable.h
Normal file
102
opensubdiv/osd/cpuPatchTable.h
Normal file
@ -0,0 +1,102 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
|
||||
#define OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include <vector>
|
||||
#include "../far/patchDescriptor.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far{
|
||||
class PatchTable;
|
||||
};
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief Cpu patch table
|
||||
///
|
||||
/// XXX: We can use just Far::PatchTable for typical CpuEval use cases.
|
||||
///
|
||||
/// Currently this class exists because of the template resolution
|
||||
/// for the CpuEvaluator's generic interface functions
|
||||
/// (glEvalLimit example uses), and
|
||||
/// device-specific patch tables such as GLPatchTables internally use
|
||||
/// as a staging buffer to splice patcharray and interleave sharpnesses.
|
||||
///
|
||||
/// Ideally Far::PatchTables should have the same data representation
|
||||
/// and accessors so that we don't have to copy data unnecessarily.
|
||||
///
|
||||
class CpuPatchTable {
|
||||
public:
|
||||
static CpuPatchTable *Create(const Far::PatchTable *patchTable,
|
||||
void *deviceContext = NULL) {
|
||||
(void)deviceContext; // unused
|
||||
return new CpuPatchTable(patchTable);
|
||||
}
|
||||
|
||||
explicit CpuPatchTable(const Far::PatchTable *patchTable);
|
||||
~CpuPatchTable() {}
|
||||
|
||||
const PatchArray *GetPatchArrayBuffer() const {
|
||||
return &_patchArrays[0];
|
||||
}
|
||||
const int *GetPatchIndexBuffer() const {
|
||||
return &_indexBuffer[0];
|
||||
}
|
||||
const PatchParam *GetPatchParamBuffer() const {
|
||||
return &_patchParamBuffer[0];
|
||||
}
|
||||
|
||||
size_t GetNumPatchArrays() const {
|
||||
return _patchArrays.size();
|
||||
}
|
||||
size_t GetPatchIndexSize() const {
|
||||
return _indexBuffer.size();
|
||||
}
|
||||
size_t GetPatchParamSize() const {
|
||||
return _patchParamBuffer.size();
|
||||
}
|
||||
|
||||
protected:
|
||||
PatchArrayVector _patchArrays;
|
||||
std::vector<int> _indexBuffer;
|
||||
PatchParamVector _patchParamBuffer;
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
|
@ -52,7 +52,8 @@ CpuVertexBuffer::Create(int numElements, int numVertices,
|
||||
}
|
||||
|
||||
void
|
||||
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
|
||||
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void * /*deviceContext*/) {
|
||||
|
||||
memcpy(_cpuBuffer + startVertex * _numElements,
|
||||
src, GetNumElements() * numVertices * sizeof(float));
|
||||
|
@ -50,7 +50,8 @@ public:
|
||||
|
||||
/// This method is meant to be used in client code in order to provide
|
||||
/// coarse vertices data to Osd.
|
||||
void UpdateData(const float *src, int startVertex, int numVertices);
|
||||
void UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void *deviceContext = NULL);
|
||||
|
||||
/// Returns how many elements defined in this vertex buffer.
|
||||
int GetNumElements() const;
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../far/stencilTable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
extern "C" {
|
||||
void CudaEvalStencils(const float *src,
|
||||
@ -41,6 +42,25 @@ extern "C" {
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
|
||||
void CudaEvalPatches(
|
||||
const float *src, float *dst,
|
||||
int length, int srcStride, int dstStride,
|
||||
int numPatchCoords,
|
||||
const void *patchCoords,
|
||||
const void *patchArrays,
|
||||
const int *patchIndices,
|
||||
const void *patchParams);
|
||||
|
||||
void CudaEvalPatchesWithDerivatives(
|
||||
const float *src, float *dst, float *du, float *dv,
|
||||
int length,
|
||||
int srcStride, int dstStride, int dvStride, int duStride,
|
||||
int numPatchCoords,
|
||||
const void *patchCoords,
|
||||
const void *patchArrays,
|
||||
const int *patchIndices,
|
||||
const void *patchParams);
|
||||
}
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -102,6 +122,8 @@ CudaEvaluator::EvalStencils(const float *src,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end) {
|
||||
if (dst == NULL) return false;
|
||||
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dst + dstDesc.offset,
|
||||
srcDesc.length,
|
||||
@ -112,6 +134,104 @@ CudaEvaluator::EvalStencils(const float *src,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start,
|
||||
int end) {
|
||||
// PERFORMANCE: need to combine 3 launches together
|
||||
if (dst) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dst + dstDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dstDesc.stride,
|
||||
sizes, offsets, indices, weights,
|
||||
start, end);
|
||||
}
|
||||
if (dstDu) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dstDu + dstDuDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dstDuDesc.stride,
|
||||
sizes, offsets, indices, duWeights,
|
||||
start, end);
|
||||
}
|
||||
if (dstDv) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dstDv + dstDvDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dstDvDesc.stride,
|
||||
sizes, offsets, indices, dvWeights,
|
||||
start, end);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalPatches(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams) {
|
||||
if (src) src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
|
||||
CudaEvalPatches(src, dst,
|
||||
srcDesc.length, srcDesc.stride, dstDesc.stride,
|
||||
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams) {
|
||||
|
||||
if (src) src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
CudaEvalPatchesWithDerivatives(
|
||||
src, dst, du, dv,
|
||||
srcDesc.length, srcDesc.stride,
|
||||
dstDesc.stride, duDesc.stride, dvDesc.stride,
|
||||
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* static */
|
||||
void
|
||||
CudaEvaluator::Synchronize(void * /*deviceContext*/) {
|
||||
|
@ -29,11 +29,13 @@
|
||||
|
||||
#include <vector>
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far {
|
||||
class PatchTable;
|
||||
class StencilTable;
|
||||
}
|
||||
|
||||
@ -43,7 +45,7 @@ namespace Osd {
|
||||
///
|
||||
/// This class is a cuda buffer representation of Far::StencilTable.
|
||||
///
|
||||
/// CudaComputeKernel consumes this table to apply stencils
|
||||
/// CudaEvaluator consumes this table to apply stencils
|
||||
///
|
||||
///
|
||||
class CudaStencilTable {
|
||||
@ -72,10 +74,14 @@ private:
|
||||
int _numStencils;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
class CudaEvaluator {
|
||||
public:
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
@ -99,21 +105,18 @@ public:
|
||||
///
|
||||
/// @param deviceContext not used in the CudaEvaluator
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const void *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const void *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
return EvalStencils(srcVertexBuffer->BindCudaBuffer(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCudaBuffer(),
|
||||
dstDesc,
|
||||
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
(int const *)stencilTable->GetSizesBuffer(),
|
||||
(int const *)stencilTable->GetOffsetsBuffer(),
|
||||
(int const *)stencilTable->GetIndicesBuffer(),
|
||||
@ -122,17 +125,369 @@ public:
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
/// \brief Static eval stencils function which takes raw cuda buffers for
|
||||
/// input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
/// @param instance not used in the cuda kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the cuda kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CudaEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw cuda pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
|
||||
(const int *)patchTable->GetPatchIndexBuffer(),
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
|
||||
(const int *)patchTable->GetPatchIndexBuffer(),
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndices an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParams an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndices an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParams an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
static void Synchronize(void *deviceContext = NULL);
|
||||
};
|
||||
|
||||
|
@ -238,6 +238,186 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Osd::PatchCoord osd/types.h
|
||||
struct PatchCoord {
|
||||
int arrayIndex;
|
||||
int patchIndex;
|
||||
int vertIndex;
|
||||
float s;
|
||||
float t;
|
||||
};
|
||||
struct PatchArray {
|
||||
int patchType; // Far::PatchDescriptor::Type
|
||||
int numPatches;
|
||||
int indexBase; // offset in the index buffer
|
||||
int primitiveIdBase; // offset in the patch param buffer
|
||||
};
|
||||
struct PatchParam {
|
||||
int faceIndex;
|
||||
unsigned int bitField;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
__device__ void
|
||||
getBSplineWeights(float t, float point[4], float deriv[4]) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float const one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point[3] = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
if (deriv) {
|
||||
deriv[0] = -0.5f*t2 + t - 0.5f;
|
||||
deriv[1] = 1.5f*t2 - 2.0f*t;
|
||||
deriv[2] = -1.5f*t2 + t + 0.5f;
|
||||
deriv[3] = 0.5f*t2;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void
|
||||
adjustBoundaryWeights(unsigned int bits, float sWeights[4], float tWeights[4]) {
|
||||
int boundary = ((bits >> 4) & 0xf); // far/patchParam.h
|
||||
|
||||
if (boundary & 1) {
|
||||
tWeights[2] -= tWeights[0];
|
||||
tWeights[1] += 2*tWeights[0];
|
||||
tWeights[0] = 0;
|
||||
}
|
||||
if (boundary & 2) {
|
||||
sWeights[1] -= sWeights[3];
|
||||
sWeights[2] += 2*sWeights[3];
|
||||
sWeights[3] = 0;
|
||||
}
|
||||
if (boundary & 4) {
|
||||
tWeights[1] -= tWeights[3];
|
||||
tWeights[2] += 2*tWeights[3];
|
||||
tWeights[3] = 0;
|
||||
}
|
||||
if (boundary & 8) {
|
||||
sWeights[2] -= sWeights[0];
|
||||
sWeights[1] += 2*sWeights[0];
|
||||
sWeights[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
__device__
|
||||
int getDepth(unsigned int patchBits) {
|
||||
return (patchBits & 0x7);
|
||||
}
|
||||
|
||||
__device__
|
||||
float getParamFraction(unsigned int patchBits) {
|
||||
bool nonQuadRoot = (patchBits >> 3) & 0x1;
|
||||
int depth = getDepth(patchBits);
|
||||
if (nonQuadRoot) {
|
||||
return 1.0f / float( 1 << (depth-1) );
|
||||
} else {
|
||||
return 1.0f / float( 1 << depth );
|
||||
}
|
||||
}
|
||||
|
||||
__device__
|
||||
void normalizePatchCoord(unsigned int patchBits, float *u, float *v) {
|
||||
float frac = getParamFraction(patchBits);
|
||||
|
||||
int iu = (patchBits >> 22) & 0x3ff;
|
||||
int iv = (patchBits >> 12) & 0x3ff;
|
||||
|
||||
// top left corner
|
||||
float pu = (float)iu*frac;
|
||||
float pv = (float)iv*frac;
|
||||
|
||||
// normalize u,v coordinates
|
||||
*u = (*u - pu) / frac;
|
||||
*v = (*v - pv) / frac;
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
|
||||
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
int first = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
|
||||
// PERFORMANCE: not yet optimized
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
|
||||
for (int i = first; i < numPatchCoords; i += blockDim.x * gridDim.x) {
|
||||
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrayBuffer[coord.arrayIndex];
|
||||
|
||||
int patchType = 6; // array.patchType XXX: REGULAR only for now.
|
||||
int numControlVertices = 16;
|
||||
// note: patchIndex is absolute.
|
||||
unsigned int patchBits = patchParamBuffer[coord.patchIndex].bitField;
|
||||
|
||||
// normalize
|
||||
float s = coord.s;
|
||||
float t = coord.t;
|
||||
normalizePatchCoord(patchBits, &s, &t);
|
||||
float dScale = (float)(1 << getDepth(patchBits));
|
||||
|
||||
if (patchType == 6) {
|
||||
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
|
||||
getBSplineWeights(s, sWeights, dsWeights);
|
||||
getBSplineWeights(t, tWeights, dtWeights);
|
||||
|
||||
// Compute the tensor product weight of the (s,t) basis function
|
||||
// corresponding to each control vertex:
|
||||
adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
|
||||
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// TODO: Gregory Basis.
|
||||
continue;
|
||||
}
|
||||
const int *cvs = patchIndexBuffer + array.indexBase + coord.vertIndex;
|
||||
|
||||
float * dstVert = dst + i * dstStride;
|
||||
clear(dstVert, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(dstVert, srcVert, wP[j], length);
|
||||
}
|
||||
if (dstDu) {
|
||||
float *d = dstDu + i * dstDuStride;
|
||||
clear(d, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(d, srcVert, wDs[j], length);
|
||||
}
|
||||
}
|
||||
if (dstDv) {
|
||||
float *d = dstDv + i * dstDvStride;
|
||||
clear(d, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(d, srcVert, wDt[j], length);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#define OPT_KERNEL(NUM_ELEMENTS, KERNEL, X, Y, ARG) \
|
||||
@ -257,20 +437,12 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
|
||||
|
||||
extern "C" {
|
||||
|
||||
void CudaEvalStencils(const float *src,
|
||||
float *dst,
|
||||
int length,
|
||||
int srcStride,
|
||||
int dstStride,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end)
|
||||
{
|
||||
// assert(cvs and dst and sizes and offsets and indices and weights and (end>=start));
|
||||
|
||||
void CudaEvalStencils(
|
||||
const float *src, float *dst,
|
||||
int length, int srcStride, int dstStride,
|
||||
const int * sizes, const int * offsets, const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
if (length == 0 or srcStride == 0 or dstStride == 0 or (end <= start)) {
|
||||
return;
|
||||
}
|
||||
@ -301,4 +473,36 @@ void CudaEvalStencils(const float *src,
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void CudaEvalPatches(
|
||||
const float *src, float *dst,
|
||||
int length, int srcStride, int dstStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
// PERFORMANCE: not optimized at all
|
||||
|
||||
computePatches <<<512, 32>>>(
|
||||
src, dst, NULL, NULL, length, srcStride, dstStride, 0, 0,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
}
|
||||
|
||||
void CudaEvalPatchesWithDerivatives(
|
||||
const float *src, float *dst, float *dstDu, float *dstDv,
|
||||
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
// PERFORMANCE: not optimized at all
|
||||
|
||||
computePatches <<<512, 32>>>(
|
||||
src, dst, dstDu, dstDv, length, srcStride, dstStride, dstDuStride, dstDvStride,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
}
|
||||
|
||||
} /* extern "C" */
|
||||
|
103
opensubdiv/osd/cudaPatchTable.cpp
Normal file
103
opensubdiv/osd/cudaPatchTable.cpp
Normal file
@ -0,0 +1,103 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "../osd/cudaPatchTable.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "../far/patchTable.h"
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
CudaPatchTable::CudaPatchTable() :
|
||||
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
|
||||
}
|
||||
|
||||
CudaPatchTable::~CudaPatchTable() {
|
||||
if (_patchArrays) cudaFree(_patchArrays);
|
||||
if (_indexBuffer) cudaFree(_indexBuffer);
|
||||
if (_patchParamBuffer) cudaFree(_patchParamBuffer);
|
||||
}
|
||||
|
||||
CudaPatchTable *
|
||||
CudaPatchTable::Create(Far::PatchTable const *farPatchTable,
|
||||
void * /*deviceContext*/) {
|
||||
CudaPatchTable *instance = new CudaPatchTable();
|
||||
if (instance->allocate(farPatchTable)) return instance;
|
||||
delete instance;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool
|
||||
CudaPatchTable::allocate(Far::PatchTable const *farPatchTable) {
|
||||
CpuPatchTable patchTable(farPatchTable);
|
||||
|
||||
size_t numPatchArrays = patchTable.GetNumPatchArrays();
|
||||
size_t indexSize = patchTable.GetPatchIndexSize();
|
||||
size_t patchParamSize = patchTable.GetPatchParamSize();
|
||||
|
||||
cudaError_t err;
|
||||
err = cudaMalloc(&_patchArrays, numPatchArrays * sizeof(Osd::PatchArray));
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
err = cudaMalloc(&_indexBuffer, indexSize * sizeof(int));
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
err = cudaMalloc(&_patchParamBuffer, patchParamSize * sizeof(Osd::PatchParam));
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
// copy patch array
|
||||
err = cudaMemcpy(_patchArrays,
|
||||
patchTable.GetPatchArrayBuffer(),
|
||||
numPatchArrays * sizeof(Osd::PatchArray),
|
||||
cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
// copy index buffer
|
||||
err = cudaMemcpy(_indexBuffer,
|
||||
patchTable.GetPatchIndexBuffer(),
|
||||
indexSize * sizeof(int),
|
||||
cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
// patch param buffer
|
||||
err = cudaMemcpy(_patchParamBuffer,
|
||||
patchTable.GetPatchParamBuffer(),
|
||||
patchParamSize * sizeof(Osd::PatchParam),
|
||||
cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
83
opensubdiv/osd/cudaPatchTable.h
Normal file
83
opensubdiv/osd/cudaPatchTable.h
Normal file
@ -0,0 +1,83 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
|
||||
#define OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Far{
|
||||
class PatchTable;
|
||||
};
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief CUDA patch table
|
||||
///
|
||||
/// This class is a cuda buffer representation of Far::PatchTable.
|
||||
///
|
||||
/// CudaEvaluator consumes this table to evaluate on the patches.
|
||||
///
|
||||
///
|
||||
class CudaPatchTable : private NonCopyable<CudaPatchTable> {
|
||||
public:
|
||||
/// Creator. Returns NULL if error
|
||||
static CudaPatchTable *Create(Far::PatchTable const *patchTable,
|
||||
void *deviceContext = NULL);
|
||||
/// Destructor
|
||||
~CudaPatchTable();
|
||||
|
||||
/// Returns the cuda memory of the array of Osd::PatchArray buffer
|
||||
void *GetPatchArrayBuffer() const { return _patchArrays; }
|
||||
|
||||
/// Returns the cuda memory of the patch control vertices
|
||||
void *GetPatchIndexBuffer() const { return _indexBuffer; }
|
||||
|
||||
/// Returns the cuda memory of the array of Osd::PatchParam buffer
|
||||
void *GetPatchParamBuffer() const { return _patchParamBuffer; }
|
||||
|
||||
protected:
|
||||
CudaPatchTable();
|
||||
|
||||
bool allocate(Far::PatchTable const *patchTable);
|
||||
|
||||
void *_patchArrays;
|
||||
void *_indexBuffer;
|
||||
void *_patchParamBuffer;
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
|
@ -43,7 +43,8 @@ CudaVertexBuffer::~CudaVertexBuffer() {
|
||||
}
|
||||
|
||||
CudaVertexBuffer *
|
||||
CudaVertexBuffer::Create(int numElements, int numVertices) {
|
||||
CudaVertexBuffer::Create(int numElements, int numVertices,
|
||||
void * /*deviceContext */) {
|
||||
CudaVertexBuffer *instance =
|
||||
new CudaVertexBuffer(numElements, numVertices);
|
||||
if (instance->allocate()) return instance;
|
||||
@ -52,7 +53,8 @@ CudaVertexBuffer::Create(int numElements, int numVertices) {
|
||||
}
|
||||
|
||||
void
|
||||
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
|
||||
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void * /*deviceContext*/) {
|
||||
|
||||
size_t size = _numElements * numVertices * sizeof(float);
|
||||
|
||||
|
@ -27,6 +27,8 @@
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
@ -41,14 +43,16 @@ class CudaVertexBuffer {
|
||||
|
||||
public:
|
||||
/// Creator. Returns NULL if error.
|
||||
static CudaVertexBuffer * Create(int numElements, int numVertices);
|
||||
static CudaVertexBuffer * Create(int numElements, int numVertices,
|
||||
void *deviceContext = NULL);
|
||||
|
||||
/// Destructor.
|
||||
~CudaVertexBuffer();
|
||||
|
||||
/// This method is meant to be used in client code in order to provide coarse
|
||||
/// vertices data to Osd.
|
||||
void UpdateData(const float *src, int startVertex, int numVertices);
|
||||
void UpdateData(const float *src, int startVertex, int numVertices,
|
||||
void *deviceContext=NULL);
|
||||
|
||||
/// Returns how many elements defined in this vertex buffer.
|
||||
int GetNumElements() const;
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <vector>
|
||||
#include "../far/patchDescriptor.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
struct ID3D11Buffer;
|
||||
struct ID3D11ShaderResourceView;
|
||||
@ -49,33 +50,6 @@ class D3D11PatchTable : private NonCopyable<D3D11PatchTable> {
|
||||
public:
|
||||
typedef ID3D11Buffer * VertexBufferBinding;
|
||||
|
||||
// XXX: this struct will be further refactored.
|
||||
class PatchArray {
|
||||
public:
|
||||
PatchArray(Far::PatchDescriptor desc, int numPatches,
|
||||
int indexBase, int primitiveIdBase) :
|
||||
desc(desc), numPatches(numPatches), indexBase(indexBase),
|
||||
primitiveIdBase(primitiveIdBase) {}
|
||||
Far::PatchDescriptor const &GetDescriptor() const {
|
||||
return desc;
|
||||
}
|
||||
int GetNumPatches() const {
|
||||
return numPatches;
|
||||
}
|
||||
int GetIndexBase() const {
|
||||
return indexBase;
|
||||
}
|
||||
int GetPrimitiveIdBase() const {
|
||||
return primitiveIdBase;
|
||||
}
|
||||
private:
|
||||
Far::PatchDescriptor desc;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
typedef std::vector<PatchArray> PatchArrayVector;
|
||||
|
||||
D3D11PatchTable();
|
||||
~D3D11PatchTable();
|
||||
|
||||
|
@ -88,16 +88,66 @@ GLStencilTableSSBO::~GLStencilTableSSBO() {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
GLComputeEvaluator::GLComputeEvaluator() :
|
||||
_program(0), _workGroupSize(64) {
|
||||
GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64) {
|
||||
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
|
||||
memset (&_patchKernel, 0, sizeof(_patchKernel));
|
||||
}
|
||||
|
||||
GLComputeEvaluator::~GLComputeEvaluator() {
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
}
|
||||
|
||||
static GLuint
|
||||
compileKernel(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const char *kernelDefine,
|
||||
int workGroupSize) {
|
||||
GLuint program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||
|
||||
std::ostringstream defines;
|
||||
defines << "#define LENGTH " << srcDesc.length << "\n"
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
|
||||
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
|
||||
<< "#define WORK_GROUP_SIZE " << workGroupSize << "\n"
|
||||
<< kernelDefine << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3] = {"#version 430\n", 0, 0};
|
||||
shaderSources[1] = defineStr.c_str();
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
glAttachShader(program, shader);
|
||||
|
||||
GLint linked = 0;
|
||||
glLinkProgram(program);
|
||||
glGetProgramiv(program, GL_LINK_STATUS, &linked);
|
||||
|
||||
if (linked == GL_FALSE) {
|
||||
char buffer[1024];
|
||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glGetProgramInfoLog(program, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glDeleteProgram(program);
|
||||
return 0;
|
||||
}
|
||||
|
||||
glDeleteShader(shader);
|
||||
|
||||
return program;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc) {
|
||||
@ -108,58 +158,55 @@ GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
// create stencil kernel
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
_program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||
|
||||
std::ostringstream defines;
|
||||
defines << "#define LENGTH " << srcDesc.length << "\n"
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
|
||||
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
|
||||
<< "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3] = {"#version 430\n", 0, 0};
|
||||
shaderSources[1] = defineStr.c_str();
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
glAttachShader(_program, shader);
|
||||
|
||||
GLint linked = 0;
|
||||
glLinkProgram(_program);
|
||||
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
|
||||
|
||||
if (linked == GL_FALSE) {
|
||||
char buffer[1024];
|
||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glGetProgramInfoLog(_program, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
glDeleteShader(shader);
|
||||
_stencilKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS",
|
||||
_workGroupSize);
|
||||
if (_stencilKernel.program == 0) return false;
|
||||
|
||||
// store uniform locations for the compute kernel program.
|
||||
_uniformSizes = glGetUniformLocation(_program, "stencilSizes");
|
||||
_uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
|
||||
_uniformIndices = glGetUniformLocation(_program, "stencilIndices");
|
||||
_uniformWeights = glGetUniformLocation(_program, "stencilIWeights");
|
||||
_stencilKernel.uniformSizes =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilSizes");
|
||||
_stencilKernel.uniformOffsets =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilOffsets");
|
||||
_stencilKernel.uniformIndices =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilIndices");
|
||||
_stencilKernel.uniformWeights =
|
||||
glGetUniformLocation(_stencilKernel.program, "stencilIWeights");
|
||||
_stencilKernel.uniformStart =
|
||||
glGetUniformLocation(_stencilKernel.program, "batchStart");
|
||||
_stencilKernel.uniformEnd =
|
||||
glGetUniformLocation(_stencilKernel.program, "batchEnd");
|
||||
_stencilKernel.uniformSrcOffset =
|
||||
glGetUniformLocation(_stencilKernel.program, "srcOffset");
|
||||
_stencilKernel.uniformDstOffset =
|
||||
glGetUniformLocation(_stencilKernel.program, "dstOffset");
|
||||
|
||||
_uniformStart = glGetUniformLocation(_program, "batchStart");
|
||||
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
|
||||
// create patch kernel
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
_patchKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES",
|
||||
_workGroupSize);
|
||||
if (_patchKernel.program == 0) return false;
|
||||
|
||||
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
|
||||
_uniformDstOffset = glGetUniformLocation(_program, "dstOffset");
|
||||
// uniform locaitons
|
||||
_patchKernel.uniformSrcOffset =
|
||||
glGetUniformLocation(_patchKernel.program, "srcOffset");
|
||||
_patchKernel.uniformDstOffset =
|
||||
glGetUniformLocation(_patchKernel.program, "dstOffset");
|
||||
_patchKernel.uniformPatchArray =
|
||||
glGetUniformLocation(_patchKernel.program, "patchArray");
|
||||
_patchKernel.uniformDuDesc =
|
||||
glGetUniformLocation(_patchKernel.program, "dstDuDesc");
|
||||
_patchKernel.uniformDvDesc =
|
||||
glGetUniformLocation(_patchKernel.program, "dstDvDesc");
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -183,7 +230,7 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
GLuint weightsBuffer,
|
||||
int start,
|
||||
int end) const {
|
||||
if (!_program) return false;
|
||||
if (!_stencilKernel.program) return false;
|
||||
int count = end - start;
|
||||
if (count <= 0) {
|
||||
return true;
|
||||
@ -196,12 +243,12 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer);
|
||||
|
||||
glUseProgram(_program);
|
||||
glUseProgram(_stencilKernel.program);
|
||||
|
||||
glUniform1i(_uniformStart, start);
|
||||
glUniform1i(_uniformEnd, end);
|
||||
glUniform1i(_uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_uniformDstOffset, dstDesc.offset);
|
||||
glUniform1i(_stencilKernel.uniformStart, start);
|
||||
glUniform1i(_stencilKernel.uniformEnd, end);
|
||||
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset);
|
||||
|
||||
glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
|
||||
|
||||
@ -218,6 +265,52 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GLComputeEvaluator::EvalPatches(
|
||||
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const {
|
||||
|
||||
if (!_patchKernel.program) return false;
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, patchCoordsBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchIndexBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchParamsBuffer);
|
||||
|
||||
glUseProgram(_patchKernel.program);
|
||||
|
||||
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
|
||||
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
|
||||
(const GLint*)&patchArrays[0]);
|
||||
glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
|
||||
glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
|
||||
|
||||
glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1);
|
||||
|
||||
glUseProgram(0);
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -92,26 +93,32 @@ public:
|
||||
/// Destructor. note that the GL context must be made current.
|
||||
~GLComputeEvaluator();
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a
|
||||
/// const float pointer for read
|
||||
/// GL buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
/// GL buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied. The table must have
|
||||
/// SSBO interfaces.
|
||||
///
|
||||
/// @param evaluator cached compiled instance. Clients are supposed to
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
@ -119,25 +126,25 @@ public:
|
||||
///
|
||||
/// @param deviceContext not used in the GLSL kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
return instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
} else {
|
||||
// Create a kernel on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
bool r = instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
delete instance;
|
||||
return r;
|
||||
@ -148,15 +155,14 @@ public:
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcVertexBuffer->BindVBO(),
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcBuffer->BindVBO(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindVBO(),
|
||||
dstBuffer->BindVBO(),
|
||||
dstDesc,
|
||||
stencilTable->GetSizesBuffer(),
|
||||
stencilTable->GetOffsetsBuffer(),
|
||||
@ -168,10 +174,8 @@ public:
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
bool EvalStencils(GLuint srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint sizesBuffer,
|
||||
GLuint offsetsBuffer,
|
||||
GLuint indicesBuffer,
|
||||
@ -179,6 +183,271 @@ public:
|
||||
int start,
|
||||
int end) const;
|
||||
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer
|
||||
///
|
||||
/// @param duDesc
|
||||
///
|
||||
/// @param dvBuffer
|
||||
///
|
||||
/// @param dvDesc
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLComputeEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBOBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
0, VertexBufferDescriptor(),
|
||||
0, VertexBufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindVBO() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
duBuffer->BindVBO(), duDesc,
|
||||
dvBuffer->BindVBO(), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// Configure GLSL kernel. A valid GL context must be made current before
|
||||
/// calling this function. Returns false if it fails to compile the kernel.
|
||||
bool Compile(VertexBufferDescriptor const &srcDesc,
|
||||
@ -188,18 +457,27 @@ public:
|
||||
static void Synchronize(void *deviceContext);
|
||||
|
||||
private:
|
||||
GLuint _program;
|
||||
struct _StencilKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSizes;
|
||||
GLuint uniformOffsets;
|
||||
GLuint uniformIndices;
|
||||
GLuint uniformWeights;
|
||||
GLuint uniformStart;
|
||||
GLuint uniformEnd;
|
||||
GLuint uniformSrcOffset;
|
||||
GLuint uniformDstOffset;
|
||||
} _stencilKernel;
|
||||
|
||||
GLuint _uniformSizes, // stencil table
|
||||
_uniformOffsets,
|
||||
_uniformIndices,
|
||||
_uniformWeights,
|
||||
struct _PatchKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSrcOffset;
|
||||
GLuint uniformDstOffset;
|
||||
GLuint uniformPatchArray;
|
||||
GLuint uniformDuDesc;
|
||||
GLuint uniformDvDesc;
|
||||
|
||||
_uniformStart, // range
|
||||
_uniformEnd,
|
||||
|
||||
_uniformSrcOffset, // src buffer offset (in elements)
|
||||
_uniformDstOffset; // dst buffer offset (in elements)
|
||||
} _patchKernel;
|
||||
|
||||
int _workGroupSize;
|
||||
};
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
#include "../far/patchTable.h"
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/cpuPatchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -33,11 +34,14 @@ namespace OPENSUBDIV_VERSION {
|
||||
namespace Osd {
|
||||
|
||||
GLPatchTable::GLPatchTable() :
|
||||
_indexBuffer(0), _patchParamTexture(0) {
|
||||
_patchIndexBuffer(0), _patchParamBuffer(0),
|
||||
_patchIndexTexture(0), _patchParamTexture(0) {
|
||||
}
|
||||
|
||||
GLPatchTable::~GLPatchTable() {
|
||||
if (_indexBuffer) glDeleteBuffers(1, &_indexBuffer);
|
||||
if (_patchIndexBuffer) glDeleteBuffers(1, &_patchIndexBuffer);
|
||||
if (_patchParamBuffer) glDeleteBuffers(1, &_patchParamBuffer);
|
||||
if (_patchIndexTexture) glDeleteTextures(1, &_patchIndexTexture);
|
||||
if (_patchParamTexture) glDeleteTextures(1, &_patchParamTexture);
|
||||
}
|
||||
|
||||
@ -52,84 +56,56 @@ GLPatchTable::Create(Far::PatchTable const *farPatchTable,
|
||||
|
||||
bool
|
||||
GLPatchTable::allocate(Far::PatchTable const *farPatchTable) {
|
||||
glGenBuffers(1, &_indexBuffer);
|
||||
glGenBuffers(1, &_patchIndexBuffer);
|
||||
glGenBuffers(1, &_patchParamBuffer);
|
||||
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _indexBuffer);
|
||||
std::vector<int> buffer;
|
||||
std::vector<unsigned int> ppBuffer;
|
||||
CpuPatchTable patchTable(farPatchTable);
|
||||
|
||||
// needs reserve?
|
||||
size_t numPatchArrays = patchTable.GetNumPatchArrays();
|
||||
GLsizei indexSize = (GLsizei)patchTable.GetPatchIndexSize();
|
||||
GLsizei patchParamSize = (GLsizei)patchTable.GetPatchParamSize();
|
||||
|
||||
int nPatchArrays = farPatchTable->GetNumPatchArrays();
|
||||
|
||||
// for each patchArray
|
||||
for (int j = 0; j < nPatchArrays; ++j) {
|
||||
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
|
||||
farPatchTable->GetNumPatches(j),
|
||||
(int)buffer.size(),
|
||||
(int)ppBuffer.size()/3);
|
||||
_patchArrays.push_back(patchArray);
|
||||
|
||||
// indices
|
||||
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
|
||||
for (int k = 0; k < indices.size(); ++k) {
|
||||
buffer.push_back(indices[k]);
|
||||
}
|
||||
|
||||
// patchParams
|
||||
#if 0
|
||||
// XXX: we need sharpness interface for patcharray or put sharpness
|
||||
// into patchParam.
|
||||
Far::ConstPatchParamArray patchParams =
|
||||
farPatchTable->GetPatchParams(j);
|
||||
for (int k = 0; k < patchParams.size(); ++k) {
|
||||
float sharpness = 0.0;
|
||||
ppBuffer.push_back(patchParams[k].faceIndex);
|
||||
ppBuffer.push_back(patchParams[k].bitField.field);
|
||||
ppBuffer.push_back(*((unsigned int *)&sharpness));
|
||||
}
|
||||
#else
|
||||
// XXX: workaround. GetPatchParamTable() will be deprecated though.
|
||||
Far::PatchParamTable const & patchParamTable =
|
||||
farPatchTable->GetPatchParamTable();
|
||||
std::vector<Far::Index> const &sharpnessIndexTable =
|
||||
farPatchTable->GetSharpnessIndexTable();
|
||||
int numPatches = farPatchTable->GetNumPatches(j);
|
||||
for (int k = 0; k < numPatches; ++k) {
|
||||
float sharpness = 0.0;
|
||||
int patchIndex = (int)ppBuffer.size()/3;
|
||||
if (patchIndex < (int)sharpnessIndexTable.size()) {
|
||||
int sharpnessIndex = sharpnessIndexTable[patchIndex];
|
||||
if (sharpnessIndex >= 0)
|
||||
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
|
||||
}
|
||||
ppBuffer.push_back(patchParamTable[patchIndex].faceIndex);
|
||||
ppBuffer.push_back(patchParamTable[patchIndex].bitField.field);
|
||||
ppBuffer.push_back(*((unsigned int *)&sharpness));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
// copy patch array
|
||||
_patchArrays.insert(_patchArrays.end(),
|
||||
patchTable.GetPatchArrayBuffer(),
|
||||
patchTable.GetPatchArrayBuffer() + numPatchArrays);
|
||||
|
||||
// copy index buffer
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _patchIndexBuffer);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
|
||||
(int)buffer.size()*sizeof(int), &buffer[0], GL_STATIC_DRAW);
|
||||
|
||||
// patchParam is currently expected to be texture (it can be SSBO)
|
||||
GLuint texBuffer = 0;
|
||||
glGenBuffers(1, &texBuffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, texBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, ppBuffer.size()*sizeof(unsigned int),
|
||||
&ppBuffer[0], GL_STATIC_DRAW);
|
||||
|
||||
glGenTextures(1, &_patchParamTexture);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, texBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
glDeleteBuffers(1, &texBuffer);
|
||||
|
||||
indexSize * sizeof(GLint),
|
||||
patchTable.GetPatchIndexBuffer(),
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
|
||||
// copy patchparam buffer
|
||||
glBindBuffer(GL_ARRAY_BUFFER, _patchParamBuffer);
|
||||
glBufferData(GL_ARRAY_BUFFER,
|
||||
patchParamSize * sizeof(PatchParam),
|
||||
patchTable.GetPatchParamBuffer(),
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
|
||||
// make both buffer as texture buffers too.
|
||||
glGenTextures(1, &_patchIndexTexture);
|
||||
glGenTextures(1, &_patchParamTexture);
|
||||
|
||||
GLuint buffer;
|
||||
glGenBuffers(1, &buffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER,
|
||||
indexSize * sizeof(GLint),
|
||||
patchTable.GetPatchIndexBuffer(),
|
||||
GL_STATIC_DRAW);
|
||||
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _patchIndexTexture);
|
||||
// glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, _patchIndexBuffer);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, buffer);
|
||||
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, _patchParamBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -27,10 +27,9 @@
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include <vector>
|
||||
#include "../far/patchDescriptor.h"
|
||||
#include "../osd/nonCopyable.h"
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
@ -45,34 +44,6 @@ class GLPatchTable : private NonCopyable<GLPatchTable> {
|
||||
public:
|
||||
typedef GLuint VertexBufferBinding;
|
||||
|
||||
// XXX: this struct will be further refactored.
|
||||
class PatchArray {
|
||||
public:
|
||||
PatchArray(Far::PatchDescriptor desc, int numPatches,
|
||||
int indexBase, int primitiveIdBase) :
|
||||
desc(desc), numPatches(numPatches), indexBase(indexBase),
|
||||
primitiveIdBase(primitiveIdBase) {}
|
||||
Far::PatchDescriptor const &GetDescriptor() const {
|
||||
return desc;
|
||||
}
|
||||
int GetNumPatches() const {
|
||||
return numPatches;
|
||||
}
|
||||
int GetIndexBase() const {
|
||||
return indexBase;
|
||||
}
|
||||
int GetPrimitiveIdBase() const {
|
||||
return primitiveIdBase;
|
||||
}
|
||||
private:
|
||||
Far::PatchDescriptor desc;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
typedef std::vector<PatchArray> PatchArrayVector;
|
||||
|
||||
GLPatchTable();
|
||||
~GLPatchTable();
|
||||
|
||||
static GLPatchTable *Create(Far::PatchTable const *farPatchTable,
|
||||
@ -84,7 +55,17 @@ public:
|
||||
|
||||
/// Returns the GL index buffer containing the patch control vertices
|
||||
GLuint GetPatchIndexBuffer() const {
|
||||
return _indexBuffer;
|
||||
return _patchIndexBuffer;
|
||||
}
|
||||
|
||||
/// Returns the GL index buffer containing the patch parameter
|
||||
GLuint GetPatchParamBuffer() const {
|
||||
return _patchParamBuffer;
|
||||
}
|
||||
|
||||
/// Returns the GL texture buffer containing the patch control vertices
|
||||
GLuint GetPatchIndexTextureBuffer() const {
|
||||
return _patchIndexTexture;
|
||||
}
|
||||
|
||||
/// Returns the GL texture buffer containing the patch parameter
|
||||
@ -93,11 +74,17 @@ public:
|
||||
}
|
||||
|
||||
protected:
|
||||
GLPatchTable();
|
||||
|
||||
// allocate buffers from patchTable
|
||||
bool allocate(Far::PatchTable const *farPatchTable);
|
||||
|
||||
PatchArrayVector _patchArrays;
|
||||
GLuint _indexBuffer;
|
||||
|
||||
GLuint _patchIndexBuffer;
|
||||
GLuint _patchParamBuffer;
|
||||
|
||||
GLuint _patchIndexTexture;
|
||||
GLuint _patchParamTexture;
|
||||
};
|
||||
|
||||
|
@ -108,37 +108,36 @@ GLStencilTableTBO::~GLStencilTableTBO() {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
GLXFBEvaluator::GLXFBEvaluator() :
|
||||
_program(0), _srcBufferTexture(0),
|
||||
_uniformSrcBufferTexture(0), _uniformSizesTexture(0),
|
||||
_uniformOffsetsTexture(0), _uniformIndicesTexture(0),
|
||||
_uniformWeightsTexture(0), _uniformStart(0), _uniformEnd(0),
|
||||
_uniformSrcOffset(0) {
|
||||
GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) {
|
||||
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
|
||||
memset (&_patchKernel, 0, sizeof(_patchKernel));
|
||||
}
|
||||
|
||||
GLXFBEvaluator::~GLXFBEvaluator() {
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
if (_srcBufferTexture) {
|
||||
glDeleteTextures(1, &_srcBufferTexture);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc) {
|
||||
if (_program) {
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
}
|
||||
_program = glCreateProgram();
|
||||
static GLuint
|
||||
compileKernel(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const char *kernelDefine) {
|
||||
|
||||
GLuint program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
|
||||
|
||||
std::ostringstream defines;
|
||||
defines << "#define LENGTH " << srcDesc.length << "\n"
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n";
|
||||
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
|
||||
<< kernelDefine << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[3] = {"#version 410\n", NULL, NULL};
|
||||
@ -147,7 +146,7 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
glAttachShader(_program, shader);
|
||||
glAttachShader(program, shader);
|
||||
|
||||
std::vector<std::string> outputs;
|
||||
std::vector<const char *> pOutputs;
|
||||
@ -183,40 +182,83 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
}
|
||||
}
|
||||
|
||||
glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(),
|
||||
glTransformFeedbackVaryings(program, (GLsizei)outputs.size(),
|
||||
&pOutputs[0], GL_INTERLEAVED_ATTRIBS);
|
||||
|
||||
GLint linked = 0;
|
||||
glLinkProgram(_program);
|
||||
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
|
||||
glLinkProgram(program);
|
||||
glGetProgramiv(program, GL_LINK_STATUS, &linked);
|
||||
|
||||
if (linked == GL_FALSE) {
|
||||
char buffer[1024];
|
||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glGetProgramInfoLog(_program, 1024, NULL, buffer);
|
||||
glGetProgramInfoLog(program, 1024, NULL, buffer);
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
|
||||
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
return false;
|
||||
glDeleteProgram(program);
|
||||
program = 0;
|
||||
}
|
||||
|
||||
glDeleteShader(shader);
|
||||
|
||||
// set uniform locations for compute kernels
|
||||
_uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer");
|
||||
return program;
|
||||
}
|
||||
|
||||
_uniformSizesTexture = glGetUniformLocation(_program, "sizes");
|
||||
_uniformOffsetsTexture = glGetUniformLocation(_program, "offsets");
|
||||
_uniformIndicesTexture = glGetUniformLocation(_program, "indices");
|
||||
_uniformWeightsTexture = glGetUniformLocation(_program, "weights");
|
||||
bool
|
||||
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
|
||||
VertexBufferDescriptor const &dstDesc) {
|
||||
|
||||
_uniformStart = glGetUniformLocation(_program, "batchStart");
|
||||
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
|
||||
// create stencil kernel
|
||||
if (_stencilKernel.program) {
|
||||
glDeleteProgram(_stencilKernel.program);
|
||||
}
|
||||
_stencilKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS");
|
||||
if (_stencilKernel.program == 0) return false;
|
||||
|
||||
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
|
||||
// cache uniform locations
|
||||
_stencilKernel.uniformSrcBufferTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "vertexBuffer");
|
||||
_stencilKernel.uniformSrcOffset
|
||||
= glGetUniformLocation(_stencilKernel.program, "srcOffset");
|
||||
|
||||
_stencilKernel.uniformSizesTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "sizes");
|
||||
_stencilKernel.uniformOffsetsTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "offsets");
|
||||
_stencilKernel.uniformIndicesTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "indices");
|
||||
_stencilKernel.uniformWeightsTexture
|
||||
= glGetUniformLocation(_stencilKernel.program, "weights");
|
||||
_stencilKernel.uniformStart
|
||||
= glGetUniformLocation(_stencilKernel.program, "batchStart");
|
||||
_stencilKernel.uniformEnd
|
||||
= glGetUniformLocation(_stencilKernel.program, "batchEnd");
|
||||
|
||||
// create patch kernel
|
||||
if (_patchKernel.program) {
|
||||
glDeleteProgram(_patchKernel.program);
|
||||
}
|
||||
_patchKernel.program = compileKernel(
|
||||
srcDesc, dstDesc,
|
||||
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES");
|
||||
if (_patchKernel.program == 0) return false;
|
||||
|
||||
// cache uniform locations
|
||||
_patchKernel.uniformSrcBufferTexture
|
||||
= glGetUniformLocation(_patchKernel.program, "vertexBuffer");
|
||||
_patchKernel.uniformSrcOffset
|
||||
= glGetUniformLocation(_patchKernel.program, "srcOffset");
|
||||
|
||||
_patchKernel.uniformPatchArray
|
||||
= glGetUniformLocation(_patchKernel.program, "patchArray");
|
||||
_patchKernel.uniformPatchParamTexture
|
||||
= glGetUniformLocation(_patchKernel.program, "patchParamBuffer");
|
||||
_patchKernel.uniformPatchIndexTexture
|
||||
= glGetUniformLocation(_patchKernel.program, "patchIndexBuffer");
|
||||
|
||||
// create a texture for input buffer
|
||||
if (!_srcBufferTexture) {
|
||||
@ -255,7 +297,7 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
GLuint weightsTexture,
|
||||
int start,
|
||||
int end) const {
|
||||
if (!_program) return false;
|
||||
if (!_stencilKernel.program) return false;
|
||||
int count = end - start;
|
||||
if (count <= 0) {
|
||||
return true;
|
||||
@ -268,25 +310,25 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
glBindVertexArray(vao);
|
||||
|
||||
glEnable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(_program);
|
||||
glUseProgram(_stencilKernel.program);
|
||||
|
||||
// Set input VBO as a texture buffer.
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0);
|
||||
bindTexture(_stencilKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
|
||||
|
||||
// bind stencil table textures.
|
||||
bindTexture(_uniformSizesTexture, sizesTexture, 1);
|
||||
bindTexture(_uniformOffsetsTexture, offsetsTexture, 2);
|
||||
bindTexture(_uniformIndicesTexture, indicesTexture, 3);
|
||||
bindTexture(_uniformWeightsTexture, weightsTexture, 4);
|
||||
bindTexture(_stencilKernel.uniformSizesTexture, sizesTexture, 1);
|
||||
bindTexture(_stencilKernel.uniformOffsetsTexture, offsetsTexture, 2);
|
||||
bindTexture(_stencilKernel.uniformIndicesTexture, indicesTexture, 3);
|
||||
bindTexture(_stencilKernel.uniformWeightsTexture, weightsTexture, 4);
|
||||
|
||||
// set batch range
|
||||
glUniform1i(_uniformStart, start);
|
||||
glUniform1i(_uniformEnd, end);
|
||||
glUniform1i(_uniformSrcOffset, srcDesc.offset);
|
||||
glUniform1i(_stencilKernel.uniformStart, start);
|
||||
glUniform1i(_stencilKernel.uniformEnd, end);
|
||||
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
|
||||
|
||||
// The destination buffer is bound at vertex boundary.
|
||||
//
|
||||
@ -347,6 +389,94 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
GLXFBEvaluator::EvalPatches(
|
||||
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const & /*duDesc*/,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const & /*dvDesc*/,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexTexture,
|
||||
GLuint patchParamTexture) const {
|
||||
if (!_patchKernel.program) return false;
|
||||
|
||||
if (duBuffer != 0 || dvBuffer != 0) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"GLXFBEvaluator doesn't support derivative evaluation yet.\n");
|
||||
}
|
||||
|
||||
// bind vertex array
|
||||
// always create new one, to be safe with multiple contexts (slow though)
|
||||
GLuint vao = 0;
|
||||
glGenVertexArrays(1, &vao);
|
||||
glBindVertexArray(vao);
|
||||
|
||||
glEnable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(_patchKernel.program);
|
||||
|
||||
// Set input VBO as a texture buffer.
|
||||
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
|
||||
bindTexture(_patchKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
|
||||
|
||||
// bind patch index and patch param textures.
|
||||
bindTexture(_patchKernel.uniformPatchParamTexture, patchParamTexture, 1);
|
||||
bindTexture(_patchKernel.uniformPatchIndexTexture, patchIndexTexture, 2);
|
||||
|
||||
// set other uniforms
|
||||
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
|
||||
(const GLint*)&patchArrays[0]);
|
||||
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
|
||||
|
||||
// input patchcoords
|
||||
glEnableVertexAttribArray(0);
|
||||
glEnableVertexAttribArray(1);
|
||||
int stride = sizeof(int) * 5; // patchcoord = int*5 struct
|
||||
glBindBuffer(GL_ARRAY_BUFFER, patchCoordsBuffer);
|
||||
glVertexAttribIPointer(0, 3, GL_UNSIGNED_INT, stride, (void*)0);
|
||||
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, stride, (void*)(sizeof(int)*3));
|
||||
|
||||
int dstBufferBindOffset =
|
||||
dstDesc.offset - (dstDesc.offset % dstDesc.stride);
|
||||
|
||||
// bind destination buffer
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
0, dstBuffer,
|
||||
dstBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * dstDesc.stride * sizeof(float));
|
||||
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
glDrawArrays(GL_POINTS, 0, numPatchCoords);
|
||||
glEndTransformFeedback();
|
||||
|
||||
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
|
||||
|
||||
// unbind textures
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
glActiveTexture(GL_TEXTURE0 + i);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
|
||||
glDisable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
|
||||
glDisableVertexAttribArray(0);
|
||||
glDisableVertexAttribArray(1);
|
||||
|
||||
// revert vao
|
||||
glBindVertexArray(0);
|
||||
glDeleteVertexArrays(1, &vao);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include "../osd/opengl.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -93,6 +94,12 @@ public:
|
||||
/// Destructor. note that the GL context must be made current.
|
||||
~GLXFBEvaluator();
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static stencil function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
@ -120,25 +127,25 @@ public:
|
||||
///
|
||||
/// @param deviceContext not used in the GLSLTransformFeedback kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
return instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
|
||||
dstVertexBuffer, dstDesc,
|
||||
bool r = instance->EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
stencilTable);
|
||||
delete instance;
|
||||
return r;
|
||||
@ -147,18 +154,30 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
return EvalStencils(srcVertexBuffer->BindVBO(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindVBO(),
|
||||
dstDesc,
|
||||
/// \brief dispatch eval stencils function.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object for destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable) const {
|
||||
|
||||
return EvalStencils(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
stencilTable->GetSizesTexture(),
|
||||
stencilTable->GetOffsetsTexture(),
|
||||
stencilTable->GetIndicesTexture(),
|
||||
@ -167,12 +186,31 @@ public:
|
||||
/* end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// Dispatch the GLSL compute kernel on GPU asynchronously.
|
||||
/// returns false if the kernel hasn't been compiled yet.
|
||||
bool EvalStencils(GLuint srcBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
/// \brief Static eval stencils function, dispatch the GLSL XFB kernel on
|
||||
/// on GPU asynchronously.
|
||||
///
|
||||
/// @param srcBuffer GL buffer of input primvars.
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer GL buffer of output primvars.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizesBuffer GL buffer of the sizes in the stencil table
|
||||
///
|
||||
/// @param offsetsBuffer GL buffer of the offsets in the stencil table
|
||||
///
|
||||
/// @param indicesBuffer GL buffer of the indices in the stencil table
|
||||
///
|
||||
/// @param weightsBuffer GL buffer of the weifgrs in the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint sizesBuffer,
|
||||
GLuint offsetsBuffer,
|
||||
GLuint indicesBuffer,
|
||||
@ -180,6 +218,270 @@ public:
|
||||
int start,
|
||||
int end) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of source data
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindVBO() method returning a GL
|
||||
/// buffer object of destination data
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer
|
||||
///
|
||||
/// @param duDesc
|
||||
///
|
||||
/// @param dvBuffer
|
||||
///
|
||||
/// @param dvDesc
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindVBO() method returning an
|
||||
/// array of PatchCoord struct in VBO.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
/// @param instance cached compiled instance. Clients are supposed to
|
||||
/// pre-compile an instance of this class and provide
|
||||
/// to this function. If it's null the kernel still
|
||||
/// compute by instantiating on-demand kernel although
|
||||
/// it may cause a performance problem.
|
||||
///
|
||||
/// @param deviceContext not used in the GLXFB evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
GLXFBEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
if (instance) {
|
||||
return instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
} else {
|
||||
// Create an instance on demand (slow)
|
||||
(void)deviceContext; // unused
|
||||
instance = Create(srcDesc, dstDesc);
|
||||
if (instance) {
|
||||
bool r = instance->EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchTable);
|
||||
delete instance;
|
||||
return r;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
0, VertexBufferDescriptor(),
|
||||
0, VertexBufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexTextureBuffer(),
|
||||
patchTable->GetPatchParamTextureBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output s-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output t-derivatives buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable GLPatchTable or equivalent
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable) const {
|
||||
|
||||
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
|
||||
dstBuffer->BindVBO(), dstDesc,
|
||||
duBuffer->BindVBO(), duDesc,
|
||||
dvBuffer->BindVBO(), dvDesc,
|
||||
numPatchCoords,
|
||||
patchCoords->BindVBO(),
|
||||
patchTable->GetPatchArrays(),
|
||||
patchTable->GetPatchIndexTextureBuffer(),
|
||||
patchTable->GetPatchParamTextureBuffer());
|
||||
}
|
||||
|
||||
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const;
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// Configure GLSL kernel. A valid GL context must be made current before
|
||||
/// calling this function. Returns false if it fails to compile the kernel.
|
||||
bool Compile(VertexBufferDescriptor const &srcDesc,
|
||||
@ -189,19 +491,31 @@ public:
|
||||
static void Synchronize(void *kernel);
|
||||
|
||||
private:
|
||||
GLuint _program;
|
||||
|
||||
GLuint _srcBufferTexture;
|
||||
|
||||
GLuint _uniformSrcBufferTexture;
|
||||
GLuint _uniformSizesTexture;
|
||||
GLuint _uniformOffsetsTexture;
|
||||
GLuint _uniformIndicesTexture;
|
||||
GLuint _uniformWeightsTexture;
|
||||
struct _StencilKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSrcBufferTexture;
|
||||
GLuint uniformSrcOffset; // src buffer offset (in elements)
|
||||
|
||||
GLuint uniformSizesTexture;
|
||||
GLuint uniformOffsetsTexture;
|
||||
GLuint uniformIndicesTexture;
|
||||
GLuint uniformWeightsTexture;
|
||||
GLuint uniformStart; // range
|
||||
GLuint uniformEnd;
|
||||
} _stencilKernel;
|
||||
|
||||
struct _PatchKernel {
|
||||
GLuint program;
|
||||
GLuint uniformSrcBufferTexture;
|
||||
GLuint uniformSrcOffset; // src buffer offset (in elements)
|
||||
|
||||
GLuint uniformPatchArray;
|
||||
GLuint uniformPatchParamTexture;
|
||||
GLuint uniformPatchIndexTexture;
|
||||
} _patchKernel;
|
||||
|
||||
GLuint _uniformStart; // range
|
||||
GLuint _uniformEnd;
|
||||
GLuint _uniformSrcOffset; // src buffer offset (in elements)
|
||||
};
|
||||
|
||||
} // end namespace Osd
|
||||
|
@ -24,18 +24,10 @@
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
uniform int srcOffset = 0;
|
||||
uniform int dstOffset = 0;
|
||||
|
||||
layout(binding=0) buffer src_buffer { float srcVertexBuffer[]; };
|
||||
layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; };
|
||||
layout(binding=2) buffer stencilSizes { int _sizes[]; };
|
||||
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
|
||||
layout(binding=4) buffer stencilIndices { int _indices[]; };
|
||||
layout(binding=5) buffer stencilWeights { float _weights[]; };
|
||||
|
||||
layout(local_size_x=WORK_GROUP_SIZE, local_size_y=1, local_size_z=1) in;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -73,6 +65,15 @@ void addWithWeight(inout Vertex v, const Vertex src, float weight) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
|
||||
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
layout(binding=2) buffer stencilSizes { int _sizes[]; };
|
||||
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
|
||||
layout(binding=4) buffer stencilIndices { int _indices[]; };
|
||||
layout(binding=5) buffer stencilWeights { float _weights[]; };
|
||||
|
||||
void main() {
|
||||
|
||||
int current = int(gl_GlobalInvocationID.x) + batchStart;
|
||||
@ -94,4 +95,194 @@ void main() {
|
||||
writeVertex(current, dst);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
|
||||
|
||||
// PERFORMANCE: stride could be constant, but not as significant as length
|
||||
|
||||
//struct PatchArray {
|
||||
// int patchType;
|
||||
// int numPatches;
|
||||
// int indexBase; // an offset within the index buffer
|
||||
// int primitiveIdBase; // an offset within the patch param buffer
|
||||
//};
|
||||
// # of patcharrays is 1 or 2.
|
||||
|
||||
uniform ivec4 patchArray[2];
|
||||
uniform ivec3 dstDuDesc;
|
||||
uniform ivec3 dstDvDesc;
|
||||
layout(binding=2) buffer du_buffer { float dstDuBuffer[]; };
|
||||
layout(binding=3) buffer dv_buffer { float dstDvBuffer[]; };
|
||||
|
||||
struct PatchCoord {
|
||||
int arrayIndex;
|
||||
int patchIndex;
|
||||
int vertIndex;
|
||||
float s;
|
||||
float t;
|
||||
};
|
||||
|
||||
struct PatchParam {
|
||||
int faceIndex;
|
||||
uint patchBits;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
layout(binding=4) buffer patchCoord_buffer { PatchCoord patchCoords[]; };
|
||||
layout(binding=5) buffer patchIndex_buffer { int patchIndexBuffer[]; };
|
||||
layout(binding=6) buffer patchParam_buffer { PatchParam patchParamBuffer[]; };
|
||||
|
||||
void writeDu(int index, Vertex du) {
|
||||
int duIndex = dstDuDesc.x + index * dstDuDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
dstDuBuffer[duIndex + i] = du.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDv(int index, Vertex dv) {
|
||||
int dvIndex = dstDvDesc.x + index * dstDvDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
dstDvBuffer[dvIndex + i] = dv.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void getBSplineWeights(float t, inout vec4 point, inout vec4 deriv) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point.w = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
deriv.x = -0.5f*t2 + t - 0.5f;
|
||||
deriv.y = 1.5f*t2 - 2.0f*t;
|
||||
deriv.z = -1.5f*t2 + t + 0.5f;
|
||||
deriv.w = 0.5f*t2;
|
||||
}
|
||||
|
||||
uint getDepth(uint patchBits) {
|
||||
return (patchBits & 0x7);
|
||||
}
|
||||
|
||||
float getParamFraction(uint patchBits) {
|
||||
uint nonQuadRoot = (patchBits >> 3) & 0x1;
|
||||
uint depth = getDepth(patchBits);
|
||||
if (nonQuadRoot == 1) {
|
||||
return 1.0f / float( 1 << (depth-1) );
|
||||
} else {
|
||||
return 1.0f / float( 1 << depth );
|
||||
}
|
||||
}
|
||||
|
||||
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
|
||||
float frac = getParamFraction(patchBits);
|
||||
|
||||
uint iu = (patchBits >> 22) & 0x3ff;
|
||||
uint iv = (patchBits >> 12) & 0x3ff;
|
||||
|
||||
// top left corner
|
||||
float pu = float(iu*frac);
|
||||
float pv = float(iv*frac);
|
||||
|
||||
// normalize u,v coordinates
|
||||
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
|
||||
}
|
||||
|
||||
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
|
||||
uint boundary = ((bits >> 4) & 0xf);
|
||||
|
||||
if ((boundary & 1) != 0) {
|
||||
tWeights[2] -= tWeights[0];
|
||||
tWeights[1] += 2*tWeights[0];
|
||||
tWeights[0] = 0;
|
||||
}
|
||||
if ((boundary & 2) != 0) {
|
||||
sWeights[1] -= sWeights[3];
|
||||
sWeights[2] += 2*sWeights[3];
|
||||
sWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 4) != 0) {
|
||||
tWeights[1] -= tWeights[3];
|
||||
tWeights[2] += 2*tWeights[3];
|
||||
tWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 8) != 0) {
|
||||
sWeights[2] -= sWeights[0];
|
||||
sWeights[1] += 2*sWeights[0];
|
||||
sWeights[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
|
||||
int current = int(gl_GlobalInvocationID.x);
|
||||
|
||||
PatchCoord coord = patchCoords[current];
|
||||
int patchIndex = coord.patchIndex;
|
||||
|
||||
ivec4 array = patchArray[coord.arrayIndex];
|
||||
int patchType = 6; // array.x XXX: REGULAR only for now.
|
||||
int numControlVertices = 16;
|
||||
|
||||
uint patchBits = patchParamBuffer[patchIndex].patchBits;
|
||||
vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t));
|
||||
float dScale = float(1 << getDepth(patchBits));
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
if (patchType == 6) { // REGULAR
|
||||
vec4 sWeights, tWeights, dsWeights, dtWeights;
|
||||
getBSplineWeights(uv.x, sWeights, dsWeights);
|
||||
getBSplineWeights(uv.y, tWeights, dtWeights);
|
||||
|
||||
adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
|
||||
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// TODO: GREGORY BASIS
|
||||
}
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
int indexBase = array.z + coord.vertIndex;
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(dst, readVertex(index), wP[i]);
|
||||
}
|
||||
writeVertex(current, dst);
|
||||
|
||||
if (dstDuDesc.y > 0) { // length
|
||||
Vertex du;
|
||||
clear(du);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(du, readVertex(index), wDs[i]);
|
||||
}
|
||||
writeDu(current, du);
|
||||
}
|
||||
if (dstDvDesc.y > 0) {
|
||||
Vertex dv;
|
||||
clear(dv);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(dv, readVertex(index), wDt[i]);
|
||||
}
|
||||
writeDv(current, dv);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -25,18 +25,8 @@
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
uniform samplerBuffer vertexBuffer;
|
||||
|
||||
out float outVertexBuffer[LENGTH];
|
||||
|
||||
uniform usamplerBuffer sizes;
|
||||
uniform isamplerBuffer offsets;
|
||||
uniform isamplerBuffer indices;
|
||||
uniform samplerBuffer weights;
|
||||
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
|
||||
uniform int srcOffset = 0;
|
||||
out float outVertexBuffer[LENGTH];
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
@ -72,6 +62,16 @@ void writeVertex(Vertex v) {
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS)
|
||||
|
||||
uniform usamplerBuffer sizes;
|
||||
uniform isamplerBuffer offsets;
|
||||
uniform isamplerBuffer indices;
|
||||
uniform samplerBuffer weights;
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
|
||||
void main() {
|
||||
|
||||
int current = gl_VertexID + batchStart;
|
||||
@ -97,4 +97,146 @@ void main() {
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES)
|
||||
|
||||
layout (location = 0) in ivec3 patchHandles;
|
||||
layout (location = 1) in vec2 patchCoords;
|
||||
|
||||
//struct PatchArray {
|
||||
// int patchType;
|
||||
// int numPatches;
|
||||
// int indexBase; // an offset within the index buffer
|
||||
// int primitiveIdBase; // an offset within the patch param buffer
|
||||
//};
|
||||
// # of patcharrays is 1 or 2.
|
||||
|
||||
uniform ivec4 patchArray[2];
|
||||
uniform isamplerBuffer patchParamBuffer;
|
||||
uniform isamplerBuffer patchIndexBuffer;
|
||||
|
||||
void getBSplineWeights(float t, inout vec4 point, vec4 deriv) {
|
||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
||||
float one6th = 1.0f / 6.0f;
|
||||
|
||||
float t2 = t * t;
|
||||
float t3 = t * t2;
|
||||
|
||||
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
||||
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
||||
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
||||
point.w = one6th * ( t3);
|
||||
|
||||
// Derivatives of the above four basis functions at t:
|
||||
/* if (deriv) { */
|
||||
/* deriv[0] = -0.5f*t2 + t - 0.5f; */
|
||||
/* deriv[1] = 1.5f*t2 - 2.0f*t; */
|
||||
/* deriv[2] = -1.5f*t2 + t + 0.5f; */
|
||||
/* deriv[3] = 0.5f*t2; */
|
||||
/* } */
|
||||
}
|
||||
|
||||
uint getDepth(uint patchBits) {
|
||||
return (patchBits & 0x7);
|
||||
}
|
||||
|
||||
float getParamFraction(uint patchBits) {
|
||||
uint nonQuadRoot = (patchBits >> 3) & 0x1;
|
||||
uint depth = getDepth(patchBits);
|
||||
if (nonQuadRoot == 1) {
|
||||
return 1.0f / float( 1 << (depth-1) );
|
||||
} else {
|
||||
return 1.0f / float( 1 << depth );
|
||||
}
|
||||
}
|
||||
|
||||
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
|
||||
float frac = getParamFraction(patchBits);
|
||||
|
||||
uint iu = (patchBits >> 22) & 0x3ff;
|
||||
uint iv = (patchBits >> 12) & 0x3ff;
|
||||
|
||||
// top left corner
|
||||
float pu = float(iu*frac);
|
||||
float pv = float(iv*frac);
|
||||
|
||||
// normalize u,v coordinates
|
||||
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
|
||||
}
|
||||
|
||||
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
|
||||
uint boundary = ((bits >> 4) & 0xf);
|
||||
|
||||
if ((boundary & 1) != 0) {
|
||||
tWeights[2] -= tWeights[0];
|
||||
tWeights[1] += 2*tWeights[0];
|
||||
tWeights[0] = 0;
|
||||
}
|
||||
if ((boundary & 2) != 0) {
|
||||
sWeights[1] -= sWeights[3];
|
||||
sWeights[2] += 2*sWeights[3];
|
||||
sWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 4) != 0) {
|
||||
tWeights[1] -= tWeights[3];
|
||||
tWeights[2] += 2*tWeights[3];
|
||||
tWeights[3] = 0;
|
||||
}
|
||||
if ((boundary & 8) != 0) {
|
||||
sWeights[2] -= sWeights[0];
|
||||
sWeights[1] += 2*sWeights[0];
|
||||
sWeights[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
int current = gl_VertexID;
|
||||
|
||||
ivec3 handle = patchHandles;
|
||||
int patchIndex = handle.y;
|
||||
|
||||
vec2 coord = patchCoords;
|
||||
ivec4 array = patchArray[handle.x];
|
||||
int patchType = array.x;
|
||||
int numControlVertices = 16;
|
||||
|
||||
uint patchBits = texelFetch(patchParamBuffer, patchIndex).y;
|
||||
|
||||
// normalize
|
||||
coord = normalizePatchCoord(patchBits, coord);
|
||||
|
||||
// XXX: dScale for derivative
|
||||
|
||||
// if regular
|
||||
float wP[20];
|
||||
{
|
||||
vec4 sWeights, tWeights, dsWeights, dtWeights;
|
||||
getBSplineWeights(coord.s, sWeights, dsWeights);
|
||||
getBSplineWeights(coord.t, tWeights, dtWeights);
|
||||
|
||||
adjustBoundaryWeights(patchBits, sWeights, tWeights);
|
||||
|
||||
for (int k = 0; k < 4; ++k) {
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
wP[4*k+l] = sWeights[l] * tWeights[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Vertex dst;
|
||||
clear(dst);
|
||||
|
||||
int indexBase = array.z + handle.z;
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = texelFetch(patchIndexBuffer, indexBase + i).x;
|
||||
addWithWeight(dst, readVertex(index), wP[i]);
|
||||
}
|
||||
|
||||
writeVertex(dst);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -171,7 +171,12 @@ public:
|
||||
|
||||
for(typename Evaluators::iterator it = _evaluators.begin();
|
||||
it != _evaluators.end(); ++it) {
|
||||
if (it->srcDesc.length == srcDesc.length and
|
||||
// Note: XFB kernel needs to be configured with the local offset
|
||||
// of the dstDesc to skip preceding primvars.
|
||||
int dstOffset1 = it->dstDesc.offset % it->dstDesc.stride;
|
||||
int dstOffset2 = dstDesc.offset % dstDesc.stride;
|
||||
if (dstOffset1 == dstOffset2 and
|
||||
it->srcDesc.length == srcDesc.length and
|
||||
it->srcDesc.stride == srcDesc.stride and
|
||||
it->dstDesc.length == dstDesc.length and
|
||||
it->dstDesc.stride == dstDesc.stride) {
|
||||
|
@ -24,6 +24,7 @@
|
||||
|
||||
#include "../osd/ompEvaluator.h"
|
||||
#include "../osd/ompKernel.h"
|
||||
#include "../far/patchBasis.h"
|
||||
#include <omp.h>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -33,24 +34,212 @@ namespace Osd {
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
if (end <= start) return true;
|
||||
OmpEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
|
||||
// we can probably expand cpuKernel.cpp to here.
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
// XXX: we can probably expand cpuKernel.cpp to here.
|
||||
OmpEvalStencils(src, srcDesc, dst, dstDesc,
|
||||
sizes, offsets, indices, weights, start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
|
||||
OmpEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct BufferAdapter {
|
||||
BufferAdapter(T *p, int length, int stride) :
|
||||
_p(p), _length(length), _stride(stride) { }
|
||||
void Clear() {
|
||||
for (int i = 0; i < _length; ++i) _p[i] = 0;
|
||||
}
|
||||
void AddWithWeight(T const *src, float w) {
|
||||
if (_p) {
|
||||
// TODO: derivatives.
|
||||
for (int i = 0; i < _length; ++i) {
|
||||
_p[i] += src[i] * w;
|
||||
}
|
||||
}
|
||||
}
|
||||
const T *operator[] (int index) const {
|
||||
return _p + _stride * index;
|
||||
}
|
||||
BufferAdapter<T> & operator ++() {
|
||||
if (_p) {
|
||||
_p += _stride;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
T *_p;
|
||||
int _length;
|
||||
int _stride;
|
||||
};
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer){
|
||||
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
else return false;
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
|
||||
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
// XXX: patchIndex is absolute. not sure it's consistent.
|
||||
// (should be offsetted by array.primitiveIdBase?)
|
||||
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer) {
|
||||
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
|
||||
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
|
||||
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
const int *cvs =
|
||||
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
duT.Clear();
|
||||
dvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
duT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
}
|
||||
++dstT;
|
||||
++duT;
|
||||
++dvT;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
void
|
||||
OmpEvaluator::Synchronize(void * /*deviceContext*/) {
|
||||
|
@ -28,7 +28,7 @@
|
||||
#include "../version.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
@ -38,9 +38,15 @@ namespace Osd {
|
||||
|
||||
class OmpEvaluator {
|
||||
public:
|
||||
/// \brief Generic static compute function. This function has a same
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static eval stencils function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
/// in the same way from OsdMesh template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
@ -54,29 +60,27 @@ public:
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the omp kernel
|
||||
/// @param instance not used in the omp kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the omp kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
OmpEvaluator const * instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused;
|
||||
(void)deviceContext; // unused;
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const OmpEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -85,17 +89,376 @@ public:
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// stencil compute function.
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start,
|
||||
int end);
|
||||
/// \brief Static eval stencils function which takes raw CPU pointers for
|
||||
/// input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the omp kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the omp kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const OmpEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
static void Synchronize(void *deviceContext = NULL);
|
||||
|
||||
|
@ -117,10 +117,82 @@ OmpEvalStencils(float const * src,
|
||||
|
||||
copy(dst, i, threadResult, dstDesc);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OmpEvalStencils(float const * src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float * dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float * dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float * dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end) {
|
||||
if (start > 0) {
|
||||
sizes += start;
|
||||
indices += offsets[start];
|
||||
weights += offsets[start];
|
||||
duWeights += offsets[start];
|
||||
dvWeights += offsets[start];
|
||||
}
|
||||
|
||||
src += srcDesc.offset;
|
||||
dst += dstDesc.offset;
|
||||
dstDu += dstDuDesc.offset;
|
||||
dstDv += dstDvDesc.offset;
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
int n = end - start;
|
||||
|
||||
float * result = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < n; ++i) {
|
||||
|
||||
int index = i + (start > 0 ? start : 0); // Stencil index
|
||||
|
||||
// Get thread-local pointers
|
||||
int const * threadIndices = indices + offsets[index];
|
||||
float const * threadWeights = weights + offsets[index];
|
||||
float const * threadWeightsDu = duWeights + offsets[index];
|
||||
float const * threadWeightsDv = dvWeights + offsets[index];
|
||||
|
||||
int threadId = omp_get_thread_num();
|
||||
|
||||
float * threadResult = result + threadId*srcDesc.length;
|
||||
float * threadResultDu = resultDu + threadId*srcDesc.length;
|
||||
float * threadResultDv = resultDv + threadId*srcDesc.length;
|
||||
|
||||
clear(threadResult, dstDesc);
|
||||
clear(threadResultDu, dstDuDesc);
|
||||
clear(threadResultDv, dstDvDesc);
|
||||
|
||||
for (int j=0; j<(int)sizes[index]; ++j) {
|
||||
addWithWeight(threadResult, src,
|
||||
threadIndices[j], threadWeights[j], srcDesc);
|
||||
addWithWeight(threadResultDu, src,
|
||||
threadIndices[j], threadWeightsDu[j], srcDesc);
|
||||
addWithWeight(threadResultDv, src,
|
||||
threadIndices[j], threadWeightsDv[j], srcDesc);
|
||||
}
|
||||
|
||||
copy(dst, i, threadResult, dstDesc);
|
||||
copy(dstDu, i, threadResultDu, dstDuDesc);
|
||||
copy(dstDv, i, threadResultDv, dstDvDesc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
||||
|
@ -45,6 +45,23 @@ OmpEvalStencils(float const * src,
|
||||
float const * weights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
OmpEvalStencils(float const * src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float * dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float * dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float * dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -34,15 +34,15 @@ namespace Osd {
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
TbbEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
|
||||
TbbEvalStencils(src, srcDesc, dst, dstDesc,
|
||||
@ -51,6 +51,82 @@ TbbEvaluator::EvalStencils(const float *src,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
|
||||
TbbEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
TbbEvalPatches(src, srcDesc, dst, dstDesc,
|
||||
NULL, VertexBufferDescriptor(),
|
||||
NULL, VertexBufferDescriptor(),
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
TbbEvalPatches(src, srcDesc, dst, dstDesc,
|
||||
du, duDesc, dv, dvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
void
|
||||
TbbEvaluator::Synchronize(void *) {
|
||||
|
@ -26,7 +26,9 @@
|
||||
#define OPENSUBDIV3_OSD_TBB_EVALUATOR_H
|
||||
|
||||
#include "../version.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../far/patchTable.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
@ -37,9 +39,15 @@ namespace Osd {
|
||||
|
||||
class TbbEvaluator {
|
||||
public:
|
||||
/// \brief Generic static stencil eval function. This function has a same
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static eval stencils function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// transparently from OsdMesh template interface.
|
||||
/// in the same way from OsdMesh template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
@ -55,27 +63,25 @@ public:
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
/// @param instance not used in the tbb kernel
|
||||
/// @param instance not used in the tbb kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the tbb kernel
|
||||
///
|
||||
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
VERTEX_BUFFER *dstVertexBuffer,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void *deviceContext = NULL) {
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void *deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstVertexBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
@ -84,19 +90,381 @@ public:
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
static bool EvalStencils(const float *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
const int *sizes,
|
||||
const int *offsets,
|
||||
const int *indices,
|
||||
const float *weights,
|
||||
int start,
|
||||
int end);
|
||||
/// \brief Static eval stencils function which takes raw CPU pointers for
|
||||
/// input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
const int *sizes,
|
||||
const int *offsets,
|
||||
const int *indices,
|
||||
const float *weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
/// @param instance not used in the tbb kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the tbb kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const TbbEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output s-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dv Output t-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the u-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the v-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(),
|
||||
srcDesc,
|
||||
dstBuffer->BindCpuBuffer(),
|
||||
dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output s-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output t-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable Far::PatchTable
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(
|
||||
srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output s-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dv Output t-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, VertexBufferDescriptor const &srcDesc,
|
||||
float *dst, VertexBufferDescriptor const &dstDesc,
|
||||
float *du, VertexBufferDescriptor const &duDesc,
|
||||
float *dv, VertexBufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief synchronize all asynchronous computation invoked on this device.
|
||||
static void Synchronize(void *deviceContext = NULL);
|
||||
|
||||
/// \brief initialize tbb task schedular
|
||||
/// (optional: client may use tbb::task_scheduler_init)
|
||||
///
|
||||
/// @param numThreads how many threads
|
||||
///
|
||||
static void SetNumThreads(int numThreads);
|
||||
};
|
||||
|
||||
|
@ -24,7 +24,9 @@
|
||||
|
||||
#include "../osd/cpuKernel.h"
|
||||
#include "../osd/tbbKernel.h"
|
||||
#include "../osd/types.h"
|
||||
#include "../osd/vertexDescriptor.h"
|
||||
#include "../far/patchBasis.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
@ -187,6 +189,274 @@ TbbEvalStencils(float const * src,
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
void
|
||||
TbbEvalStencils(float const * src, VertexBufferDescriptor const &srcDesc,
|
||||
float * dst, VertexBufferDescriptor const &dstDesc,
|
||||
float * du, VertexBufferDescriptor const &duDesc,
|
||||
float * dv, VertexBufferDescriptor const &dvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end) {
|
||||
if (start > 0) {
|
||||
sizes += start;
|
||||
indices += offsets[start];
|
||||
weights += offsets[start];
|
||||
duWeights += offsets[start];
|
||||
dvWeights += offsets[start];
|
||||
}
|
||||
|
||||
if (src) src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
// PERFORMANCE: need to combine 3 launches together
|
||||
if (dst) {
|
||||
TBBStencilKernel kernel(src, srcDesc, dst, dstDesc,
|
||||
sizes, offsets, indices, weights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (du) {
|
||||
TBBStencilKernel kernel(src, srcDesc, du, duDesc,
|
||||
sizes, offsets, indices, duWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (dv) {
|
||||
TBBStencilKernel kernel(src, srcDesc, dv, dvDesc,
|
||||
sizes, offsets, indices, dvWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
struct BufferAdapter {
|
||||
BufferAdapter(T *p, int length, int stride) :
|
||||
_p(p), _length(length), _stride(stride) { }
|
||||
void Clear() {
|
||||
for (int i = 0; i < _length; ++i) _p[i] = 0;
|
||||
}
|
||||
void AddWithWeight(T const *src, float w) {
|
||||
if (_p) {
|
||||
for (int i = 0; i < _length; ++i) {
|
||||
_p[i] += src[i] * w;
|
||||
}
|
||||
}
|
||||
}
|
||||
const T *operator[] (int index) const {
|
||||
return _p + _stride * index;
|
||||
}
|
||||
BufferAdapter<T> & operator ++() {
|
||||
if (_p) {
|
||||
_p += _stride;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
T *_p;
|
||||
int _length;
|
||||
int _stride;
|
||||
};
|
||||
|
||||
class TbbEvalPatchesKernel {
|
||||
VertexBufferDescriptor _srcDesc;
|
||||
VertexBufferDescriptor _dstDesc;
|
||||
VertexBufferDescriptor _dstDuDesc;
|
||||
VertexBufferDescriptor _dstDvDesc;
|
||||
float const * _src;
|
||||
float * _dst;
|
||||
float * _dstDu;
|
||||
float * _dstDv;
|
||||
int _numPatchCoords;
|
||||
const PatchCoord *_patchCoords;
|
||||
const PatchArray *_patchArrayBuffer;
|
||||
const int *_patchIndexBuffer;
|
||||
const PatchParam *_patchParamBuffer;
|
||||
|
||||
public:
|
||||
TbbEvalPatchesKernel(float const *src,
|
||||
VertexBufferDescriptor srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor dstDvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) :
|
||||
_srcDesc(srcDesc), _dstDesc(dstDesc),
|
||||
_dstDuDesc(dstDuDesc), _dstDvDesc(dstDvDesc),
|
||||
_src(src), _dst(dst), _dstDu(dstDu), _dstDv(dstDv),
|
||||
_numPatchCoords(numPatchCoords),
|
||||
_patchCoords(patchCoords),
|
||||
_patchArrayBuffer(patchArrayBuffer),
|
||||
_patchIndexBuffer(patchIndexBuffer),
|
||||
_patchParamBuffer(patchParamBuffer) {
|
||||
}
|
||||
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if (_dstDu == NULL && _dstDv == NULL) {
|
||||
compute(r);
|
||||
} else {
|
||||
computeWithDerivative(r);
|
||||
}
|
||||
}
|
||||
|
||||
void compute(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
BufferAdapter<float> dstT(_dst + _dstDesc.offset
|
||||
+ r.begin() * _dstDesc.stride,
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
|
||||
BufferAdapter<float> dstDuT(_dstDu,
|
||||
_dstDuDesc.length,
|
||||
_dstDuDesc.stride);
|
||||
BufferAdapter<float> dstDvT(_dstDv,
|
||||
_dstDvDesc.length,
|
||||
_dstDvDesc.stride);
|
||||
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
PatchCoord const &coord = _patchCoords[i];
|
||||
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
const int *cvs =
|
||||
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
++dstT;
|
||||
}
|
||||
}
|
||||
|
||||
void computeWithDerivative(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
BufferAdapter<float> dstT(_dst + _dstDesc.offset
|
||||
+ r.begin() * _dstDesc.stride,
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
|
||||
+ r.begin() * _dstDuDesc.stride,
|
||||
_dstDuDesc.length,
|
||||
_dstDuDesc.stride);
|
||||
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
|
||||
+ r.begin() * _dstDvDesc.stride,
|
||||
_dstDvDesc.length,
|
||||
_dstDvDesc.stride);
|
||||
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
PatchCoord const &coord = _patchCoords[i];
|
||||
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
|
||||
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
|
||||
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(patchBits,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
const int *cvs =
|
||||
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
|
||||
|
||||
dstT.Clear();
|
||||
dstDuT.Clear();
|
||||
dstDvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
dstDuT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dstDvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
}
|
||||
++dstT;
|
||||
++dstDuT;
|
||||
++dstDvT;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void
|
||||
TbbEvalPatches(float const *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
|
||||
dstDu, dstDuDesc, dstDv, dstDvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer,
|
||||
patchIndexBuffer,
|
||||
patchParamBuffer);
|
||||
|
||||
tbb::blocked_range<int> range(0, numPatchCoords, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -32,6 +32,9 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
struct PatchArray;
|
||||
struct PatchCoord;
|
||||
struct PatchParam;
|
||||
struct VertexBufferDescriptor;
|
||||
|
||||
void
|
||||
@ -45,6 +48,38 @@ TbbEvalStencils(float const * src,
|
||||
float const * weights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
TbbEvalStencils(float const * src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float * dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float * dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float * dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
TbbEvalPatches(float const *src,
|
||||
VertexBufferDescriptor const &srcDesc,
|
||||
float *dst,
|
||||
VertexBufferDescriptor const &dstDesc,
|
||||
float *dstDu,
|
||||
VertexBufferDescriptor const &dstDuDesc,
|
||||
float *dstDv,
|
||||
VertexBufferDescriptor const &dstDvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
111
opensubdiv/osd/types.h
Normal file
111
opensubdiv/osd/types.h
Normal file
@ -0,0 +1,111 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_TYPES_H
|
||||
#define OPENSUBDIV3_OSD_TYPES_H
|
||||
|
||||
#include "../version.h"
|
||||
#include "../far/patchTable.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
/// \brief Coordinates set on a patch table
|
||||
///
|
||||
/// XXX: this class may be moved into Far
|
||||
///
|
||||
struct PatchCoord {
|
||||
// 5-ints struct.
|
||||
|
||||
/// \brief Constructor
|
||||
///
|
||||
/// @param handle patch handle
|
||||
///
|
||||
/// @param s parametric location on the patch
|
||||
///
|
||||
/// @param t parametric location on the patch
|
||||
///
|
||||
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
|
||||
handle(handle), s(s), t(t) { }
|
||||
|
||||
PatchCoord() : s(0), t(0) {
|
||||
handle.arrayIndex = 0;
|
||||
handle.patchIndex = 0;
|
||||
handle.vertIndex = 0;
|
||||
}
|
||||
|
||||
Far::PatchTable::PatchHandle handle; ///< patch handle
|
||||
float s, t; ///< parametric location on patch
|
||||
};
|
||||
|
||||
struct PatchArray {
|
||||
// 4-ints struct.
|
||||
PatchArray(Far::PatchDescriptor desc, int numPatches,
|
||||
int indexBase, int primitiveIdBase) :
|
||||
desc(desc), numPatches(numPatches), indexBase(indexBase),
|
||||
primitiveIdBase(primitiveIdBase) {}
|
||||
|
||||
Far::PatchDescriptor const &GetDescriptor() const {
|
||||
return desc;
|
||||
}
|
||||
|
||||
int GetPatchType() const {
|
||||
return desc.GetType();
|
||||
}
|
||||
int GetNumPatches() const {
|
||||
return numPatches;
|
||||
}
|
||||
int GetIndexBase() const {
|
||||
return indexBase;
|
||||
}
|
||||
int GetPrimitiveIdBase() const {
|
||||
return primitiveIdBase;
|
||||
}
|
||||
Far::PatchDescriptor desc;
|
||||
int numPatches;
|
||||
int indexBase; // an offset within the index buffer
|
||||
int primitiveIdBase; // an offset within the patch param buffer
|
||||
};
|
||||
|
||||
struct PatchParam {
|
||||
// int3 struct.
|
||||
int faceIndex;
|
||||
unsigned int patchBits;
|
||||
float sharpness;
|
||||
};
|
||||
|
||||
typedef std::vector<PatchArray> PatchArrayVector;
|
||||
typedef std::vector<PatchParam> PatchParamVector;
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
|
||||
#endif // OPENSUBDIV3_OSD_TYPES_H
|
Loading…
Reference in New Issue
Block a user