Merge pull request #526 from takahito-tejima/dev

Osd API refactor: EvalStencils and EvalPatches
This commit is contained in:
David G Yu 2015-05-26 17:07:43 -04:00
commit f5008a544e
49 changed files with 5858 additions and 1038 deletions

6
examples/dxPtexViewer/dxPtexViewer.cpp Executable file → Normal file
View File

@ -819,7 +819,7 @@ createOsdMesh(int level, int kernel) {
//------------------------------------------------------------------------------
static void
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
@ -984,7 +984,7 @@ drawModel() {
UINT hOffsets = 0;
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
g_pd3dDeviceContext->IASetIndexBuffer(
@ -993,7 +993,7 @@ drawModel() {
// patch drawing
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();

View File

@ -630,7 +630,7 @@ ShaderCache g_shaderCache;
//------------------------------------------------------------------------------
static void
bindProgram(Effect effect, OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
typedef OpenSubdiv::Far::PatchDescriptor Descriptor;
@ -822,7 +822,7 @@ display() {
UINT hOffsets = 0;
g_pd3dDeviceContext->IASetVertexBuffers(0, 1, &buffer, &hStrides, &hOffsets);
OpenSubdiv::Osd::D3D11PatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
g_pd3dDeviceContext->IASetIndexBuffer(
@ -834,7 +834,7 @@ display() {
int numDrawCalls = 0;
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::D3D11PatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();

631
examples/glEvalLimit/glEvalLimit.cpp Executable file → Normal file
View File

@ -44,15 +44,54 @@ GLFWmonitor* g_primary=0;
#include <osd/cpuEvaluator.h>
#include <osd/cpuVertexBuffer.h>
#include <osd/cpuPatchTable.h>
#include <osd/cpuGLVertexBuffer.h>
#include <osd/mesh.h>
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaEvaluator.h>
#include <osd/cudaVertexBuffer.h>
#include <osd/cudaGLVertexBuffer.h>
#include <osd/cudaPatchTable.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clVertexBuffer.h>
#include <osd/clGLVertexBuffer.h>
#include <osd/clEvaluator.h>
#include <osd/clPatchTable.h>
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
#include <osd/glXFBEvaluator.h>
#include <osd/glVertexBuffer.h>
#include <osd/glPatchTable.h>
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
#include <osd/glComputeEvaluator.h>
#include <osd/glVertexBuffer.h>
#include <osd/glPatchTable.h>
#endif
#include <far/gregoryBasis.h>
#include <far/endCapGregoryBasisPatchFactory.h>
#include <far/topologyRefiner.h>
#include <far/stencilTableFactory.h>
#include <far/patchTableFactory.h>
#include <far/patchMap.h>
#include <far/error.h>
@ -75,32 +114,44 @@ GLFWmonitor* g_primary=0;
using namespace OpenSubdiv;
//------------------------------------------------------------------------------
enum KernelType { kCPU = 0,
kOPENMP = 1,
kTBB = 2,
kCUDA = 3,
kCL = 4,
kGLXFB = 5,
kGLCompute = 6 };
enum EndCap { kEndCapBSplineBasis,
kEndCapGregoryBasis };
enum DrawMode { kUV,
kVARYING,
kNORMAL,
kSHADE,
kFACEVARYING };
std::vector<float> g_orgPositions,
g_positions,
g_varyingColors;
int g_currentShape = 0,
g_level = 3,
g_kernel = kCPU,
g_endCap = kEndCapBSplineBasis,
g_numElements = 3;
std::vector<int> g_coarseEdges;
std::vector<float> g_coarseEdgeSharpness;
std::vector<float> g_coarseVertexSharpness;
enum DrawMode { kRANDOM=0,
kUV,
kVARYING,
kNORMAL,
kSHADE,
kFACEVARYING };
int g_running = 1,
g_width = 1024,
g_height = 1024,
g_fullscreen = 0,
g_drawCageEdges = 1,
g_drawCageVertices = 1,
g_drawMode = kVARYING,
g_drawMode = kUV,
g_prev_x = 0,
g_prev_y = 0,
g_mbutton[3] = {0, 0, 0},
@ -131,11 +182,9 @@ float g_computeTime = 0;
Stopwatch g_fpsTimer;
//------------------------------------------------------------------------------
int g_nparticles=0,
g_nsamples=101,
g_nsamplesFound=0;
int g_nParticles = 65536;
bool g_randomStart=false;
bool g_randomStart = true;//false;
GLuint g_cageEdgeVAO = 0,
g_cageEdgeVBO = 0,
@ -155,6 +204,7 @@ struct Program {
GLuint attrColor;
GLuint attrTangentU;
GLuint attrTangentV;
GLuint attrPatchCoord;
} g_defaultProgram;
//------------------------------------------------------------------------------
@ -204,37 +254,189 @@ createCoarseMesh(OpenSubdiv::Far::TopologyRefiner const & refiner) {
}
//------------------------------------------------------------------------------
Far::TopologyRefiner * g_topologyRefiner = 0;
Far::StencilTable const * g_vertexStencils = NULL;
Far::StencilTable const * g_varyingStencils = NULL;
Far::PatchTable const * g_patchTable = NULL;
Far::PatchMap const * g_patchMap = NULL;
std::vector<Osd::PatchCoord> g_patchCoords;
Osd::VertexBufferDescriptor g_idesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 3),
g_odesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
g_vdesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
g_duDesc(/*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
g_dvDesc(/*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
g_fvidesc(/*offset*/ 0, /*legnth*/ 2, /*stride*/ 2),
g_fvodesc(/*offset*/ 3, /*legnth*/ 2, /*stride*/ 6);
// input and output vertex data
class EvalOutputBase {
public:
virtual ~EvalOutputBase() {}
virtual GLuint BindVertexData() const = 0;
virtual GLuint BindDerivatives() const = 0;
virtual GLuint BindPatchCoords() const = 0;
virtual void UpdateData(const float *src, int startVertex, int numVertices) = 0;
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) = 0;
virtual void Refine() = 0;
virtual void EvalPatches() = 0;
virtual void EvalPatchesWithDerivatives() = 0;
virtual void EvalPatchesVarying() = 0;
virtual void UpdatePatchCoords(
std::vector<Osd::PatchCoord> const &patchCoords) = 0;
};
// input vertex data (coarse + refined)
Osd::CpuVertexBuffer * g_vertexData = 0;
Osd::CpuVertexBuffer * g_varyingData = 0;
// note: Since we don't have a class for device-patchcoord container in osd,
// we cheat to use vertexbuffer as a patch-coord (5int) container.
//
// Please don't follow the pattern in your actual application.
//
template<typename SRC_VERTEX_BUFFER, typename EVAL_VERTEX_BUFFER,
typename STENCIL_TABLE, typename PATCH_TABLE, typename EVALUATOR,
typename DEVICE_CONTEXT = void>
class EvalOutput : public EvalOutputBase {
public:
typedef OpenSubdiv::Osd::EvaluatorCacheT<EVALUATOR> EvaluatorCache;
// output vertex data (limit locations)
Osd::CpuGLVertexBuffer * g_outVertexData = NULL;
Osd::CpuGLVertexBuffer * g_outDerivatives = NULL;
EvalOutput(Far::StencilTable const *vertexStencils,
Far::StencilTable const *varyingStencils,
int numCoarseVerts, int numTotalVerts, int numParticles,
Far::PatchTable const *patchTable,
EvaluatorCache *evaluatorCache = NULL,
DEVICE_CONTEXT *deviceContext = NULL)
: _srcDesc( /*offset*/ 0, /*length*/ 3, /*stride*/ 3),
_srcVaryingDesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 3),
_vertexDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
_varyingDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
_duDesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6),
_dvDesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6),
_deviceContext(deviceContext) {
_srcData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
_srcVaryingData = SRC_VERTEX_BUFFER::Create(3, numTotalVerts, _deviceContext);
_vertexData = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
_derivatives = EVAL_VERTEX_BUFFER::Create(6, numParticles, _deviceContext);
_patchTable = PATCH_TABLE::Create(patchTable, _deviceContext);
_patchCoords = NULL;
_numCoarseVerts = numCoarseVerts;
_vertexStencils =
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(vertexStencils, _deviceContext);
_varyingStencils =
Osd::convertToCompatibleStencilTable<STENCIL_TABLE>(varyingStencils, _deviceContext);
_evaluatorCache = evaluatorCache;
}
~EvalOutput() {
delete _srcData;
delete _srcVaryingData;
delete _vertexData;
delete _derivatives;
delete _patchTable;
delete _patchCoords;
}
virtual GLuint BindVertexData() const {
return _vertexData->BindVBO();
}
virtual GLuint BindDerivatives() const {
return _derivatives->BindVBO();
}
virtual GLuint BindPatchCoords() const {
return _patchCoords->BindVBO();
}
virtual void UpdateData(const float *src, int startVertex, int numVertices) {
_srcData->UpdateData(src, startVertex, numVertices, _deviceContext);
}
virtual void UpdateVaryingData(const float *src, int startVertex, int numVertices) {
_srcVaryingData->UpdateData(src, startVertex, numVertices, _deviceContext);
}
virtual void Refine() {
Osd::VertexBufferDescriptor dstDesc = _srcDesc;
dstDesc.offset += _numCoarseVerts * _srcDesc.stride;
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcDesc, dstDesc, _deviceContext);
EVALUATOR::EvalStencils(_srcData, _srcDesc,
_srcData, dstDesc,
_vertexStencils,
evalInstance,
_deviceContext);
dstDesc = _srcVaryingDesc;
dstDesc.offset += _numCoarseVerts * _srcVaryingDesc.stride;
evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcVaryingDesc, dstDesc, _deviceContext);
EVALUATOR::EvalStencils(_srcVaryingData, _srcVaryingDesc,
_srcVaryingData, dstDesc,
_varyingStencils,
evalInstance,
_deviceContext);
}
virtual void EvalPatches() {
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
EVALUATOR::EvalPatches(
_srcData, _srcDesc,
_vertexData, _vertexDesc,
_patchCoords->GetNumVertices(),
_patchCoords,
_patchTable, evalInstance, _deviceContext);
}
virtual void EvalPatchesWithDerivatives() {
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcDesc, _vertexDesc, _deviceContext);
EVALUATOR::EvalPatches(
_srcData, _srcDesc,
_vertexData, _vertexDesc,
_derivatives, _duDesc,
_derivatives, _dvDesc,
_patchCoords->GetNumVertices(),
_patchCoords,
_patchTable, evalInstance, _deviceContext);
}
virtual void EvalPatchesVarying() {
EVALUATOR const *evalInstance = OpenSubdiv::Osd::GetEvaluator<EVALUATOR>(
_evaluatorCache, _srcVaryingDesc, _varyingDesc, _deviceContext);
EVALUATOR::EvalPatches(
_srcVaryingData, _srcVaryingDesc,
// varyingdata is interleved in vertexData.
_vertexData, _varyingDesc,
_patchCoords->GetNumVertices(),
_patchCoords,
_patchTable, evalInstance, _deviceContext);
}
virtual void UpdatePatchCoords(
std::vector<Osd::PatchCoord> const &patchCoords) {
if (_patchCoords and
_patchCoords->GetNumVertices() != (int)patchCoords.size()) {
delete _patchCoords;
_patchCoords = NULL;
}
if (not _patchCoords) {
_patchCoords = EVAL_VERTEX_BUFFER::Create(5,
(int)patchCoords.size(),
_deviceContext);
}
_patchCoords->UpdateData((float*)&patchCoords[0], 0, (int)patchCoords.size(), _deviceContext);
}
private:
SRC_VERTEX_BUFFER *_srcData;
SRC_VERTEX_BUFFER *_srcVaryingData;
EVAL_VERTEX_BUFFER *_vertexData;
EVAL_VERTEX_BUFFER *_derivatives;
EVAL_VERTEX_BUFFER *_varyingData;
EVAL_VERTEX_BUFFER *_patchCoords;
PATCH_TABLE *_patchTable;
Osd::VertexBufferDescriptor _srcDesc;
Osd::VertexBufferDescriptor _srcVaryingDesc;
Osd::VertexBufferDescriptor _vertexDesc;
Osd::VertexBufferDescriptor _varyingDesc;
Osd::VertexBufferDescriptor _duDesc;
Osd::VertexBufferDescriptor _dvDesc;
int _numCoarseVerts;
STENCIL_TABLE const *_vertexStencils;
STENCIL_TABLE const *_varyingStencils;
EvaluatorCache *_evaluatorCache;
DEVICE_CONTEXT *_deviceContext;
};
EvalOutputBase *g_evalOutput = NULL;
STParticles * g_particles=0;
//------------------------------------------------------------------------------
static void
updateGeom() {
int nverts = (int)g_orgPositions.size() / 3;
const float *p = &g_orgPositions[0];
@ -255,28 +457,18 @@ updateGeom() {
Stopwatch s;
s.Start();
g_vertexData->UpdateData( &g_positions[0], 0, nverts);
// update coarse vertices
g_evalOutput->UpdateData(&g_positions[0], 0, nverts);
if (! g_topologyRefiner) return;
// update coarse varying
if (g_drawMode == kVARYING) {
g_evalOutput->UpdateVaryingData(&g_varyingColors[0], 0, nverts);
// note that for patch eval we need coarse+refined combined buffer.
int nCoarseVertices = g_topologyRefiner->GetLevel(0).GetNumVertices();
Osd::CpuEvaluator::EvalStencils(g_vertexData,
Osd::VertexBufferDescriptor(0, 3, 3),
g_vertexData,
Osd::VertexBufferDescriptor(
nCoarseVertices*3, 3, 3),
g_vertexStencils);
if (g_varyingData) {
Osd::CpuEvaluator::EvalStencils(g_varyingData,
Osd::VertexBufferDescriptor(0, 3, 3),
g_varyingData,
Osd::VertexBufferDescriptor(
nCoarseVertices*3, 3, 3),
g_varyingStencils);
}
// Refine
g_evalOutput->Refine();
s.Stop();
g_computeTime = float(s.GetElapsed() * 1000.0f);
@ -287,60 +479,28 @@ updateGeom() {
// Apply 'dynamics' update
assert(g_particles);
g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time...
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
= g_particles->GetPatchCoords();
// resolve particle positions into patch handles
// XXX: this process should be handled by OsdKernel in parallel
g_patchCoords.clear();
for (int i = 0; i < g_particles->GetNumParticles(); ++i) {
STParticles::Position const &position = g_particles->GetPositions()[i];
Far::PatchTable::PatchHandle const *handle =
g_patchMap->FindPatch(position.ptexIndex, position.s, position.t);
if (handle) {
g_patchCoords.push_back(Osd::PatchCoord(
*handle, position.s, position.t));
}
}
// update patchcoord to be evaluated
g_evalOutput->UpdatePatchCoords(patchCoords);
// Evaluate the positions of the samples on the limit surface
if (g_drawMode == kNORMAL || g_drawMode == kSHADE) {
// evaluate positions and derivatives
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
g_vertexData, g_idesc,
g_outVertexData, g_odesc,
g_outDerivatives, g_duDesc,
g_outDerivatives, g_dvDesc,
(int)g_patchCoords.size(),
&g_patchCoords[0],
g_patchTable, NULL);
g_evalOutput->EvalPatchesWithDerivatives();
} else {
// evaluate positions
g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
g_vertexData, g_idesc,
g_outVertexData, g_odesc,
(int)g_patchCoords.size(),
&g_patchCoords[0],
g_patchTable, NULL);
g_evalOutput->EvalPatches();
}
// color
if (g_drawMode == kUV) {
// store patchCoords as colors
float *p = g_outVertexData->BindCpuBuffer() + g_vdesc.offset;
for (int i = 0; i < (int)g_patchCoords.size(); ++i) {
p[0] = g_patchCoords[i].s;
p[1] = g_patchCoords[i].t;
p[2] = 0;
p += g_vdesc.stride;
}
} else if (g_drawMode == kVARYING) {
if (g_drawMode == kVARYING) {
// XXX: is this really varying?
Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc,
g_outVertexData, g_vdesc,
(int)g_patchCoords.size(),
&g_patchCoords[0],
g_patchTable, NULL);
g_evalOutput->EvalPatchesVarying();
}
s.Stop();
@ -359,8 +519,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape);
OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape);
delete g_topologyRefiner;
g_topologyRefiner =
Far::TopologyRefiner *topologyRefiner =
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Create(*shape,
OpenSubdiv::Far::TopologyRefinerFactory<Shape>::Options(sdctype, sdcoptions));
@ -371,22 +530,16 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
float speed = g_particles ? g_particles->GetSpeed() : 0.2f;
// Create the 'uv particles' manager - this class manages the limit
// location samples (ptex face index, (s,t) and updates them between frames.
// Note: the number of limit locations can be entirely arbitrary
delete g_particles;
g_particles = new STParticles(*g_topologyRefiner, g_nsamples, !g_randomStart);
g_nparticles = g_particles->GetNumParticles();
g_particles->SetSpeed(speed);
createCoarseMesh(*g_topologyRefiner);
createCoarseMesh(*topologyRefiner);
Far::StencilTable const * vertexStencils = NULL;
Far::StencilTable const * varyingStencils = NULL;
int nverts=0;
{
// Apply feature adaptive refinement to the mesh so that we can use the
// limit evaluation API features.
Far::TopologyRefiner::AdaptiveOptions options(level);
g_topologyRefiner->RefineAdaptive(options);
topologyRefiner->RefineAdaptive(options);
// Generate stencil table to update the bi-cubic patches control
// vertices after they have been re-posed (both for vertex & varying
@ -395,27 +548,33 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
soptions.generateOffsets=true;
soptions.generateIntermediateLevels=true;
Far::StencilTable const * vertexStencils =
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
vertexStencils =
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
soptions.interpolationMode = Far::StencilTableFactory::INTERPOLATE_VARYING;
Far::StencilTable const * varyingStencils =
Far::StencilTableFactory::Create(*g_topologyRefiner, soptions);
varyingStencils =
Far::StencilTableFactory::Create(*topologyRefiner, soptions);
// Generate bi-cubic patch table for the limit surface
Far::PatchTableFactory::Options poptions;
poptions.SetEndCapType(
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
if (g_endCap == kEndCapBSplineBasis) {
poptions.SetEndCapType(
Far::PatchTableFactory::Options::ENDCAP_BSPLINE_BASIS);
} else {
poptions.SetEndCapType(
Far::PatchTableFactory::Options::ENDCAP_GREGORY_BASIS);
}
Far::PatchTable const * patchTable =
Far::PatchTableFactory::Create(*g_topologyRefiner, poptions);
Far::PatchTableFactory::Create(*topologyRefiner, poptions);
// append endcap stencils
if (Far::StencilTable const *endCapVertexStencilTable =
patchTable->GetEndCapVertexStencilTable()) {
Far::StencilTable const *table =
Far::StencilTableFactory::AppendEndCapStencilTable(
*g_topologyRefiner,
*topologyRefiner,
vertexStencils, endCapVertexStencilTable);
delete vertexStencils;
vertexStencils = table;
@ -424,7 +583,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
patchTable->GetEndCapVaryingStencilTable()) {
Far::StencilTable const *table =
Far::StencilTableFactory::AppendEndCapStencilTable(
*g_topologyRefiner,
*topologyRefiner,
varyingStencils, endCapVaryingStencilTable);
delete varyingStencils;
varyingStencils = table;
@ -434,47 +593,103 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
nverts = vertexStencils->GetNumControlVertices() +
vertexStencils->GetNumStencils();
if (g_vertexStencils) delete g_vertexStencils;
g_vertexStencils = vertexStencils;
if (g_varyingStencils) delete g_varyingStencils;
g_varyingStencils = varyingStencils;
if (g_patchTable) delete g_patchTable;
g_patchTable = patchTable;
// Create a far patch map
if (g_patchMap) delete g_patchMap;
g_patchMap = new Far::PatchMap(*g_patchTable);
}
{ // Create vertex primvar buffer for the CVs
delete g_vertexData;
g_vertexData = Osd::CpuVertexBuffer::Create(3, nverts);
// note that for patch eval we need coarse+refined combined buffer.
int nCoarseVertices = topologyRefiner->GetLevel(0).GetNumVertices();
// Create varying primvar buffer for the CVs with random colors.
// These are immediately interpolated (once) and saved for display.
delete g_varyingData; g_varyingData = 0;
if (g_drawMode==kVARYING) {
g_varyingData = Osd::CpuVertexBuffer::Create(3, nverts);
g_varyingData->UpdateData(
&g_varyingColors[0], 0, (int)g_varyingColors.size()/3 );
}
// Create output buffers for the limit samples (position & tangents)
delete g_outVertexData;
g_outVertexData = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
memset(g_outVertexData->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
if (g_drawMode==kRANDOM) {
createRandomColors(g_nparticles, 6, g_outVertexData->BindCpuBuffer()+3);
}
delete g_outDerivatives;
g_outDerivatives = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
memset(g_outDerivatives->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
delete g_evalOutput;
if (g_kernel == kCPU) {
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
Osd::CpuGLVertexBuffer,
Far::StencilTable,
Osd::CpuPatchTable,
Osd::CpuEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (g_kernel == kOPENMP) {
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
Osd::CpuGLVertexBuffer,
Far::StencilTable,
Osd::CpuPatchTable,
Osd::OmpEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (g_kernel == kTBB) {
g_evalOutput = new EvalOutput<Osd::CpuVertexBuffer,
Osd::CpuGLVertexBuffer,
Far::StencilTable,
Osd::CpuPatchTable,
Osd::TbbEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (g_kernel == kCUDA) {
g_evalOutput = new EvalOutput<Osd::CudaVertexBuffer,
Osd::CudaGLVertexBuffer,
Osd::CudaStencilTable,
Osd::CudaPatchTable,
Osd::CudaEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if (g_kernel == kCL) {
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
g_evalOutput = new EvalOutput<Osd::CLVertexBuffer,
Osd::CLGLVertexBuffer,
Osd::CLStencilTable,
Osd::CLPatchTable,
Osd::CLEvaluator,
CLDeviceContext>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable,
&clEvaluatorCache, &g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if (g_kernel == kGLXFB) {
static Osd::EvaluatorCacheT<Osd::GLXFBEvaluator> glXFBEvaluatorCache;
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
Osd::GLVertexBuffer,
Osd::GLStencilTableTBO,
Osd::GLPatchTable,
Osd::GLXFBEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable,
&glXFBEvaluatorCache);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if (g_kernel == kGLCompute) {
static Osd::EvaluatorCacheT<Osd::GLComputeEvaluator> glComputeEvaluatorCache;
g_evalOutput = new EvalOutput<Osd::GLVertexBuffer,
Osd::GLVertexBuffer,
Osd::GLStencilTableSSBO,
Osd::GLPatchTable,
Osd::GLComputeEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable,
&glComputeEvaluatorCache);
#endif
}
// Create the 'uv particles' manager - this class manages the limit
// location samples (ptex face index, (s,t) and updates them between frames.
// Note: the number of limit locations can be entirely arbitrary
delete g_particles;
g_particles = new STParticles(*topologyRefiner, g_patchTable,
g_nParticles, !g_randomStart);
g_nParticles = g_particles->GetNumParticles();
g_particles->SetSpeed(speed);
updateGeom();
delete topologyRefiner;
}
//------------------------------------------------------------------------------
@ -505,32 +720,33 @@ linkDefaultProgram() {
"in vec3 color;\n"
"in vec3 tangentU;\n"
"in vec3 tangentV;\n"
"in vec2 patchCoord;\n"
"out vec4 fragColor;\n"
"out vec3 normal;\n"
"uniform mat4 ModelViewMatrix;\n"
"uniform mat4 ProjectionMatrix;\n"
"uniform int DrawMode;\n"
"void main() {\n"
" fragColor = vec4(color, 1);\n"
// XXX: fix the normal transform
" normal = (ModelViewMatrix * vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
" vec3 normal = (ModelViewMatrix * "
" vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
" gl_Position = ProjectionMatrix * ModelViewMatrix * "
" vec4(position, 1);\n"
" if (DrawMode == 0) {\n" // UV
" fragColor = vec4(patchCoord.x, patchCoord.y, 0, 1);\n"
" } else if (DrawMode == 2) {\n"
" fragColor = vec4(normal*0.5+vec3(0.5), 1);\n"
" } else if (DrawMode == 3) {\n"
" fragColor = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
" } else {\n" // varying
" fragColor = vec4(color, 1);\n"
" }\n"
"}\n";
static const char *fsSrc =
GLSL_VERSION_DEFINE
"in vec4 fragColor;\n"
"in vec3 normal;\n"
"uniform int DrawMode;\n"
"out vec4 color;\n"
"void main() {\n"
" if (DrawMode == 3) {\n"
" color = vec4(normal*0.5+vec3(0.5), 1);\n"
" } else if (DrawMode == 4) {\n"
" color = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
" } else {\n"
" color = fragColor;\n"
" }\n"
" color = fragColor;\n"
"}\n";
GLuint program = glCreateProgram();
@ -544,6 +760,7 @@ linkDefaultProgram() {
glBindAttribLocation(program, 1, "color");
glBindAttribLocation(program, 2, "tangentU");
glBindAttribLocation(program, 3, "tangentV");
glBindAttribLocation(program, 4, "patchCoord");
glBindFragDataLocation(program, 0, "color");
glLinkProgram(program);
@ -571,6 +788,7 @@ linkDefaultProgram() {
g_defaultProgram.attrColor = glGetAttribLocation(program, "color");
g_defaultProgram.attrTangentU = glGetAttribLocation(program, "tangentU");
g_defaultProgram.attrTangentV = glGetAttribLocation(program, "tangentV");
g_defaultProgram.attrPatchCoord = glGetAttribLocation(program, "patchCoord");
return true;
}
@ -621,6 +839,7 @@ drawCageEdges() {
glEnableVertexAttribArray(g_defaultProgram.attrColor);
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glVertexAttribPointer(g_defaultProgram.attrPosition,
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(g_defaultProgram.attrColor,
@ -680,6 +899,7 @@ drawCageVertices() {
glEnableVertexAttribArray(g_defaultProgram.attrColor);
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glVertexAttribPointer(g_defaultProgram.attrPosition,
3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(g_defaultProgram.attrColor,
@ -711,27 +931,33 @@ drawSamples() {
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
glBindBuffer(GL_ARRAY_BUFFER, g_outVertexData->BindVBO());
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindVertexData());
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
glBindBuffer(GL_ARRAY_BUFFER, g_outDerivatives->BindVBO());
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindDerivatives());
glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
glVertexAttribPointer(3, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
glBindBuffer(GL_ARRAY_BUFFER, g_evalOutput->BindPatchCoords());
glVertexAttribPointer(4, 2, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 5, (float*)12);
glEnableVertexAttribArray(g_defaultProgram.attrPosition);
glEnableVertexAttribArray(g_defaultProgram.attrColor);
glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
glEnableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glPointSize(2.0f);
glDrawArrays(GL_POINTS, 0, g_nparticles);
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
glDrawArrays(GL_POINTS, 0, nPatchCoords);
glPointSize(1.0f);
glDisableVertexAttribArray(g_defaultProgram.attrPosition);
glDisableVertexAttribArray(g_defaultProgram.attrColor);
glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
glDisableVertexAttribArray(g_defaultProgram.attrPatchCoord);
glBindVertexArray(0);
@ -789,9 +1015,10 @@ display() {
double fps = 1.0/g_fpsTimer.GetElapsed();
g_fpsTimer.Start();
int nPatchCoords = (int)g_particles->GetPatchCoords().size();
g_hud.DrawString(10, -150, "Particle Speed ([) (]): %.1f", g_particles->GetSpeed());
g_hud.DrawString(10, -120, "# Samples : (%d/%d)",
g_nsamplesFound, g_outVertexData->GetNumVertices());
g_hud.DrawString(10, -120, "# Samples : (%d / %d)", nPatchCoords, g_nParticles);
g_hud.DrawString(10, -100, "Compute : %.3f ms", g_computeTime);
g_hud.DrawString(10, -80, "Eval : %.3f ms", g_evalTime * 1000.f);
g_hud.DrawString(10, -60, "GPU Draw : %.3f ms", drawGpuTime);
@ -884,9 +1111,11 @@ void windowClose(GLFWwindow*) {
//------------------------------------------------------------------------------
static void
setSamples(bool add) {
g_nsamples += add ? 50 : -50;
g_nsamples = std::max(0, g_nsamples);
if (add) {
g_nParticles = g_nParticles * 2;
} else {
g_nParticles = std::max(1, g_nParticles / 2);
}
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
@ -936,6 +1165,40 @@ callbackModel(int m) {
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
//------------------------------------------------------------------------------
static void
callbackEndCap(int endCap) {
g_endCap = endCap;
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
//------------------------------------------------------------------------------
static void
callbackKernel(int k) {
g_kernel = k;
#ifdef OPENSUBDIV_HAS_OPENCL
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
if (g_clDeviceContext.Initialize() == false) {
printf("Error in initializing OpenCL\n");
exit(1);
}
}
#endif
#ifdef OPENSUBDIV_HAS_CUDA
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
if (g_cudaDeviceContext.Initialize() == false) {
printf("Error in initializing Cuda\n");
exit(1);
}
}
#endif
createOsdMesh(g_defaultShapes[g_currentShape], g_level);
}
//------------------------------------------------------------------------------
static void
callbackLevel(int l) {
@ -1001,10 +1264,40 @@ initHUD() {
g_hud.AddCheckBox("Animate vertices (M)", g_moveScale != 0, 10, 50, callbackAnimate, 0, 'm');
g_hud.AddCheckBox("Freeze (spc)", false, 10, 70, callbackFreeze, 0, ' ');
g_hud.AddCheckBox("Random Start", false, 10, 120, callbackCentered, g_randomStart);
g_hud.AddCheckBox("Random Start", g_randomStart, 10, 120, callbackCentered, 0);
int compute_pulldown = g_hud.AddPullDown("Compute (K)", 475, 10, 300,
callbackKernel, 'k');
g_hud.AddPullDownButton(compute_pulldown, "CPU", kCPU);
#ifdef OPENSUBDIV_HAS_OPENMP
g_hud.AddPullDownButton(compute_pulldown, "OPENMP", kOPENMP);
#endif
#ifdef OPENSUBDIV_HAS_TBB
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
g_hud.AddPullDownButton(compute_pulldown, "GL XFB", kGLXFB);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
g_hud.AddPullDownButton(compute_pulldown, "GL Compute", kGLCompute);
#endif
int endcap_pulldown = g_hud.AddPullDown("End cap (E)", 10, 140, 200,
callbackEndCap, 'e');
g_hud.AddPullDownButton(endcap_pulldown, "BSpline",
kEndCapBSplineBasis,
g_endCap == kEndCapBSplineBasis);
g_hud.AddPullDownButton(endcap_pulldown, "GregoryBasis",
kEndCapGregoryBasis,
g_endCap == kEndCapGregoryBasis);
int shading_pulldown = g_hud.AddPullDown("Shading (W)", 250, 10, 250, callbackDisplayVaryingColors, 'w');
g_hud.AddPullDownButton(shading_pulldown, "Random", kRANDOM, g_drawMode==kRANDOM);
g_hud.AddPullDownButton(shading_pulldown, "(u,v)", kUV, g_drawMode==kUV);
g_hud.AddPullDownButton(shading_pulldown, "Varying", kVARYING, g_drawMode==kVARYING);
g_hud.AddPullDownButton(shading_pulldown, "Normal", kNORMAL, g_drawMode==kNORMAL);

View File

@ -25,38 +25,104 @@
#include "particles.h"
#include <far/ptexIndices.h>
#include <far/patchMap.h>
#ifdef OPENSUBDIV_HAS_TBB
#include <tbb/parallel_for.h>
#include <tbb/atomic.h>
tbb::atomic<int> g_tbbCounter;
class TbbUpdateKernel {
public:
TbbUpdateKernel(float speed,
STParticles::Position *positions,
float *velocities,
std::vector<STParticles::FaceInfo> const &adjacency,
OpenSubdiv::Osd::PatchCoord *patchCoords,
OpenSubdiv::Far::PatchMap const *patchMap) :
_speed(speed), _positions(positions), _velocities(velocities),
_adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) {
}
void operator () (tbb::blocked_range<int> const &r) const {
for (int i = r.begin(); i < r.end(); ++i) {
STParticles::Position * p = _positions + i;
float *dp = _velocities + i*2;
// apply velocity
p->s += dp[0] * _speed;
p->t += dp[1] * _speed;
// make sure particles can't skip more than 1 face boundary at a time
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
// check if the particle is jumping a boundary
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
// this is not treated here.
int edge = -1;
if (p->s >= 1.0f) edge = 1;
if (p->s <= 0.0f) edge = 3;
if (p->t >= 1.0f) edge = 2;
if (p->t <= 0.0f) edge = 0;
if (edge>=0) {
// warp the particle to the other side of the boundary
STParticles::WarpParticle(_adjacency, edge, p, dp);
}
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
// resolve particle positions into patch handles
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
if (handle) {
int index = g_tbbCounter.fetch_and_add(1);
_patchCoords[index] =
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t);
}
}
}
private:
float _speed;
STParticles::Position *_positions;
float *_velocities;
std::vector<STParticles::FaceInfo> const &_adjacency;
OpenSubdiv::Osd::PatchCoord *_patchCoords;
OpenSubdiv::Far::PatchMap const *_patchMap;
};
#endif
#include <cassert>
STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered) :
STParticles::STParticles(Refiner const & refiner,
PatchTable const *patchTable,
int nParticles, bool centered) :
_speed(1.0f) {
OpenSubdiv::Far::PtexIndices ptexIndices(refiner);
int nptexfaces = ptexIndices.GetNumFaces(),
nsamples = nptexfaces * nparticles;
// Create a far patch map
_patchMap = new OpenSubdiv::Far::PatchMap(*patchTable);
int nPtexFaces = ptexIndices.GetNumFaces();
srand(static_cast<int>(2147483647));
{ // initialize positions
_positions.resize(nsamples);
_positions.resize(nParticles);
Position * pos = &_positions[0];
for (int i=0; i<nptexfaces; ++i) {
for (int j=0; j<nparticles; ++j) {
pos->ptexIndex = i;
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
++pos;
}
for (int i = 0; i < nParticles; ++i) {
pos->ptexIndex = (int)(((float)rand()/(float)RAND_MAX) * nPtexFaces);
pos->s = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
pos->t = centered ? 0.5f : (float)rand()/(float)RAND_MAX;
++pos;
}
}
{ // initialize velocities
_velocities.resize(nsamples*2);
_velocities.resize(nParticles * 2);
for (int i=0; i<nsamples; ++i) {
for (int i = 0; i < nParticles; ++i) {
// initialize normalized random directions
float s = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
t = 2.0f*(float)rand()/(float)RAND_MAX - 1.0f,
@ -68,7 +134,7 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
}
{ // initialize topology adjacency
_adjacency.resize(nptexfaces);
_adjacency.resize(nPtexFaces);
OpenSubdiv::Far::TopologyLevel const & refBaseLevel = refiner.GetLevel(0);
@ -97,42 +163,6 @@ STParticles::STParticles(Refiner const & refiner, int nparticles, bool centered)
//std::cout << *this;
}
void
STParticles::Update(float deltaTime) {
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
Position * p = &_positions[0];
float * dp = &_velocities[0];
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
// apply velocity
p->s += dp[0] * speed;
p->t += dp[1] * speed;
// make sure particles can't skip more than 1 face boundary at a time
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
// check if the particle is jumping a boundary
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
// this is not treated here.
int edge = -1;
if (p->s >= 1.0f) edge = 1;
if (p->s <= 0.0f) edge = 3;
if (p->t >= 1.0f) edge = 2;
if (p->t <= 0.0f) edge = 0;
if (edge>=0) {
// warp the particle to the other side of the boundary
warpParticle(edge, p, dp);
}
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
}
inline void
FlipS(STParticles::Position * p, float * dp) {
p->s = 1.0f-p->s;
@ -163,6 +193,15 @@ Rotate(int rot, STParticles::Position * p, float * dp) {
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
inline void
Trim(STParticles::Position * p) {
if (p->s <0.0f) p->s = 1.0f + p->s;
if (p->s>=1.0f) p->s = p->s - 1.0f;
if (p->t <0.0f) p->t = 1.0f + p->t;
if (p->t>=1.0f) p->t = p->t - 1.0f;
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
inline void
Clamp(STParticles::Position * p) {
if (p->s<0.0f) {
@ -192,21 +231,12 @@ Bounce(int edge, STParticles::Position * p, float * dp) {
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
inline void
Trim(STParticles::Position * p) {
if (p->s <0.0f) p->s = 1.0f + p->s;
if (p->s>=1.0f) p->s = p->s - 1.0f;
if (p->t <0.0f) p->t = 1.0f + p->t;
if (p->t>=1.0f) p->t = p->t - 1.0f;
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
void
STParticles::warpParticle(int edge, Position * p, float * dp) {
assert(p->ptexIndex<(int)_adjacency.size() and (edge>=0 and edge<4));
STParticles::WarpParticle(std::vector<FaceInfo> const &adjacency,
int edge, Position * p, float * dp) {
assert(p->ptexIndex<(int)adjacency.size() and (edge>=0 and edge<4));
FaceInfo const & f = _adjacency[p->ptexIndex];
FaceInfo const & f = adjacency[p->ptexIndex];
int afid = f.adjface(edge),
aeid = f.adjedge(edge);
@ -215,7 +245,7 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
// boundary detected: bounce the particle
Bounce(edge, p, dp);
} else {
FaceInfo const & af = _adjacency[afid];
FaceInfo const & af = adjacency[afid];
int rot = edge - aeid + 2;
bool fIsSubface = f.isSubface(),
@ -233,6 +263,66 @@ STParticles::warpParticle(int edge, Position * p, float * dp) {
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
}
STParticles::~STParticles() {
delete _patchMap;
}
void
STParticles::Update(float deltaTime) {
if (fabs(GetSpeed()) < 0.001f) return;
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
_patchCoords.clear();
// XXX: this process should be parallelized.
#ifdef OPENSUBDIV_HAS_TBB
_patchCoords.resize((int)GetNumParticles());
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
_adjacency, &_patchCoords[0], _patchMap);;
g_tbbCounter = 0;
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
tbb::parallel_for(range, kernel);
_patchCoords.resize(g_tbbCounter);
#else
Position * p = &_positions[0];
float * dp = &_velocities[0];
for (int i=0; i<GetNumParticles(); ++i, ++p, dp+=2) {
// apply velocity
p->s += dp[0] * speed;
p->t += dp[1] * speed;
// make sure particles can't skip more than 1 face boundary at a time
assert((p->s>-2.0f) and (p->s<2.0f) and (p->t>-2.0f) and (p->t<2.0f));
// check if the particle is jumping a boundary
// note: a particle can jump 2 edges at a time (a "diagonal" jump)
// this is not treated here.
int edge = -1;
if (p->s >= 1.0f) edge = 1;
if (p->s <= 0.0f) edge = 3;
if (p->t >= 1.0f) edge = 2;
if (p->t <= 0.0f) edge = 0;
if (edge>=0) {
// warp the particle to the other side of the boundary
WarpParticle(_adjacency, edge, p, dp);
}
assert((p->s>=0.0f) and (p->s<=1.0f) and (p->t>=0.0f) and (p->t<=1.0f));
// resolve particle positions into patch handles
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
if (handle) {
_patchCoords.push_back(
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t));
}
}
#endif
}
// Dump adjacency info
std::ostream & operator << (std::ostream & os,
STParticles::FaceInfo const & f) {

View File

@ -26,7 +26,8 @@
#define ST_PARTICLES_H
#include <far/topologyRefiner.h>
#include <far/patchMap.h>
#include <osd/types.h>
#include <iostream>
//
@ -72,47 +73,6 @@ public:
float s, t; ///< parametric location on face
};
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
STParticles(Refiner const & refiner, int nparticles, bool centered=false);
void Update(float deltaTime);
int GetNumParticles() const {
return (int)_positions.size();
}
void SetSpeed(float speed) {
_speed = std::max(-1.0f, std::min(1.0f, speed));
}
float GetSpeed() const {
return _speed;
}
std::vector<Position> & GetPositions() {
return _positions;
}
std::vector<float> & GetVelocities() {
return _velocities;
}
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
private:
//
// Particle "Dynamics"
//
std::vector<Position> _positions;
std::vector<float> _velocities;
float _speed; // velocity multiplier
private:
//
// Topology adjacency (borrowed from Ptexture.h)
//
@ -152,11 +112,63 @@ private:
int adjfaces[4];
};
void warpParticle(int edge, Position * p, float * dp);
typedef OpenSubdiv::Far::TopologyRefiner Refiner;
typedef OpenSubdiv::Far::PatchTable PatchTable;
STParticles(Refiner const & refiner, PatchTable const *patchTable,
int nparticles, bool centered=false);
~STParticles();
void Update(float deltaTime);
int GetNumParticles() const {
return (int)_positions.size();
}
void SetSpeed(float speed) {
_speed = std::max(-1.0f, std::min(1.0f, speed));
}
float GetSpeed() const {
return _speed;
}
std::vector<Position> & GetPositions() {
return _positions;
}
std::vector<float> & GetVelocities() {
return _velocities;
}
std::vector<OpenSubdiv::Osd::PatchCoord> GetPatchCoords() const {
return _patchCoords;
}
friend std::ostream & operator << (std::ostream & os, STParticles const & f);
static void WarpParticle(std::vector<FaceInfo> const &adjacency,
int edge, Position * p, float * dp);
private:
//
// Particle "Dynamics"
//
std::vector<Position> _positions;
std::vector<float> _velocities;
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;
float _speed; // velocity multiplier
friend std::ostream & operator << (std::ostream & os, FaceInfo const & f);
std::vector<FaceInfo> _adjacency;
OpenSubdiv::Far::PatchMap const *_patchMap;
};
#endif // ST_PARTICLES_H

View File

@ -786,7 +786,7 @@ bindTextures() {
}
static GLenum
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
@ -868,7 +868,7 @@ display() {
glBindVertexArray(g_vao);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
if (g_displayStyle == kWire)
@ -879,7 +879,7 @@ display() {
// patch drawing
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
GLenum primType = bindProgram(GetEffect(), patch);
@ -909,7 +909,7 @@ display() {
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
GLenum primType = bindProgram(GetEffect(/*uvDraw=*/ true), patch);

4
examples/glImaging/glImaging.cpp Executable file → Normal file
View File

@ -422,11 +422,11 @@ void runTest(ShapeDesc const &shapeDesc, std::string const &kernel,
mesh->GetPatchTable()->GetPatchParamTextureBuffer());
}
Osd::GLPatchTable::PatchArrayVector const & patches =
Osd::PatchArrayVector const & patches =
mesh->GetPatchTable()->GetPatchArrays();
for (int i=0; i<(int)patches.size(); ++i) {
Osd::GLPatchTable::PatchArray const & patch = patches[i];
Osd::PatchArray const & patch = patches[i];
Far::PatchDescriptor desc = patch.GetDescriptor();
Far::PatchDescriptor::Type patchType = desc.GetType();

View File

@ -597,7 +597,7 @@ static void bindTextures(Effect effect) {
}
static GLuint
bindProgram(Effect effect, OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
bindProgram(Effect effect, OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
@ -670,12 +670,12 @@ display() {
glBindVertexArray(g_vao);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
// patch drawing
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
GLenum primType = GL_PATCHES;
@ -807,13 +807,13 @@ drawStroke(int x, int y) {
effect.paint = 1;
bindTextures(effect);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
// patch drawing
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
GLenum primType = GL_PATCHES;

View File

@ -1249,7 +1249,7 @@ bindTextures() {
//------------------------------------------------------------------------------
static GLenum
bindProgram(Effect effect,
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
GLDrawConfig *config = g_shaderCache.GetDrawConfig(effectDesc);
@ -1296,10 +1296,10 @@ drawModel() {
glBindVertexArray(g_vao);
// patch drawing
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
for (int i = 0; i < (int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
Effect effect;
effect.value = 0;

View File

@ -1181,7 +1181,7 @@ bindTextures() {
static GLenum
bindProgram(Effect effect,
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch) {
OpenSubdiv::Osd::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
// only legacy gregory needs maxValence and numElements
@ -1299,7 +1299,7 @@ display() {
glBindVertexArray(g_vao);
OpenSubdiv::Osd::GLPatchTable::PatchArrayVector const & patches =
OpenSubdiv::Osd::PatchArrayVector const & patches =
g_mesh->GetPatchTable()->GetPatchArrays();
// patch drawing
@ -1316,7 +1316,7 @@ display() {
// core draw-calls
for (int i=0; i<(int)patches.size(); ++i) {
OpenSubdiv::Osd::GLPatchTable::PatchArray const & patch = patches[i];
OpenSubdiv::Osd::PatchArray const & patch = patches[i];
OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor();
OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType();

View File

@ -29,6 +29,7 @@
set(CPU_SOURCE_FILES
cpuEvaluator.cpp
cpuKernel.cpp
cpuPatchTable.cpp
cpuVertexBuffer.cpp
)
@ -43,10 +44,12 @@ set(PRIVATE_HEADER_FILES
set(PUBLIC_HEADER_FILES
cpuEvaluator.h
cpuPatchTable.h
cpuVertexBuffer.h
mesh.h
nonCopyable.h
opengl.h
types.h
vertexDescriptor.h
)
@ -216,6 +219,7 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS})
# OpenCL code & dependencies
set(OPENCL_PUBLIC_HEADERS
clEvaluator.h
clPatchTable.h
clVertexBuffer.h
opencl.h
)
@ -223,6 +227,7 @@ set(OPENCL_PUBLIC_HEADERS
if ( OPENCL_FOUND )
list(APPEND GPU_SOURCE_FILES
clEvaluator.cpp
clPatchTable.cpp
clVertexBuffer.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
@ -254,12 +259,14 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS})
# CUDA code & dependencies
set(CUDA_PUBLIC_HEADERS
cudaEvaluator.h
cudaPatchTable.h
cudaVertexBuffer.h
)
if( CUDA_FOUND )
list(APPEND GPU_SOURCE_FILES
cudaEvaluator.cpp
cudaPatchTable.cpp
cudaVertexBuffer.cpp
)
list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS})

View File

@ -27,6 +27,7 @@
#include <sstream>
#include <string>
#include <vector>
#include <cstdio>
#include "../osd/opencl.h"
#include "../far/error.h"
@ -87,11 +88,12 @@ CLStencilTable::~CLStencilTable() {
CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue)
: _clContext(context), _clCommandQueue(queue),
_program(NULL), _stencilsKernel(NULL) {
_program(NULL), _stencilKernel(NULL), _patchKernel(NULL) {
}
CLEvaluator::~CLEvaluator() {
if (_stencilsKernel) clReleaseKernel(_stencilsKernel);
if (_stencilKernel) clReleaseKernel(_stencilKernel);
if (_patchKernel) clReleaseKernel(_patchKernel);
if (_program) clReleaseProgram(_program);
}
@ -145,7 +147,13 @@ CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
return false;
}
_stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum);
_stencilKernel = clCreateKernel(_program, "computeStencils", &errNum);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
return false;
}
_patchKernel = clCreateKernel(_program, "computePatches", &errNum);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
@ -169,24 +177,24 @@ CLEvaluator::EvalStencils(cl_mem src,
size_t globalWorkSize = (size_t)(end - start);
clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes);
clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets);
clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices);
clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights);
clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start);
clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end);
clSetKernelArg(_stencilKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_stencilKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_stencilKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_stencilKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_stencilKernel, 4, sizeof(cl_mem), &sizes);
clSetKernelArg(_stencilKernel, 5, sizeof(cl_mem), &offsets);
clSetKernelArg(_stencilKernel, 6, sizeof(cl_mem), &indices);
clSetKernelArg(_stencilKernel, 7, sizeof(cl_mem), &weights);
clSetKernelArg(_stencilKernel, 8, sizeof(int), &start);
clSetKernelArg(_stencilKernel, 9, sizeof(int), &end);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _stencilsKernel, 1, NULL,
_clCommandQueue, _stencilKernel, 1, NULL,
&globalWorkSize, NULL, 0, NULL, NULL);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyStencilTableKernel (%d) ", errNum);
"ApplyStencilKernel (%d) ", errNum);
return false;
}
@ -194,6 +202,50 @@ CLEvaluator::EvalStencils(cl_mem src,
return true;
}
bool
CLEvaluator::EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
cl_mem dst, VertexBufferDescriptor const &dstDesc,
cl_mem du, VertexBufferDescriptor const &duDesc,
cl_mem dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
cl_mem patchCoordsBuffer,
cl_mem patchArrayBuffer,
cl_mem patchIndexBuffer,
cl_mem patchParamBuffer) const {
size_t globalWorkSize = (size_t)(numPatchCoords);
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &patchCoordsBuffer);
clSetKernelArg(_patchKernel, 11, sizeof(cl_mem), &patchArrayBuffer);
clSetKernelArg(_patchKernel, 12, sizeof(cl_mem), &patchIndexBuffer);
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &patchParamBuffer);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _patchKernel, 1, NULL,
&globalWorkSize, NULL, 0, NULL, NULL);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyPatchKernel (%d) ", errNum);
return false;
}
clFinish(_clCommandQueue);
return true;
}
/* static */
void
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {

View File

@ -28,6 +28,7 @@
#include "../version.h"
#include "../osd/opencl.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -75,9 +76,6 @@ private:
// ---------------------------------------------------------------------------
/// \brief OpenCL stencil kernel
///
///
class CLEvaluator {
public:
typedef bool Instantiatable;
@ -107,6 +105,12 @@ public:
return NULL;
}
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static compute function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
@ -124,7 +128,7 @@ public:
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// OpenCL memory interfaces.
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
@ -137,25 +141,25 @@ public:
/// cl_command_queue GetCommandQueue()
/// methods.
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE,
typename DEVICE_CONTEXT>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
template <typename SRC_BUFFER, typename DST_BUFFER,
typename STENCIL_TABLE, typename DEVICE_CONTEXT>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
if (instance) {
return instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
} else {
// Create an instance on demand (slow)
instance = Create(srcDesc, dstDesc, deviceContext);
if (instance) {
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
delete instance;
return r;
@ -167,15 +171,14 @@ public:
/// Generic compute function.
/// Dispatch the CL compute kernel asynchronously.
/// Returns false if the kernel hasn't been compiled yet.
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue),
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue),
srcDesc,
dstVertexBuffer->BindCLBuffer(_clCommandQueue),
dstBuffer->BindCLBuffer(_clCommandQueue),
dstDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
@ -187,10 +190,8 @@ public:
/// Dispatch the CL compute kernel asynchronously.
/// returns false if the kernel hasn't been compiled yet.
bool EvalStencils(cl_mem src,
VertexBufferDescriptor const &srcDesc,
cl_mem dst,
VertexBufferDescriptor const &dstDesc,
bool EvalStencils(cl_mem src, VertexBufferDescriptor const &srcDesc,
cl_mem dst, VertexBufferDescriptor const &dstDesc,
cl_mem sizes,
cl_mem offsets,
cl_mem indices,
@ -198,6 +199,278 @@ public:
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
///
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, deviceContext);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer
///
/// @param duDesc
///
/// @param dvBuffer
///
/// @param dvDesc
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, deviceContext);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
0, VertexBufferDescriptor(),
0, VertexBufferDescriptor(),
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data of Du
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data of Dv
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
bool EvalPatches(cl_mem src, VertexBufferDescriptor const &srcDesc,
cl_mem dst, VertexBufferDescriptor const &dstDesc,
cl_mem du, VertexBufferDescriptor const &duDesc,
cl_mem dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
cl_mem patchCoordsBuffer,
cl_mem patchArrayBuffer,
cl_mem patchIndexBuffer,
cl_mem patchParamsBuffer) const;
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure OpenCL kernel.
/// Returns false if it fails to compile the kernel.
bool Compile(VertexBufferDescriptor const &srcDesc,
@ -215,7 +488,8 @@ private:
cl_context _clContext;
cl_command_queue _clCommandQueue;
cl_program _program;
cl_kernel _stencilsKernel;
cl_kernel _stencilKernel;
cl_kernel _patchKernel;
};

View File

@ -85,3 +85,176 @@ __kernel void computeStencils(__global float * src,
writeVertex(dst, current, &v);
}
// ---------------------------------------------------------------------------
struct PatchArray {
int patchType;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
struct PatchCoord {
int arrayIndex;
int patchIndex;
int vertIndex;
float s;
float t;
};
struct PatchParam {
int faceIndex;
uint patchBits;
float sharpness;
};
static void getBSplineWeights(float t, float *point, float *deriv) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
point[3] = one6th * ( t3);
// Derivatives of the above four basis functions at t:
deriv[0] = -0.5f*t2 + t - 0.5f;
deriv[1] = 1.5f*t2 - 2.0f*t;
deriv[2] = -1.5f*t2 + t + 0.5f;
deriv[3] = 0.5f*t2;
}
static void adjustBoundaryWeights(uint bits, float *sWeights, float *tWeights) {
int boundary = ((bits >> 4) & 0xf);
if (boundary & 1) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if (boundary & 2) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if (boundary & 4) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if (boundary & 8) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
static int getDepth(uint patchBits) {
return (patchBits & 0x7);
}
static float getParamFraction(uint patchBits) {
bool nonQuadRoot = (patchBits >> 3) & 0x1;
int depth = getDepth(patchBits);
if (nonQuadRoot) {
return 1.0f / (float)( 1 << (depth-1) );
} else {
return 1.0f / (float)( 1 << depth );
}
}
static void normalizePatchCoord(uint patchBits, float *uv) {
float frac = getParamFraction(patchBits);
int iu = (patchBits >> 22) & 0x3ff;
int iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = (float)iu*frac;
float pv = (float)iv*frac;
// normalize u,v coordinates
uv[0] = (uv[0] - pu) / frac;
uv[1] = (uv[1] - pv) / frac;
}
__kernel void computePatches(__global float *src, int srcOffset,
__global float *dst, int dstOffset,
__global float *du, int duOffset, int duStride,
__global float *dv, int dvOffset, int dvStride,
__global struct PatchCoord *patchCoords,
__global struct PatchArray *patchArrayBuffer,
__global int *patchIndexBuffer,
__global struct PatchParam *patchParamBuffer) {
int current = get_global_id(0);
if (src) src += srcOffset;
if (dst) dst += dstOffset;
if (du) du += duOffset;
if (dv) dv += dvOffset;
struct PatchCoord coord = patchCoords[current];
struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
int patchType = 6; // array.patchType XXX: REGULAR only for now.
int numControlVertices = 16;
uint patchBits = patchParamBuffer[coord.patchIndex].patchBits;
float uv[2] = {coord.s, coord.t};
normalizePatchCoord(patchBits, uv);
float dScale = (float)(1 << getDepth(patchBits));
float wP[20], wDs[20], wDt[20];
if (patchType == 6) { // REGULAR
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
getBSplineWeights(uv[0], sWeights, dsWeights);
getBSplineWeights(uv[1], tWeights, dtWeights);
adjustBoundaryWeights(patchBits, sWeights, tWeights);
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
}
}
} else {
// TODO: GREGORY BASIS
}
int indexBase = array.indexBase + coord.vertIndex;
struct Vertex v;
clear(&v);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&v, src, index, wP[i]);
}
writeVertex(dst, current, &v);
if (du) {
struct Vertex vdu;
clear(&vdu);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&vdu, src, index, wDs[i]);
}
writeVertex(du, current, &vdu);
}
if (dv) {
struct Vertex vdv;
clear(&vdv);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&vdv, src, index, wDt[i]);
}
writeVertex(dv, current, &vdv);
}
}

View File

@ -0,0 +1,102 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/clPatchTable.h"
#include "../far/error.h"
#include "../far/patchTable.h"
#include "../osd/opencl.h"
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
CLPatchTable::CLPatchTable() :
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
}
CLPatchTable::~CLPatchTable() {
if (_patchArrays) clReleaseMemObject(_patchArrays);
if (_indexBuffer) clReleaseMemObject(_indexBuffer);
if (_patchParamBuffer) clReleaseMemObject(_patchParamBuffer);
}
CLPatchTable *
CLPatchTable::Create(Far::PatchTable const *farPatchTable,
cl_context clContext) {
CLPatchTable *instance = new CLPatchTable();
if (instance->allocate(farPatchTable, clContext)) return instance;
delete instance;
return 0;
}
bool
CLPatchTable::allocate(Far::PatchTable const *farPatchTable, cl_context clContext) {
CpuPatchTable patchTable(farPatchTable);
size_t numPatchArrays = patchTable.GetNumPatchArrays();
size_t indexSize = patchTable.GetPatchIndexSize();
size_t patchParamSize = patchTable.GetPatchParamSize();
cl_int err = 0;
_patchArrays = clCreateBuffer(clContext,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
numPatchArrays * sizeof(Osd::PatchArray),
(void*)patchTable.GetPatchArrayBuffer(),
&err);
if (err != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
return false;
}
_indexBuffer = clCreateBuffer(clContext,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
indexSize * sizeof(int),
(void*)patchTable.GetPatchIndexBuffer(),
&err);
if (err != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
return false;
}
_patchParamBuffer = clCreateBuffer(clContext,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
patchParamSize * sizeof(Osd::PatchParam),
(void*)patchTable.GetPatchParamBuffer(),
&err);
if (err != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", err);
return false;
}
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,91 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
#define OPENSUBDIV3_OSD_CL_PATCH_TABLE_H
#include "../version.h"
#include "../osd/opencl.h"
#include "../osd/nonCopyable.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{
class PatchTable;
};
namespace Osd {
/// \brief CL patch table
///
/// This class is a CL buffer representation of Far::PatchTable.
///
/// CLEvaluator consumes this table to evaluate on the patches.
///
///
class CLPatchTable : private NonCopyable<CLPatchTable> {
public:
/// Creator. Returns NULL if error
static CLPatchTable *Create(Far::PatchTable const *patchTable,
cl_context clContext);
template <typename DEVICE_CONTEXT>
static CLPatchTable * Create(Far::PatchTable const *patchTable,
DEVICE_CONTEXT context) {
return Create(patchTable, context->GetContext());
}
/// Destructor
~CLPatchTable();
/// Returns the CL memory of the array of Osd::PatchArray buffer
cl_mem GetPatchArrayBuffer() const { return _patchArrays; }
/// Returns the CL memory of the patch control vertices
cl_mem GetPatchIndexBuffer() const { return _indexBuffer; }
/// Returns the CL memory of the array of Osd::PatchParam buffer
cl_mem GetPatchParamBuffer() const { return _patchParamBuffer; }
protected:
CLPatchTable();
bool allocate(Far::PatchTable const *patchTable, cl_context clContext);
cl_mem _patchArrays;
cl_mem _indexBuffer;
cl_mem _patchParamBuffer;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_CL_PATCH_TABLE_H

View File

@ -45,6 +45,12 @@ public:
/// Creator. Returns NULL if error.
static CLVertexBuffer * Create(int numElements, int numVertices, cl_context clContext);
template <typename DEVICE_CONTEXT>
static CLVertexBuffer * Create(int numElements, int numVertices,
DEVICE_CONTEXT context) {
return Create(numElements, numVertices, context->GetContext());
}
/// Destructor.
~CLVertexBuffer();
@ -52,6 +58,12 @@ public:
/// vertices data to Osd.
void UpdateData(const float *src, int startVertex, int numVertices, cl_command_queue clQueue);
template<typename DEVICE_CONTEXT>
void UpdateData(const float *src, int startVertex, int numVertices,
DEVICE_CONTEXT context) {
UpdateData(src, startVertex, numVertices, context->GetCommandQueue());
}
/// Returns how many elements defined in this vertex buffer.
int GetNumElements() const;

View File

@ -24,6 +24,7 @@
#include "../osd/cpuEvaluator.h"
#include "../osd/cpuKernel.h"
#include "../far/patchBasis.h"
#include <cstdlib>
@ -34,15 +35,15 @@ namespace Osd {
/* static */
bool
CpuEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
CpuEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
@ -55,30 +56,28 @@ CpuEvaluator::EvalStencils(const float *src,
/* static */
bool
CpuEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
CpuEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != dstDsDesc.length) return false;
if (srcDesc.length != dstDtDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
CpuEvalStencils(src, srcDesc,
dst, dstDesc,
dstDs, dstDsDesc,
dstDt, dstDtDesc,
du, duDesc,
dv, dvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
start, end);
@ -123,10 +122,13 @@ CpuEvaluator::EvalPatches(const float *src,
float *dst,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable) {
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer){
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
else return false;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
@ -134,14 +136,38 @@ CpuEvaluator::EvalPatches(const float *src,
float wP[20], wDs[20], wDt[20];
for (int i = 0; i < numPatchCoords; ++i) {
PatchCoord const &coords = patchCoords[i];
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
int patchType = array.GetPatchType();
// XXX: patchIndex is absolute. not sure it's consistent.
// (should be offsetted by array.primitiveIdBase?)
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
return false;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
for (int j = 0; j < cvs.size(); ++j) {
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
++dstT;
@ -151,47 +177,67 @@ CpuEvaluator::EvalPatches(const float *src,
/* static */
bool
CpuEvaluator::EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable) {
CpuEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer) {
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (dstDs) dstDs += dstDsDesc.offset;
if (dstDt) dstDt += dstDtDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
BufferAdapter<float> dstDsT(dstDs, dstDsDesc.length, dstDsDesc.stride);
BufferAdapter<float> dstDtT(dstDt, dstDtDesc.length, dstDtDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
BufferAdapter<float> duT (du, duDesc.length, duDesc.stride);
BufferAdapter<float> dvT (dv, dvDesc.length, dvDesc.stride);
float wP[20], wDs[20], wDt[20];
for (int i = 0; i < numPatchCoords; ++i) {
PatchCoord const &coords = patchCoords[i];
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
dstDsT.Clear();
dstDtT.Clear();
for (int j = 0; j < cvs.size(); ++j) {
duT.Clear();
dvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
dstDsT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dstDtT.AddWithWeight(srcT[cvs[j]], wDt[j]);
duT.AddWithWeight (srcT[cvs[j]], wDs[j]);
dvT.AddWithWeight (srcT[cvs[j]], wDt[j]);
}
++dstT;
++dstDsT;
++dstDtT;
++duT;
++dvT;
}
return true;
}

View File

@ -29,33 +29,14 @@
#include <cstddef>
#include <vector>
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Coordinates set on a patch table
/// XXX: this is a temporary structure, exists during Osd refactoring work.
///
struct PatchCoord {
/// \brief Constructor
///
/// @param p patch handle
///
/// @param s parametric location on the patch
///
/// @param t parametric location on the patch
///
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
handle(handle), s(s), t(t) { }
Far::PatchTable::PatchHandle handle; ///< patch handle
float s, t; ///< parametric location on patch
};
class CpuEvaluator {
public:
/// ----------------------------------------------------------------------
@ -80,7 +61,7 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
@ -89,20 +70,18 @@ public:
/// @param deviceContext not used in the cpu kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *instance = NULL,
void * deviceContext = NULL) {
return EvalStencils(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -125,24 +104,27 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param deviceContext not used in the cpu kernel
/// @param indices pointer to the indices buffer of the stencil table
///
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
@ -161,19 +143,19 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDsBuffer Output s-derivative buffer
/// @param duBuffer Output U-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDsDesc vertex buffer descriptor for the output buffer
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dstDtBuffer Output t-derivative buffer
/// @param dvBuffer Output V-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDtDesc vertex buffer descriptor for the output buffer
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
@ -182,28 +164,22 @@ public:
/// @param deviceContext not used in the cpu kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
DST_BUFFER *dstDsBuffer,
VertexBufferDescriptor const &dstDsDesc,
DST_BUFFER *dstDtBuffer,
VertexBufferDescriptor const &dstDtDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *evaluator = NULL,
void * deviceContext = NULL) {
(void)evaluator; // unused
(void)deviceContext; // unused
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *instance = NULL,
void * deviceContext = NULL) {
return EvalStencils(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
dstDsBuffer->BindCpuBuffer(),
dstDsDesc,
dstDtBuffer->BindCpuBuffer(),
dstDtDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -228,40 +204,44 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDs Output s-derivatives pointer. An offset of
/// dstDsDesc will be applied internally.
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param dstDsDesc vertex buffer descriptor for the output buffer
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dstDt Output t-derivatives pointer. An offset of
/// dstDtDesc will be applied internally.
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dstDtDesc vertex buffer descriptor for the output buffer
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param deviceContext not used in the cpu kernel
/// @param indices pointer to the indices buffer of the stencil table
///
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start,
int end);
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
@ -289,32 +269,35 @@ public:
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable,
CpuEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
return EvalPatches(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
numPatchCoords,
patchCoords,
patchTable);
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
@ -333,56 +316,59 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDsBuffer Output s-derivatives buffer
/// @param duBuffer Output U-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDsDesc vertex buffer descriptor for the dstDsBuffer
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dstDtBuffer Output t-derivatives buffer
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDtDesc vertex buffer descriptor for the dstDtBuffer
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER>
static bool EvalPatches(SRC_BUFFER *srcBuffer,
VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer,
VertexBufferDescriptor const &dstDesc,
DST_BUFFER *dstDsBuffer,
VertexBufferDescriptor const &dstDsDesc,
DST_BUFFER *dstDtBuffer,
VertexBufferDescriptor const &dstDtDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable,
CpuEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
dstDsBuffer->BindCpuBuffer(),
dstDsDesc,
dstDtBuffer->BindCpuBuffer(),
dstDtDesc,
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
patchCoords,
patchTable);
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
@ -403,20 +389,23 @@ public:
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable on which primvars are evaluated
/// for the patchCoords
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param instance not used in the cpu evaluator
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param deviceContext not used in the cpu evaluator
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable);
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
@ -432,38 +421,45 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param dstDs Output s-derivatives pointer. An offset of
/// dstDsDesc will be applied internally.
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param dstDsDesc vertex buffer descriptor for the dstDs buffer
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dstDt Output t-derivatives pointer. An offset of
/// dstDtDesc will be applied internally.
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dstDtDesc vertex buffer descriptor for the dstDt buffer
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable on which primvars are evaluated
/// for the patchCoords
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param instance not used in the cpu evaluator
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param deviceContext not used in the cpu evaluator
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDs,
VertexBufferDescriptor const &dstDsDesc,
float *dstDt,
VertexBufferDescriptor const &dstDtDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
Far::PatchTable const *patchTable);
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// \brief synchronize all asynchronous computation invoked on this device.
static void Synchronize(void * /*deviceContext = NULL*/) {

View File

@ -0,0 +1,106 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
CpuPatchTable::CpuPatchTable(const Far::PatchTable *farPatchTable) {
int nPatchArrays = farPatchTable->GetNumPatchArrays();
// count
int numPatches = 0;
int numIndices = 0;
for (int j = 0; j < nPatchArrays; ++j) {
int nPatch = farPatchTable->GetNumPatches(j);
int nCV = farPatchTable->GetPatchArrayDescriptor(j).GetNumControlVertices();
numPatches += nPatch;
numIndices += nPatch * nCV;
}
_patchArrays.reserve(nPatchArrays);
_indexBuffer.reserve(numIndices);
_patchParamBuffer.reserve(numPatches);
// for each patchArray
for (int j = 0; j < nPatchArrays; ++j) {
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
farPatchTable->GetNumPatches(j),
(int)_indexBuffer.size(),
(int)_patchParamBuffer.size());
_patchArrays.push_back(patchArray);
// indices
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
for (int k = 0; k < indices.size(); ++k) {
_indexBuffer.push_back(indices[k]);
}
// patchParams bundling
// XXX: this process won't be needed if Far::PatchParam includes
// sharpness.
#if 0
// XXX: we need sharpness interface for patcharray or put sharpness
// into patchParam.
Far::ConstPatchParamArray patchParams =
farPatchTable->GetPatchParams(j);
for (int k = 0; k < patchParams.size(); ++k) {
float sharpness = 0.0;
_patchParamBuffer.push_back(patchParams[k].faceIndex);
_patchParamBuffer.push_back(patchParams[k].bitField.field);
_patchParamBuffer.push_back(*((unsigned int *)&sharpness));
}
#else
// XXX: workaround. GetPatchParamTable() will be deprecated though.
Far::PatchParamTable const & patchParamTable =
farPatchTable->GetPatchParamTable();
std::vector<Far::Index> const &sharpnessIndexTable =
farPatchTable->GetSharpnessIndexTable();
int numPatches = farPatchTable->GetNumPatches(j);
for (int k = 0; k < numPatches; ++k) {
float sharpness = 0.0;
int patchIndex = (int)_patchParamBuffer.size();
if (patchIndex < (int)sharpnessIndexTable.size()) {
int sharpnessIndex = sharpnessIndexTable[patchIndex];
if (sharpnessIndex >= 0)
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
}
PatchParam param;
//param.patchParam = patchParamTable[patchIndex];
param.faceIndex = patchParamTable[patchIndex].faceIndex;
param.patchBits = patchParamTable[patchIndex].bitField.field;
param.sharpness = sharpness;
_patchParamBuffer.push_back(param);
}
#endif
}
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,102 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
#define OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H
#include "../version.h"
#include <vector>
#include "../far/patchDescriptor.h"
#include "../osd/nonCopyable.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{
class PatchTable;
};
namespace Osd {
/// \brief Cpu patch table
///
/// XXX: We can use just Far::PatchTable for typical CpuEval use cases.
///
/// Currently this class exists because of the template resolution
/// for the CpuEvaluator's generic interface functions
/// (glEvalLimit example uses), and
/// device-specific patch tables such as GLPatchTables internally use
/// as a staging buffer to splice patcharray and interleave sharpnesses.
///
/// Ideally Far::PatchTables should have the same data representation
/// and accessors so that we don't have to copy data unnecessarily.
///
class CpuPatchTable {
public:
static CpuPatchTable *Create(const Far::PatchTable *patchTable,
void *deviceContext = NULL) {
(void)deviceContext; // unused
return new CpuPatchTable(patchTable);
}
explicit CpuPatchTable(const Far::PatchTable *patchTable);
~CpuPatchTable() {}
const PatchArray *GetPatchArrayBuffer() const {
return &_patchArrays[0];
}
const int *GetPatchIndexBuffer() const {
return &_indexBuffer[0];
}
const PatchParam *GetPatchParamBuffer() const {
return &_patchParamBuffer[0];
}
size_t GetNumPatchArrays() const {
return _patchArrays.size();
}
size_t GetPatchIndexSize() const {
return _indexBuffer.size();
}
size_t GetPatchParamSize() const {
return _patchParamBuffer.size();
}
protected:
PatchArrayVector _patchArrays;
std::vector<int> _indexBuffer;
PatchParamVector _patchParamBuffer;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_CPU_PATCH_TABLE_H

View File

@ -52,7 +52,8 @@ CpuVertexBuffer::Create(int numElements, int numVertices,
}
void
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
CpuVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
void * /*deviceContext*/) {
memcpy(_cpuBuffer + startVertex * _numElements,
src, GetNumElements() * numVertices * sizeof(float));

View File

@ -50,7 +50,8 @@ public:
/// This method is meant to be used in client code in order to provide
/// coarse vertices data to Osd.
void UpdateData(const float *src, int startVertex, int numVertices);
void UpdateData(const float *src, int startVertex, int numVertices,
void *deviceContext = NULL);
/// Returns how many elements defined in this vertex buffer.
int GetNumElements() const;

View File

@ -28,6 +28,7 @@
#include <vector>
#include "../far/stencilTable.h"
#include "../osd/types.h"
extern "C" {
void CudaEvalStencils(const float *src,
@ -41,6 +42,25 @@ extern "C" {
const float * weights,
int start,
int end);
void CudaEvalPatches(
const float *src, float *dst,
int length, int srcStride, int dstStride,
int numPatchCoords,
const void *patchCoords,
const void *patchArrays,
const int *patchIndices,
const void *patchParams);
void CudaEvalPatchesWithDerivatives(
const float *src, float *dst, float *du, float *dv,
int length,
int srcStride, int dstStride, int dvStride, int duStride,
int numPatchCoords,
const void *patchCoords,
const void *patchArrays,
const int *patchIndices,
const void *patchParams);
}
namespace OpenSubdiv {
@ -102,6 +122,8 @@ CudaEvaluator::EvalStencils(const float *src,
const float * weights,
int start,
int end) {
if (dst == NULL) return false;
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
@ -112,6 +134,104 @@ CudaEvaluator::EvalStencils(const float *src,
return true;
}
/* static */
bool
CudaEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDu,
VertexBufferDescriptor const &dstDuDesc,
float *dstDv,
VertexBufferDescriptor const &dstDvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start,
int end) {
// PERFORMANCE: need to combine 3 launches together
if (dst) {
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
sizes, offsets, indices, weights,
start, end);
}
if (dstDu) {
CudaEvalStencils(src + srcDesc.offset,
dstDu + dstDuDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDuDesc.stride,
sizes, offsets, indices, duWeights,
start, end);
}
if (dstDv) {
CudaEvalStencils(src + srcDesc.offset,
dstDv + dstDvDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDvDesc.stride,
sizes, offsets, indices, dvWeights,
start, end);
}
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
CudaEvalPatches(src, dst,
srcDesc.length, srcDesc.stride, dstDesc.stride,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
CudaEvalPatchesWithDerivatives(
src, dst, du, dv,
srcDesc.length, srcDesc.stride,
dstDesc.stride, duDesc.stride, dvDesc.stride,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
void
CudaEvaluator::Synchronize(void * /*deviceContext*/) {

View File

@ -29,11 +29,13 @@
#include <vector>
#include "../osd/vertexDescriptor.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class PatchTable;
class StencilTable;
}
@ -43,7 +45,7 @@ namespace Osd {
///
/// This class is a cuda buffer representation of Far::StencilTable.
///
/// CudaComputeKernel consumes this table to apply stencils
/// CudaEvaluator consumes this table to apply stencils
///
///
class CudaStencilTable {
@ -72,10 +74,14 @@ private:
int _numStencils;
};
// ---------------------------------------------------------------------------
class CudaEvaluator {
public:
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static compute function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
@ -99,21 +105,18 @@ public:
///
/// @param deviceContext not used in the CudaEvaluator
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const void *instance = NULL,
void * deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const void *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcVertexBuffer->BindCudaBuffer(),
srcDesc,
dstVertexBuffer->BindCudaBuffer(),
dstDesc,
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
(int const *)stencilTable->GetSizesBuffer(),
(int const *)stencilTable->GetOffsetsBuffer(),
(int const *)stencilTable->GetIndicesBuffer(),
@ -122,17 +125,369 @@ public:
/*end = */ stencilTable->GetNumStencils());
}
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
/// \brief Static eval stencils function which takes raw cuda buffers for
/// input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivative buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dvBuffer Output V-derivative buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
///
/// @param instance not used in the cuda kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the cuda kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const CudaEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw cuda pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
(const int *)patchTable->GetPatchIndexBuffer(),
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
(const int *)patchTable->GetPatchIndexBuffer(),
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndices an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParams an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndices an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParams an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
static void Synchronize(void *deviceContext = NULL);
};

View File

@ -238,6 +238,186 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
// -----------------------------------------------------------------------------
// Osd::PatchCoord osd/types.h
struct PatchCoord {
int arrayIndex;
int patchIndex;
int vertIndex;
float s;
float t;
};
struct PatchArray {
int patchType; // Far::PatchDescriptor::Type
int numPatches;
int indexBase; // offset in the index buffer
int primitiveIdBase; // offset in the patch param buffer
};
struct PatchParam {
int faceIndex;
unsigned int bitField;
float sharpness;
};
__device__ void
getBSplineWeights(float t, float point[4], float deriv[4]) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float const one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
point[3] = one6th * ( t3);
// Derivatives of the above four basis functions at t:
if (deriv) {
deriv[0] = -0.5f*t2 + t - 0.5f;
deriv[1] = 1.5f*t2 - 2.0f*t;
deriv[2] = -1.5f*t2 + t + 0.5f;
deriv[3] = 0.5f*t2;
}
}
__device__ void
adjustBoundaryWeights(unsigned int bits, float sWeights[4], float tWeights[4]) {
int boundary = ((bits >> 4) & 0xf); // far/patchParam.h
if (boundary & 1) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if (boundary & 2) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if (boundary & 4) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if (boundary & 8) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
__device__
int getDepth(unsigned int patchBits) {
return (patchBits & 0x7);
}
__device__
float getParamFraction(unsigned int patchBits) {
bool nonQuadRoot = (patchBits >> 3) & 0x1;
int depth = getDepth(patchBits);
if (nonQuadRoot) {
return 1.0f / float( 1 << (depth-1) );
} else {
return 1.0f / float( 1 << depth );
}
}
__device__
void normalizePatchCoord(unsigned int patchBits, float *u, float *v) {
float frac = getParamFraction(patchBits);
int iu = (patchBits >> 22) & 0x3ff;
int iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = (float)iu*frac;
float pv = (float)iv*frac;
// normalize u,v coordinates
*u = (*u - pu) / frac;
*v = (*v - pv) / frac;
}
__global__ void
computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
int first = threadIdx.x + blockIdx.x * blockDim.x;
// PERFORMANCE: not yet optimized
float wP[20], wDs[20], wDt[20];
for (int i = first; i < numPatchCoords; i += blockDim.x * gridDim.x) {
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrayBuffer[coord.arrayIndex];
int patchType = 6; // array.patchType XXX: REGULAR only for now.
int numControlVertices = 16;
// note: patchIndex is absolute.
unsigned int patchBits = patchParamBuffer[coord.patchIndex].bitField;
// normalize
float s = coord.s;
float t = coord.t;
normalizePatchCoord(patchBits, &s, &t);
float dScale = (float)(1 << getDepth(patchBits));
if (patchType == 6) {
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
getBSplineWeights(s, sWeights, dsWeights);
getBSplineWeights(t, tWeights, dtWeights);
// Compute the tensor product weight of the (s,t) basis function
// corresponding to each control vertex:
adjustBoundaryWeights(patchBits, sWeights, tWeights);
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
}
}
} else {
// TODO: Gregory Basis.
continue;
}
const int *cvs = patchIndexBuffer + array.indexBase + coord.vertIndex;
float * dstVert = dst + i * dstStride;
clear(dstVert, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(dstVert, srcVert, wP[j], length);
}
if (dstDu) {
float *d = dstDu + i * dstDuStride;
clear(d, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(d, srcVert, wDs[j], length);
}
}
if (dstDv) {
float *d = dstDv + i * dstDvStride;
clear(d, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(d, srcVert, wDt[j], length);
}
}
}
}
// -----------------------------------------------------------------------------
#include "../version.h"
#define OPT_KERNEL(NUM_ELEMENTS, KERNEL, X, Y, ARG) \
@ -257,20 +437,12 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs,
extern "C" {
void CudaEvalStencils(const float *src,
float *dst,
int length,
int srcStride,
int dstStride,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end)
{
// assert(cvs and dst and sizes and offsets and indices and weights and (end>=start));
void CudaEvalStencils(
const float *src, float *dst,
int length, int srcStride, int dstStride,
const int * sizes, const int * offsets, const int * indices,
const float * weights,
int start, int end) {
if (length == 0 or srcStride == 0 or dstStride == 0 or (end <= start)) {
return;
}
@ -301,4 +473,36 @@ void CudaEvalStencils(const float *src,
// -----------------------------------------------------------------------------
void CudaEvalPatches(
const float *src, float *dst,
int length, int srcStride, int dstStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
// PERFORMANCE: not optimized at all
computePatches <<<512, 32>>>(
src, dst, NULL, NULL, length, srcStride, dstStride, 0, 0,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
}
void CudaEvalPatchesWithDerivatives(
const float *src, float *dst, float *dstDu, float *dstDv,
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
// PERFORMANCE: not optimized at all
computePatches <<<512, 32>>>(
src, dst, dstDu, dstDv, length, srcStride, dstStride, dstDuStride, dstDvStride,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
}
} /* extern "C" */

View File

@ -0,0 +1,103 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cudaPatchTable.h"
#include <cuda_runtime.h>
#include "../far/patchTable.h"
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
CudaPatchTable::CudaPatchTable() :
_patchArrays(NULL), _indexBuffer(NULL), _patchParamBuffer(NULL) {
}
CudaPatchTable::~CudaPatchTable() {
if (_patchArrays) cudaFree(_patchArrays);
if (_indexBuffer) cudaFree(_indexBuffer);
if (_patchParamBuffer) cudaFree(_patchParamBuffer);
}
CudaPatchTable *
CudaPatchTable::Create(Far::PatchTable const *farPatchTable,
void * /*deviceContext*/) {
CudaPatchTable *instance = new CudaPatchTable();
if (instance->allocate(farPatchTable)) return instance;
delete instance;
return 0;
}
bool
CudaPatchTable::allocate(Far::PatchTable const *farPatchTable) {
CpuPatchTable patchTable(farPatchTable);
size_t numPatchArrays = patchTable.GetNumPatchArrays();
size_t indexSize = patchTable.GetPatchIndexSize();
size_t patchParamSize = patchTable.GetPatchParamSize();
cudaError_t err;
err = cudaMalloc(&_patchArrays, numPatchArrays * sizeof(Osd::PatchArray));
if (err != cudaSuccess) return false;
err = cudaMalloc(&_indexBuffer, indexSize * sizeof(int));
if (err != cudaSuccess) return false;
err = cudaMalloc(&_patchParamBuffer, patchParamSize * sizeof(Osd::PatchParam));
if (err != cudaSuccess) return false;
// copy patch array
err = cudaMemcpy(_patchArrays,
patchTable.GetPatchArrayBuffer(),
numPatchArrays * sizeof(Osd::PatchArray),
cudaMemcpyHostToDevice);
if (err != cudaSuccess) return false;
// copy index buffer
err = cudaMemcpy(_indexBuffer,
patchTable.GetPatchIndexBuffer(),
indexSize * sizeof(int),
cudaMemcpyHostToDevice);
if (err != cudaSuccess) return false;
// patch param buffer
err = cudaMemcpy(_patchParamBuffer,
patchTable.GetPatchParamBuffer(),
patchParamSize * sizeof(Osd::PatchParam),
cudaMemcpyHostToDevice);
if (err != cudaSuccess) return false;
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,83 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
#define OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H
#include "../version.h"
#include "../osd/nonCopyable.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far{
class PatchTable;
};
namespace Osd {
/// \brief CUDA patch table
///
/// This class is a cuda buffer representation of Far::PatchTable.
///
/// CudaEvaluator consumes this table to evaluate on the patches.
///
///
class CudaPatchTable : private NonCopyable<CudaPatchTable> {
public:
/// Creator. Returns NULL if error
static CudaPatchTable *Create(Far::PatchTable const *patchTable,
void *deviceContext = NULL);
/// Destructor
~CudaPatchTable();
/// Returns the cuda memory of the array of Osd::PatchArray buffer
void *GetPatchArrayBuffer() const { return _patchArrays; }
/// Returns the cuda memory of the patch control vertices
void *GetPatchIndexBuffer() const { return _indexBuffer; }
/// Returns the cuda memory of the array of Osd::PatchParam buffer
void *GetPatchParamBuffer() const { return _patchParamBuffer; }
protected:
CudaPatchTable();
bool allocate(Far::PatchTable const *patchTable);
void *_patchArrays;
void *_indexBuffer;
void *_patchParamBuffer;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_CUDA_PATCH_TABLE_H

View File

@ -43,7 +43,8 @@ CudaVertexBuffer::~CudaVertexBuffer() {
}
CudaVertexBuffer *
CudaVertexBuffer::Create(int numElements, int numVertices) {
CudaVertexBuffer::Create(int numElements, int numVertices,
void * /*deviceContext */) {
CudaVertexBuffer *instance =
new CudaVertexBuffer(numElements, numVertices);
if (instance->allocate()) return instance;
@ -52,7 +53,8 @@ CudaVertexBuffer::Create(int numElements, int numVertices) {
}
void
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices) {
CudaVertexBuffer::UpdateData(const float *src, int startVertex, int numVertices,
void * /*deviceContext*/) {
size_t size = _numElements * numVertices * sizeof(float);

View File

@ -27,6 +27,8 @@
#include "../version.h"
#include <cstddef>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -41,14 +43,16 @@ class CudaVertexBuffer {
public:
/// Creator. Returns NULL if error.
static CudaVertexBuffer * Create(int numElements, int numVertices);
static CudaVertexBuffer * Create(int numElements, int numVertices,
void *deviceContext = NULL);
/// Destructor.
~CudaVertexBuffer();
/// This method is meant to be used in client code in order to provide coarse
/// vertices data to Osd.
void UpdateData(const float *src, int startVertex, int numVertices);
void UpdateData(const float *src, int startVertex, int numVertices,
void *deviceContext=NULL);
/// Returns how many elements defined in this vertex buffer.
int GetNumElements() const;

View File

@ -30,6 +30,7 @@
#include <vector>
#include "../far/patchDescriptor.h"
#include "../osd/nonCopyable.h"
#include "../osd/types.h"
struct ID3D11Buffer;
struct ID3D11ShaderResourceView;
@ -49,33 +50,6 @@ class D3D11PatchTable : private NonCopyable<D3D11PatchTable> {
public:
typedef ID3D11Buffer * VertexBufferBinding;
// XXX: this struct will be further refactored.
class PatchArray {
public:
PatchArray(Far::PatchDescriptor desc, int numPatches,
int indexBase, int primitiveIdBase) :
desc(desc), numPatches(numPatches), indexBase(indexBase),
primitiveIdBase(primitiveIdBase) {}
Far::PatchDescriptor const &GetDescriptor() const {
return desc;
}
int GetNumPatches() const {
return numPatches;
}
int GetIndexBase() const {
return indexBase;
}
int GetPrimitiveIdBase() const {
return primitiveIdBase;
}
private:
Far::PatchDescriptor desc;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
typedef std::vector<PatchArray> PatchArrayVector;
D3D11PatchTable();
~D3D11PatchTable();

View File

@ -88,16 +88,66 @@ GLStencilTableSSBO::~GLStencilTableSSBO() {
// ---------------------------------------------------------------------------
GLComputeEvaluator::GLComputeEvaluator() :
_program(0), _workGroupSize(64) {
GLComputeEvaluator::GLComputeEvaluator() : _workGroupSize(64) {
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
memset (&_patchKernel, 0, sizeof(_patchKernel));
}
GLComputeEvaluator::~GLComputeEvaluator() {
if (_program) {
glDeleteProgram(_program);
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
}
static GLuint
compileKernel(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc,
const char *kernelDefine,
int workGroupSize) {
GLuint program = glCreateProgram();
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
std::ostringstream defines;
defines << "#define LENGTH " << srcDesc.length << "\n"
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
<< "#define WORK_GROUP_SIZE " << workGroupSize << "\n"
<< kernelDefine << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3] = {"#version 430\n", 0, 0};
shaderSources[1] = defineStr.c_str();
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
glCompileShader(shader);
glAttachShader(program, shader);
GLint linked = 0;
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
char buffer[1024];
glGetShaderInfoLog(shader, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glGetProgramInfoLog(program, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glDeleteProgram(program);
return 0;
}
glDeleteShader(shader);
return program;
}
bool
GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc) {
@ -108,58 +158,55 @@ GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
return false;
}
if (_program) {
glDeleteProgram(_program);
_program = 0;
// create stencil kernel
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
_program = glCreateProgram();
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
std::ostringstream defines;
defines << "#define LENGTH " << srcDesc.length << "\n"
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
<< "#define DST_STRIDE " << dstDesc.stride << "\n"
<< "#define WORK_GROUP_SIZE " << _workGroupSize << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3] = {"#version 430\n", 0, 0};
shaderSources[1] = defineStr.c_str();
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
glCompileShader(shader);
glAttachShader(_program, shader);
GLint linked = 0;
glLinkProgram(_program);
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
char buffer[1024];
glGetShaderInfoLog(shader, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glGetProgramInfoLog(_program, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glDeleteProgram(_program);
_program = 0;
return false;
}
glDeleteShader(shader);
_stencilKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS",
_workGroupSize);
if (_stencilKernel.program == 0) return false;
// store uniform locations for the compute kernel program.
_uniformSizes = glGetUniformLocation(_program, "stencilSizes");
_uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
_uniformIndices = glGetUniformLocation(_program, "stencilIndices");
_uniformWeights = glGetUniformLocation(_program, "stencilIWeights");
_stencilKernel.uniformSizes =
glGetUniformLocation(_stencilKernel.program, "stencilSizes");
_stencilKernel.uniformOffsets =
glGetUniformLocation(_stencilKernel.program, "stencilOffsets");
_stencilKernel.uniformIndices =
glGetUniformLocation(_stencilKernel.program, "stencilIndices");
_stencilKernel.uniformWeights =
glGetUniformLocation(_stencilKernel.program, "stencilIWeights");
_stencilKernel.uniformStart =
glGetUniformLocation(_stencilKernel.program, "batchStart");
_stencilKernel.uniformEnd =
glGetUniformLocation(_stencilKernel.program, "batchEnd");
_stencilKernel.uniformSrcOffset =
glGetUniformLocation(_stencilKernel.program, "srcOffset");
_stencilKernel.uniformDstOffset =
glGetUniformLocation(_stencilKernel.program, "dstOffset");
_uniformStart = glGetUniformLocation(_program, "batchStart");
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
// create patch kernel
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
_patchKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES",
_workGroupSize);
if (_patchKernel.program == 0) return false;
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
_uniformDstOffset = glGetUniformLocation(_program, "dstOffset");
// uniform locaitons
_patchKernel.uniformSrcOffset =
glGetUniformLocation(_patchKernel.program, "srcOffset");
_patchKernel.uniformDstOffset =
glGetUniformLocation(_patchKernel.program, "dstOffset");
_patchKernel.uniformPatchArray =
glGetUniformLocation(_patchKernel.program, "patchArray");
_patchKernel.uniformDuDesc =
glGetUniformLocation(_patchKernel.program, "dstDuDesc");
_patchKernel.uniformDvDesc =
glGetUniformLocation(_patchKernel.program, "dstDvDesc");
return true;
}
@ -183,7 +230,7 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
GLuint weightsBuffer,
int start,
int end) const {
if (!_program) return false;
if (!_stencilKernel.program) return false;
int count = end - start;
if (count <= 0) {
return true;
@ -196,12 +243,12 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer);
glUseProgram(_program);
glUseProgram(_stencilKernel.program);
glUniform1i(_uniformStart, start);
glUniform1i(_uniformEnd, end);
glUniform1i(_uniformSrcOffset, srcDesc.offset);
glUniform1i(_uniformDstOffset, dstDesc.offset);
glUniform1i(_stencilKernel.uniformStart, start);
glUniform1i(_stencilKernel.uniformEnd, end);
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
glUniform1i(_stencilKernel.uniformDstOffset, dstDesc.offset);
glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
@ -218,6 +265,52 @@ GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
return true;
}
bool
GLComputeEvaluator::EvalPatches(
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const {
if (!_patchKernel.program) return false;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, patchCoordsBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchIndexBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchParamsBuffer);
glUseProgram(_patchKernel.program);
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
(const GLint*)&patchArrays[0]);
glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1);
glUseProgram(0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -28,6 +28,7 @@
#include "../version.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -92,26 +93,32 @@ public:
/// Destructor. note that the GL context must be made current.
~GLComputeEvaluator();
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static compute function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a
/// const float pointer for read
/// GL buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a
/// float pointer for write
/// GL buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param evaluator cached compiled instance. Clients are supposed to
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
@ -119,25 +126,25 @@ public:
///
/// @param deviceContext not used in the GLSL kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
} else {
// Create a kernel on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
delete instance;
return r;
@ -148,15 +155,14 @@ public:
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcVertexBuffer->BindVBO(),
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcBuffer->BindVBO(),
srcDesc,
dstVertexBuffer->BindVBO(),
dstBuffer->BindVBO(),
dstDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
@ -168,10 +174,8 @@ public:
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
bool EvalStencils(GLuint srcBuffer,
VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer,
VertexBufferDescriptor const &dstDesc,
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint sizesBuffer,
GLuint offsetsBuffer,
GLuint indicesBuffer,
@ -179,6 +183,271 @@ public:
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
///
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer
///
/// @param duDesc
///
/// @param dvBuffer
///
/// @param dvDesc
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLComputeEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBOBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
0, VertexBufferDescriptor(),
0, VertexBufferDescriptor(),
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindVBO() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindVBO() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
duBuffer->BindVBO(), duDesc,
dvBuffer->BindVBO(), dvDesc,
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const;
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure GLSL kernel. A valid GL context must be made current before
/// calling this function. Returns false if it fails to compile the kernel.
bool Compile(VertexBufferDescriptor const &srcDesc,
@ -188,18 +457,27 @@ public:
static void Synchronize(void *deviceContext);
private:
GLuint _program;
struct _StencilKernel {
GLuint program;
GLuint uniformSizes;
GLuint uniformOffsets;
GLuint uniformIndices;
GLuint uniformWeights;
GLuint uniformStart;
GLuint uniformEnd;
GLuint uniformSrcOffset;
GLuint uniformDstOffset;
} _stencilKernel;
GLuint _uniformSizes, // stencil table
_uniformOffsets,
_uniformIndices,
_uniformWeights,
struct _PatchKernel {
GLuint program;
GLuint uniformSrcOffset;
GLuint uniformDstOffset;
GLuint uniformPatchArray;
GLuint uniformDuDesc;
GLuint uniformDvDesc;
_uniformStart, // range
_uniformEnd,
_uniformSrcOffset, // src buffer offset (in elements)
_uniformDstOffset; // dst buffer offset (in elements)
} _patchKernel;
int _workGroupSize;
};

View File

@ -26,6 +26,7 @@
#include "../far/patchTable.h"
#include "../osd/opengl.h"
#include "../osd/cpuPatchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -33,11 +34,14 @@ namespace OPENSUBDIV_VERSION {
namespace Osd {
GLPatchTable::GLPatchTable() :
_indexBuffer(0), _patchParamTexture(0) {
_patchIndexBuffer(0), _patchParamBuffer(0),
_patchIndexTexture(0), _patchParamTexture(0) {
}
GLPatchTable::~GLPatchTable() {
if (_indexBuffer) glDeleteBuffers(1, &_indexBuffer);
if (_patchIndexBuffer) glDeleteBuffers(1, &_patchIndexBuffer);
if (_patchParamBuffer) glDeleteBuffers(1, &_patchParamBuffer);
if (_patchIndexTexture) glDeleteTextures(1, &_patchIndexTexture);
if (_patchParamTexture) glDeleteTextures(1, &_patchParamTexture);
}
@ -52,84 +56,56 @@ GLPatchTable::Create(Far::PatchTable const *farPatchTable,
bool
GLPatchTable::allocate(Far::PatchTable const *farPatchTable) {
glGenBuffers(1, &_indexBuffer);
glGenBuffers(1, &_patchIndexBuffer);
glGenBuffers(1, &_patchParamBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _indexBuffer);
std::vector<int> buffer;
std::vector<unsigned int> ppBuffer;
CpuPatchTable patchTable(farPatchTable);
// needs reserve?
size_t numPatchArrays = patchTable.GetNumPatchArrays();
GLsizei indexSize = (GLsizei)patchTable.GetPatchIndexSize();
GLsizei patchParamSize = (GLsizei)patchTable.GetPatchParamSize();
int nPatchArrays = farPatchTable->GetNumPatchArrays();
// for each patchArray
for (int j = 0; j < nPatchArrays; ++j) {
PatchArray patchArray(farPatchTable->GetPatchArrayDescriptor(j),
farPatchTable->GetNumPatches(j),
(int)buffer.size(),
(int)ppBuffer.size()/3);
_patchArrays.push_back(patchArray);
// indices
Far::ConstIndexArray indices = farPatchTable->GetPatchArrayVertices(j);
for (int k = 0; k < indices.size(); ++k) {
buffer.push_back(indices[k]);
}
// patchParams
#if 0
// XXX: we need sharpness interface for patcharray or put sharpness
// into patchParam.
Far::ConstPatchParamArray patchParams =
farPatchTable->GetPatchParams(j);
for (int k = 0; k < patchParams.size(); ++k) {
float sharpness = 0.0;
ppBuffer.push_back(patchParams[k].faceIndex);
ppBuffer.push_back(patchParams[k].bitField.field);
ppBuffer.push_back(*((unsigned int *)&sharpness));
}
#else
// XXX: workaround. GetPatchParamTable() will be deprecated though.
Far::PatchParamTable const & patchParamTable =
farPatchTable->GetPatchParamTable();
std::vector<Far::Index> const &sharpnessIndexTable =
farPatchTable->GetSharpnessIndexTable();
int numPatches = farPatchTable->GetNumPatches(j);
for (int k = 0; k < numPatches; ++k) {
float sharpness = 0.0;
int patchIndex = (int)ppBuffer.size()/3;
if (patchIndex < (int)sharpnessIndexTable.size()) {
int sharpnessIndex = sharpnessIndexTable[patchIndex];
if (sharpnessIndex >= 0)
sharpness = farPatchTable->GetSharpnessValues()[sharpnessIndex];
}
ppBuffer.push_back(patchParamTable[patchIndex].faceIndex);
ppBuffer.push_back(patchParamTable[patchIndex].bitField.field);
ppBuffer.push_back(*((unsigned int *)&sharpness));
}
#endif
}
// copy patch array
_patchArrays.insert(_patchArrays.end(),
patchTable.GetPatchArrayBuffer(),
patchTable.GetPatchArrayBuffer() + numPatchArrays);
// copy index buffer
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _patchIndexBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
(int)buffer.size()*sizeof(int), &buffer[0], GL_STATIC_DRAW);
// patchParam is currently expected to be texture (it can be SSBO)
GLuint texBuffer = 0;
glGenBuffers(1, &texBuffer);
glBindBuffer(GL_ARRAY_BUFFER, texBuffer);
glBufferData(GL_ARRAY_BUFFER, ppBuffer.size()*sizeof(unsigned int),
&ppBuffer[0], GL_STATIC_DRAW);
glGenTextures(1, &_patchParamTexture);
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, texBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
glDeleteBuffers(1, &texBuffer);
indexSize * sizeof(GLint),
patchTable.GetPatchIndexBuffer(),
GL_STATIC_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
// copy patchparam buffer
glBindBuffer(GL_ARRAY_BUFFER, _patchParamBuffer);
glBufferData(GL_ARRAY_BUFFER,
patchParamSize * sizeof(PatchParam),
patchTable.GetPatchParamBuffer(),
GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// make both buffer as texture buffers too.
glGenTextures(1, &_patchIndexTexture);
glGenTextures(1, &_patchParamTexture);
GLuint buffer;
glGenBuffers(1, &buffer);
glBindBuffer(GL_ARRAY_BUFFER, buffer);
glBufferData(GL_ARRAY_BUFFER,
indexSize * sizeof(GLint),
patchTable.GetPatchIndexBuffer(),
GL_STATIC_DRAW);
glBindTexture(GL_TEXTURE_BUFFER, _patchIndexTexture);
// glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, _patchIndexBuffer);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, buffer);
glBindTexture(GL_TEXTURE_BUFFER, _patchParamTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGB32I, _patchParamBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
return true;
}

View File

@ -27,10 +27,9 @@
#include "../version.h"
#include <vector>
#include "../far/patchDescriptor.h"
#include "../osd/nonCopyable.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
@ -45,34 +44,6 @@ class GLPatchTable : private NonCopyable<GLPatchTable> {
public:
typedef GLuint VertexBufferBinding;
// XXX: this struct will be further refactored.
class PatchArray {
public:
PatchArray(Far::PatchDescriptor desc, int numPatches,
int indexBase, int primitiveIdBase) :
desc(desc), numPatches(numPatches), indexBase(indexBase),
primitiveIdBase(primitiveIdBase) {}
Far::PatchDescriptor const &GetDescriptor() const {
return desc;
}
int GetNumPatches() const {
return numPatches;
}
int GetIndexBase() const {
return indexBase;
}
int GetPrimitiveIdBase() const {
return primitiveIdBase;
}
private:
Far::PatchDescriptor desc;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
typedef std::vector<PatchArray> PatchArrayVector;
GLPatchTable();
~GLPatchTable();
static GLPatchTable *Create(Far::PatchTable const *farPatchTable,
@ -84,7 +55,17 @@ public:
/// Returns the GL index buffer containing the patch control vertices
GLuint GetPatchIndexBuffer() const {
return _indexBuffer;
return _patchIndexBuffer;
}
/// Returns the GL index buffer containing the patch parameter
GLuint GetPatchParamBuffer() const {
return _patchParamBuffer;
}
/// Returns the GL texture buffer containing the patch control vertices
GLuint GetPatchIndexTextureBuffer() const {
return _patchIndexTexture;
}
/// Returns the GL texture buffer containing the patch parameter
@ -93,11 +74,17 @@ public:
}
protected:
GLPatchTable();
// allocate buffers from patchTable
bool allocate(Far::PatchTable const *farPatchTable);
PatchArrayVector _patchArrays;
GLuint _indexBuffer;
GLuint _patchIndexBuffer;
GLuint _patchParamBuffer;
GLuint _patchIndexTexture;
GLuint _patchParamTexture;
};

View File

@ -108,37 +108,36 @@ GLStencilTableTBO::~GLStencilTableTBO() {
// ---------------------------------------------------------------------------
GLXFBEvaluator::GLXFBEvaluator() :
_program(0), _srcBufferTexture(0),
_uniformSrcBufferTexture(0), _uniformSizesTexture(0),
_uniformOffsetsTexture(0), _uniformIndicesTexture(0),
_uniformWeightsTexture(0), _uniformStart(0), _uniformEnd(0),
_uniformSrcOffset(0) {
GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) {
memset (&_stencilKernel, 0, sizeof(_stencilKernel));
memset (&_patchKernel, 0, sizeof(_patchKernel));
}
GLXFBEvaluator::~GLXFBEvaluator() {
if (_program) {
glDeleteProgram(_program);
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
if (_srcBufferTexture) {
glDeleteTextures(1, &_srcBufferTexture);
}
}
bool
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc) {
if (_program) {
glDeleteProgram(_program);
_program = 0;
}
_program = glCreateProgram();
static GLuint
compileKernel(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc,
const char *kernelDefine) {
GLuint program = glCreateProgram();
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
std::ostringstream defines;
defines << "#define LENGTH " << srcDesc.length << "\n"
<< "#define SRC_STRIDE " << srcDesc.stride << "\n";
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
<< kernelDefine << "\n";
std::string defineStr = defines.str();
const char *shaderSources[3] = {"#version 410\n", NULL, NULL};
@ -147,7 +146,7 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
glCompileShader(shader);
glAttachShader(_program, shader);
glAttachShader(program, shader);
std::vector<std::string> outputs;
std::vector<const char *> pOutputs;
@ -183,40 +182,83 @@ GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
}
}
glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(),
glTransformFeedbackVaryings(program, (GLsizei)outputs.size(),
&pOutputs[0], GL_INTERLEAVED_ATTRIBS);
GLint linked = 0;
glLinkProgram(_program);
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
char buffer[1024];
glGetShaderInfoLog(shader, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glGetProgramInfoLog(_program, 1024, NULL, buffer);
glGetProgramInfoLog(program, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glDeleteProgram(_program);
_program = 0;
return false;
glDeleteProgram(program);
program = 0;
}
glDeleteShader(shader);
// set uniform locations for compute kernels
_uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer");
return program;
}
_uniformSizesTexture = glGetUniformLocation(_program, "sizes");
_uniformOffsetsTexture = glGetUniformLocation(_program, "offsets");
_uniformIndicesTexture = glGetUniformLocation(_program, "indices");
_uniformWeightsTexture = glGetUniformLocation(_program, "weights");
bool
GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
VertexBufferDescriptor const &dstDesc) {
_uniformStart = glGetUniformLocation(_program, "batchStart");
_uniformEnd = glGetUniformLocation(_program, "batchEnd");
// create stencil kernel
if (_stencilKernel.program) {
glDeleteProgram(_stencilKernel.program);
}
_stencilKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS");
if (_stencilKernel.program == 0) return false;
_uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
// cache uniform locations
_stencilKernel.uniformSrcBufferTexture
= glGetUniformLocation(_stencilKernel.program, "vertexBuffer");
_stencilKernel.uniformSrcOffset
= glGetUniformLocation(_stencilKernel.program, "srcOffset");
_stencilKernel.uniformSizesTexture
= glGetUniformLocation(_stencilKernel.program, "sizes");
_stencilKernel.uniformOffsetsTexture
= glGetUniformLocation(_stencilKernel.program, "offsets");
_stencilKernel.uniformIndicesTexture
= glGetUniformLocation(_stencilKernel.program, "indices");
_stencilKernel.uniformWeightsTexture
= glGetUniformLocation(_stencilKernel.program, "weights");
_stencilKernel.uniformStart
= glGetUniformLocation(_stencilKernel.program, "batchStart");
_stencilKernel.uniformEnd
= glGetUniformLocation(_stencilKernel.program, "batchEnd");
// create patch kernel
if (_patchKernel.program) {
glDeleteProgram(_patchKernel.program);
}
_patchKernel.program = compileKernel(
srcDesc, dstDesc,
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES");
if (_patchKernel.program == 0) return false;
// cache uniform locations
_patchKernel.uniformSrcBufferTexture
= glGetUniformLocation(_patchKernel.program, "vertexBuffer");
_patchKernel.uniformSrcOffset
= glGetUniformLocation(_patchKernel.program, "srcOffset");
_patchKernel.uniformPatchArray
= glGetUniformLocation(_patchKernel.program, "patchArray");
_patchKernel.uniformPatchParamTexture
= glGetUniformLocation(_patchKernel.program, "patchParamBuffer");
_patchKernel.uniformPatchIndexTexture
= glGetUniformLocation(_patchKernel.program, "patchIndexBuffer");
// create a texture for input buffer
if (!_srcBufferTexture) {
@ -255,7 +297,7 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
GLuint weightsTexture,
int start,
int end) const {
if (!_program) return false;
if (!_stencilKernel.program) return false;
int count = end - start;
if (count <= 0) {
return true;
@ -268,25 +310,25 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
glBindVertexArray(vao);
glEnable(GL_RASTERIZER_DISCARD);
glUseProgram(_program);
glUseProgram(_stencilKernel.program);
// Set input VBO as a texture buffer.
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0);
bindTexture(_stencilKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
// bind stencil table textures.
bindTexture(_uniformSizesTexture, sizesTexture, 1);
bindTexture(_uniformOffsetsTexture, offsetsTexture, 2);
bindTexture(_uniformIndicesTexture, indicesTexture, 3);
bindTexture(_uniformWeightsTexture, weightsTexture, 4);
bindTexture(_stencilKernel.uniformSizesTexture, sizesTexture, 1);
bindTexture(_stencilKernel.uniformOffsetsTexture, offsetsTexture, 2);
bindTexture(_stencilKernel.uniformIndicesTexture, indicesTexture, 3);
bindTexture(_stencilKernel.uniformWeightsTexture, weightsTexture, 4);
// set batch range
glUniform1i(_uniformStart, start);
glUniform1i(_uniformEnd, end);
glUniform1i(_uniformSrcOffset, srcDesc.offset);
glUniform1i(_stencilKernel.uniformStart, start);
glUniform1i(_stencilKernel.uniformEnd, end);
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
// The destination buffer is bound at vertex boundary.
//
@ -347,6 +389,94 @@ GLXFBEvaluator::EvalStencils(GLuint srcBuffer,
return true;
}
bool
GLXFBEvaluator::EvalPatches(
GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const & /*duDesc*/,
GLuint dvBuffer, VertexBufferDescriptor const & /*dvDesc*/,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexTexture,
GLuint patchParamTexture) const {
if (!_patchKernel.program) return false;
if (duBuffer != 0 || dvBuffer != 0) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"GLXFBEvaluator doesn't support derivative evaluation yet.\n");
}
// bind vertex array
// always create new one, to be safe with multiple contexts (slow though)
GLuint vao = 0;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glEnable(GL_RASTERIZER_DISCARD);
glUseProgram(_patchKernel.program);
// Set input VBO as a texture buffer.
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
bindTexture(_patchKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
// bind patch index and patch param textures.
bindTexture(_patchKernel.uniformPatchParamTexture, patchParamTexture, 1);
bindTexture(_patchKernel.uniformPatchIndexTexture, patchIndexTexture, 2);
// set other uniforms
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
(const GLint*)&patchArrays[0]);
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
// input patchcoords
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
int stride = sizeof(int) * 5; // patchcoord = int*5 struct
glBindBuffer(GL_ARRAY_BUFFER, patchCoordsBuffer);
glVertexAttribIPointer(0, 3, GL_UNSIGNED_INT, stride, (void*)0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, stride, (void*)(sizeof(int)*3));
int dstBufferBindOffset =
dstDesc.offset - (dstDesc.offset % dstDesc.stride);
// bind destination buffer
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
0, dstBuffer,
dstBufferBindOffset * sizeof(float),
numPatchCoords * dstDesc.stride * sizeof(float));
glBeginTransformFeedback(GL_POINTS);
glDrawArrays(GL_POINTS, 0, numPatchCoords);
glEndTransformFeedback();
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
// unbind textures
for (int i = 0; i < 3; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
glDisable(GL_RASTERIZER_DISCARD);
glUseProgram(0);
glActiveTexture(GL_TEXTURE0);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
// revert vao
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -28,6 +28,7 @@
#include "../version.h"
#include "../osd/opengl.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -93,6 +94,12 @@ public:
/// Destructor. note that the GL context must be made current.
~GLXFBEvaluator();
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
@ -120,25 +127,25 @@ public:
///
/// @param deviceContext not used in the GLSLTransformFeedback kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
dstVertexBuffer, dstDesc,
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable);
delete instance;
return r;
@ -147,18 +154,30 @@ public:
}
}
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcVertexBuffer->BindVBO(),
srcDesc,
dstVertexBuffer->BindVBO(),
dstDesc,
/// \brief dispatch eval stencils function.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object for destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable) const {
return EvalStencils(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
stencilTable->GetSizesTexture(),
stencilTable->GetOffsetsTexture(),
stencilTable->GetIndicesTexture(),
@ -167,12 +186,31 @@ public:
/* end = */ stencilTable->GetNumStencils());
}
/// Dispatch the GLSL compute kernel on GPU asynchronously.
/// returns false if the kernel hasn't been compiled yet.
bool EvalStencils(GLuint srcBuffer,
VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer,
VertexBufferDescriptor const &dstDesc,
/// \brief Static eval stencils function, dispatch the GLSL XFB kernel on
/// on GPU asynchronously.
///
/// @param srcBuffer GL buffer of input primvars.
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer GL buffer of output primvars.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizesBuffer GL buffer of the sizes in the stencil table
///
/// @param offsetsBuffer GL buffer of the offsets in the stencil table
///
/// @param indicesBuffer GL buffer of the indices in the stencil table
///
/// @param weightsBuffer GL buffer of the weifgrs in the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
bool EvalStencils(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint sizesBuffer,
GLuint offsetsBuffer,
GLuint indicesBuffer,
@ -180,6 +218,270 @@ public:
int start,
int end) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
///
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindVBO() method returning a GL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindVBO() method returning a GL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer
///
/// @param duDesc
///
/// @param dvBuffer
///
/// @param dvDesc
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindVBO() method returning an
/// array of PatchCoord struct in VBO.
///
/// @param patchTable GLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext not used in the GLXFB evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
GLXFBEvaluator const *instance,
void * deviceContext = NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
0, VertexBufferDescriptor(),
0, VertexBufferDescriptor(),
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexTextureBuffer(),
patchTable->GetPatchParamTextureBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output s-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output t-derivatives buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable GLPatchTable or equivalent
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable) const {
return EvalPatches(srcBuffer->BindVBO(), srcDesc,
dstBuffer->BindVBO(), dstDesc,
duBuffer->BindVBO(), duDesc,
dvBuffer->BindVBO(), dvDesc,
numPatchCoords,
patchCoords->BindVBO(),
patchTable->GetPatchArrays(),
patchTable->GetPatchIndexTextureBuffer(),
patchTable->GetPatchParamTextureBuffer());
}
bool EvalPatches(GLuint srcBuffer, VertexBufferDescriptor const &srcDesc,
GLuint dstBuffer, VertexBufferDescriptor const &dstDesc,
GLuint duBuffer, VertexBufferDescriptor const &duDesc,
GLuint dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const;
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure GLSL kernel. A valid GL context must be made current before
/// calling this function. Returns false if it fails to compile the kernel.
bool Compile(VertexBufferDescriptor const &srcDesc,
@ -189,19 +491,31 @@ public:
static void Synchronize(void *kernel);
private:
GLuint _program;
GLuint _srcBufferTexture;
GLuint _uniformSrcBufferTexture;
GLuint _uniformSizesTexture;
GLuint _uniformOffsetsTexture;
GLuint _uniformIndicesTexture;
GLuint _uniformWeightsTexture;
struct _StencilKernel {
GLuint program;
GLuint uniformSrcBufferTexture;
GLuint uniformSrcOffset; // src buffer offset (in elements)
GLuint uniformSizesTexture;
GLuint uniformOffsetsTexture;
GLuint uniformIndicesTexture;
GLuint uniformWeightsTexture;
GLuint uniformStart; // range
GLuint uniformEnd;
} _stencilKernel;
struct _PatchKernel {
GLuint program;
GLuint uniformSrcBufferTexture;
GLuint uniformSrcOffset; // src buffer offset (in elements)
GLuint uniformPatchArray;
GLuint uniformPatchParamTexture;
GLuint uniformPatchIndexTexture;
} _patchKernel;
GLuint _uniformStart; // range
GLuint _uniformEnd;
GLuint _uniformSrcOffset; // src buffer offset (in elements)
};
} // end namespace Osd

View File

@ -24,18 +24,10 @@
//------------------------------------------------------------------------------
uniform int batchStart = 0;
uniform int batchEnd = 0;
uniform int srcOffset = 0;
uniform int dstOffset = 0;
layout(binding=0) buffer src_buffer { float srcVertexBuffer[]; };
layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; };
layout(binding=2) buffer stencilSizes { int _sizes[]; };
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
layout(binding=4) buffer stencilIndices { int _indices[]; };
layout(binding=5) buffer stencilWeights { float _weights[]; };
layout(local_size_x=WORK_GROUP_SIZE, local_size_y=1, local_size_z=1) in;
//------------------------------------------------------------------------------
@ -73,6 +65,15 @@ void addWithWeight(inout Vertex v, const Vertex src, float weight) {
}
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
uniform int batchStart = 0;
uniform int batchEnd = 0;
layout(binding=2) buffer stencilSizes { int _sizes[]; };
layout(binding=3) buffer stencilOffsets { int _offsets[]; };
layout(binding=4) buffer stencilIndices { int _indices[]; };
layout(binding=5) buffer stencilWeights { float _weights[]; };
void main() {
int current = int(gl_GlobalInvocationID.x) + batchStart;
@ -94,4 +95,194 @@ void main() {
writeVertex(current, dst);
}
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES)
// PERFORMANCE: stride could be constant, but not as significant as length
//struct PatchArray {
// int patchType;
// int numPatches;
// int indexBase; // an offset within the index buffer
// int primitiveIdBase; // an offset within the patch param buffer
//};
// # of patcharrays is 1 or 2.
uniform ivec4 patchArray[2];
uniform ivec3 dstDuDesc;
uniform ivec3 dstDvDesc;
layout(binding=2) buffer du_buffer { float dstDuBuffer[]; };
layout(binding=3) buffer dv_buffer { float dstDvBuffer[]; };
struct PatchCoord {
int arrayIndex;
int patchIndex;
int vertIndex;
float s;
float t;
};
struct PatchParam {
int faceIndex;
uint patchBits;
float sharpness;
};
layout(binding=4) buffer patchCoord_buffer { PatchCoord patchCoords[]; };
layout(binding=5) buffer patchIndex_buffer { int patchIndexBuffer[]; };
layout(binding=6) buffer patchParam_buffer { PatchParam patchParamBuffer[]; };
void writeDu(int index, Vertex du) {
int duIndex = dstDuDesc.x + index * dstDuDesc.z;
for (int i = 0; i < LENGTH; ++i) {
dstDuBuffer[duIndex + i] = du.vertexData[i];
}
}
void writeDv(int index, Vertex dv) {
int dvIndex = dstDvDesc.x + index * dstDvDesc.z;
for (int i = 0; i < LENGTH; ++i) {
dstDvBuffer[dvIndex + i] = dv.vertexData[i];
}
}
void getBSplineWeights(float t, inout vec4 point, inout vec4 deriv) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
point.w = one6th * ( t3);
// Derivatives of the above four basis functions at t:
deriv.x = -0.5f*t2 + t - 0.5f;
deriv.y = 1.5f*t2 - 2.0f*t;
deriv.z = -1.5f*t2 + t + 0.5f;
deriv.w = 0.5f*t2;
}
uint getDepth(uint patchBits) {
return (patchBits & 0x7);
}
float getParamFraction(uint patchBits) {
uint nonQuadRoot = (patchBits >> 3) & 0x1;
uint depth = getDepth(patchBits);
if (nonQuadRoot == 1) {
return 1.0f / float( 1 << (depth-1) );
} else {
return 1.0f / float( 1 << depth );
}
}
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
float frac = getParamFraction(patchBits);
uint iu = (patchBits >> 22) & 0x3ff;
uint iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = float(iu*frac);
float pv = float(iv*frac);
// normalize u,v coordinates
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
}
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
uint boundary = ((bits >> 4) & 0xf);
if ((boundary & 1) != 0) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if ((boundary & 2) != 0) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if ((boundary & 4) != 0) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if ((boundary & 8) != 0) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
void main() {
int current = int(gl_GlobalInvocationID.x);
PatchCoord coord = patchCoords[current];
int patchIndex = coord.patchIndex;
ivec4 array = patchArray[coord.arrayIndex];
int patchType = 6; // array.x XXX: REGULAR only for now.
int numControlVertices = 16;
uint patchBits = patchParamBuffer[patchIndex].patchBits;
vec2 uv = normalizePatchCoord(patchBits, vec2(coord.s, coord.t));
float dScale = float(1 << getDepth(patchBits));
float wP[20], wDs[20], wDt[20];
if (patchType == 6) { // REGULAR
vec4 sWeights, tWeights, dsWeights, dtWeights;
getBSplineWeights(uv.x, sWeights, dsWeights);
getBSplineWeights(uv.y, tWeights, dtWeights);
adjustBoundaryWeights(patchBits, sWeights, tWeights);
adjustBoundaryWeights(patchBits, dsWeights, dtWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
wDs[4*k+l] = dsWeights[l] * tWeights[k] * dScale;
wDt[4*k+l] = sWeights[l] * dtWeights[k] * dScale;
}
}
} else {
// TODO: GREGORY BASIS
}
Vertex dst;
clear(dst);
int indexBase = array.z + coord.vertIndex;
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(dst, readVertex(index), wP[i]);
}
writeVertex(current, dst);
if (dstDuDesc.y > 0) { // length
Vertex du;
clear(du);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(du, readVertex(index), wDs[i]);
}
writeDu(current, du);
}
if (dstDvDesc.y > 0) {
Vertex dv;
clear(dv);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(dv, readVertex(index), wDt[i]);
}
writeDv(current, dv);
}
}
#endif

View File

@ -25,18 +25,8 @@
//------------------------------------------------------------------------------
uniform samplerBuffer vertexBuffer;
out float outVertexBuffer[LENGTH];
uniform usamplerBuffer sizes;
uniform isamplerBuffer offsets;
uniform isamplerBuffer indices;
uniform samplerBuffer weights;
uniform int batchStart = 0;
uniform int batchEnd = 0;
uniform int srcOffset = 0;
out float outVertexBuffer[LENGTH];
//------------------------------------------------------------------------------
@ -72,6 +62,16 @@ void writeVertex(Vertex v) {
}
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS)
uniform usamplerBuffer sizes;
uniform isamplerBuffer offsets;
uniform isamplerBuffer indices;
uniform samplerBuffer weights;
uniform int batchStart = 0;
uniform int batchEnd = 0;
void main() {
int current = gl_VertexID + batchStart;
@ -97,4 +97,146 @@ void main() {
writeVertex(dst);
}
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES)
layout (location = 0) in ivec3 patchHandles;
layout (location = 1) in vec2 patchCoords;
//struct PatchArray {
// int patchType;
// int numPatches;
// int indexBase; // an offset within the index buffer
// int primitiveIdBase; // an offset within the patch param buffer
//};
// # of patcharrays is 1 or 2.
uniform ivec4 patchArray[2];
uniform isamplerBuffer patchParamBuffer;
uniform isamplerBuffer patchIndexBuffer;
void getBSplineWeights(float t, inout vec4 point, vec4 deriv) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point.x = one6th * (1.0f - 3.0f*(t - t2) - t3);
point.y = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point.z = one6th * (1.0f + 3.0f*(t + t2 - t3));
point.w = one6th * ( t3);
// Derivatives of the above four basis functions at t:
/* if (deriv) { */
/* deriv[0] = -0.5f*t2 + t - 0.5f; */
/* deriv[1] = 1.5f*t2 - 2.0f*t; */
/* deriv[2] = -1.5f*t2 + t + 0.5f; */
/* deriv[3] = 0.5f*t2; */
/* } */
}
uint getDepth(uint patchBits) {
return (patchBits & 0x7);
}
float getParamFraction(uint patchBits) {
uint nonQuadRoot = (patchBits >> 3) & 0x1;
uint depth = getDepth(patchBits);
if (nonQuadRoot == 1) {
return 1.0f / float( 1 << (depth-1) );
} else {
return 1.0f / float( 1 << depth );
}
}
vec2 normalizePatchCoord(uint patchBits, vec2 uv) {
float frac = getParamFraction(patchBits);
uint iu = (patchBits >> 22) & 0x3ff;
uint iv = (patchBits >> 12) & 0x3ff;
// top left corner
float pu = float(iu*frac);
float pv = float(iv*frac);
// normalize u,v coordinates
return vec2((uv.x - pu) / frac, (uv.y - pv) / frac);
}
void adjustBoundaryWeights(uint bits, inout vec4 sWeights, inout vec4 tWeights) {
uint boundary = ((bits >> 4) & 0xf);
if ((boundary & 1) != 0) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if ((boundary & 2) != 0) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if ((boundary & 4) != 0) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if ((boundary & 8) != 0) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
void main() {
int current = gl_VertexID;
ivec3 handle = patchHandles;
int patchIndex = handle.y;
vec2 coord = patchCoords;
ivec4 array = patchArray[handle.x];
int patchType = array.x;
int numControlVertices = 16;
uint patchBits = texelFetch(patchParamBuffer, patchIndex).y;
// normalize
coord = normalizePatchCoord(patchBits, coord);
// XXX: dScale for derivative
// if regular
float wP[20];
{
vec4 sWeights, tWeights, dsWeights, dtWeights;
getBSplineWeights(coord.s, sWeights, dsWeights);
getBSplineWeights(coord.t, tWeights, dtWeights);
adjustBoundaryWeights(patchBits, sWeights, tWeights);
for (int k = 0; k < 4; ++k) {
for (int l = 0; l < 4; ++l) {
wP[4*k+l] = sWeights[l] * tWeights[k];
}
}
}
Vertex dst;
clear(dst);
int indexBase = array.z + handle.z;
for (int i = 0; i < numControlVertices; ++i) {
int index = texelFetch(patchIndexBuffer, indexBase + i).x;
addWithWeight(dst, readVertex(index), wP[i]);
}
writeVertex(dst);
}
#endif

View File

@ -171,7 +171,12 @@ public:
for(typename Evaluators::iterator it = _evaluators.begin();
it != _evaluators.end(); ++it) {
if (it->srcDesc.length == srcDesc.length and
// Note: XFB kernel needs to be configured with the local offset
// of the dstDesc to skip preceding primvars.
int dstOffset1 = it->dstDesc.offset % it->dstDesc.stride;
int dstOffset2 = dstDesc.offset % dstDesc.stride;
if (dstOffset1 == dstOffset2 and
it->srcDesc.length == srcDesc.length and
it->srcDesc.stride == srcDesc.stride and
it->dstDesc.length == dstDesc.length and
it->dstDesc.stride == dstDesc.stride) {

View File

@ -24,6 +24,7 @@
#include "../osd/ompEvaluator.h"
#include "../osd/ompKernel.h"
#include "../far/patchBasis.h"
#include <omp.h>
namespace OpenSubdiv {
@ -33,24 +34,212 @@ namespace Osd {
/* static */
bool
OmpEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
if (end <= start) return true;
OmpEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
// we can probably expand cpuKernel.cpp to here.
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
// XXX: we can probably expand cpuKernel.cpp to here.
OmpEvalStencils(src, srcDesc, dst, dstDesc,
sizes, offsets, indices, weights, start, end);
return true;
}
/* static */
bool
OmpEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
OmpEvalStencils(src, srcDesc,
dst, dstDesc,
du, duDesc,
dv, dvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
start, end);
return true;
}
template <typename T>
struct BufferAdapter {
BufferAdapter(T *p, int length, int stride) :
_p(p), _length(length), _stride(stride) { }
void Clear() {
for (int i = 0; i < _length; ++i) _p[i] = 0;
}
void AddWithWeight(T const *src, float w) {
if (_p) {
// TODO: derivatives.
for (int i = 0; i < _length; ++i) {
_p[i] += src[i] * w;
}
}
}
const T *operator[] (int index) const {
return _p + _stride * index;
}
BufferAdapter<T> & operator ++() {
if (_p) {
_p += _stride;
}
return *this;
}
T *_p;
int _length;
int _stride;
};
/* static */
bool
OmpEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer){
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
else return false;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
#pragma omp parallel for
for (int i = 0; i < numPatchCoords; ++i) {
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
float wP[20], wDs[20], wDt[20];
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
// XXX: patchIndex is absolute. not sure it's consistent.
// (should be offsetted by array.primitiveIdBase?)
// patchParamBuffer[array.primitiveIdBase + coord.handle.patchIndex]
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
continue;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
}
return true;
}
/* static */
bool
OmpEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer) {
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
#pragma omp parallel for
for (int i = 0; i < numPatchCoords; ++i) {
float wP[20], wDs[20], wDt[20];
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
continue;
}
const int *cvs =
&patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
duT.Clear();
dvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
duT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
}
++dstT;
++duT;
++dvT;
}
return true;
}
/* static */
void
OmpEvaluator::Synchronize(void * /*deviceContext*/) {

View File

@ -28,7 +28,7 @@
#include "../version.h"
#include <cstddef>
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
namespace OpenSubdiv {
@ -38,9 +38,15 @@ namespace Osd {
class OmpEvaluator {
public:
/// \brief Generic static compute function. This function has a same
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static eval stencils function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
/// in the same way from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
@ -54,29 +60,27 @@ public:
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the omp kernel
/// @param instance not used in the omp kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the omp kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
OmpEvaluator const * instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused;
(void)deviceContext; // unused;
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
const OmpEvaluator *instance = NULL,
void * deviceContext = NULL) {
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
srcDesc,
dstVertexBuffer->BindCpuBuffer(),
dstDesc,
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -85,17 +89,376 @@ public:
/*end = */ stencilTable->GetNumStencils());
}
/// stencil compute function.
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
/// \brief Static eval stencils function which takes raw CPU pointers for
/// input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dvBuffer Output V-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the omp kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the omp kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const OmpEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
static void Synchronize(void *deviceContext = NULL);

View File

@ -117,10 +117,82 @@ OmpEvalStencils(float const * src,
copy(dst, i, threadResult, dstDesc);
}
}
void
OmpEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end) {
if (start > 0) {
sizes += start;
indices += offsets[start];
weights += offsets[start];
duWeights += offsets[start];
dvWeights += offsets[start];
}
src += srcDesc.offset;
dst += dstDesc.offset;
dstDu += dstDuDesc.offset;
dstDv += dstDvDesc.offset;
int numThreads = omp_get_max_threads();
int n = end - start;
float * result = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
#pragma omp parallel for
for (int i = 0; i < n; ++i) {
int index = i + (start > 0 ? start : 0); // Stencil index
// Get thread-local pointers
int const * threadIndices = indices + offsets[index];
float const * threadWeights = weights + offsets[index];
float const * threadWeightsDu = duWeights + offsets[index];
float const * threadWeightsDv = dvWeights + offsets[index];
int threadId = omp_get_thread_num();
float * threadResult = result + threadId*srcDesc.length;
float * threadResultDu = resultDu + threadId*srcDesc.length;
float * threadResultDv = resultDv + threadId*srcDesc.length;
clear(threadResult, dstDesc);
clear(threadResultDu, dstDuDesc);
clear(threadResultDv, dstDvDesc);
for (int j=0; j<(int)sizes[index]; ++j) {
addWithWeight(threadResult, src,
threadIndices[j], threadWeights[j], srcDesc);
addWithWeight(threadResultDu, src,
threadIndices[j], threadWeightsDu[j], srcDesc);
addWithWeight(threadResultDv, src,
threadIndices[j], threadWeightsDv[j], srcDesc);
}
copy(dst, i, threadResult, dstDesc);
copy(dstDu, i, threadResultDu, dstDuDesc);
copy(dstDv, i, threadResultDv, dstDvDesc);
}
}
} // end namespace Osd
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -45,6 +45,23 @@ OmpEvalStencils(float const * src,
float const * weights,
int start, int end);
void
OmpEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end);
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -34,15 +34,15 @@ namespace Osd {
/* static */
bool
TbbEvaluator::EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
TbbEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end) {
if (end <= start) return true;
TbbEvalStencils(src, srcDesc, dst, dstDesc,
@ -51,6 +51,82 @@ TbbEvaluator::EvalStencils(const float *src,
return true;
}
/* static */
bool
TbbEvaluator::EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
TbbEvalStencils(src, srcDesc,
dst, dstDesc,
du, duDesc,
dv, dvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
start, end);
return true;
}
/* static */
bool
TbbEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
if (srcDesc.length != dstDesc.length) return false;
TbbEvalPatches(src, srcDesc, dst, dstDesc,
NULL, VertexBufferDescriptor(),
NULL, VertexBufferDescriptor(),
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
return true;
}
/* static */
bool
TbbEvaluator::EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
if (srcDesc.length != dstDesc.length) return false;
TbbEvalPatches(src, srcDesc, dst, dstDesc,
du, duDesc, dv, dvDesc,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
return true;
}
/* static */
void
TbbEvaluator::Synchronize(void *) {

View File

@ -26,7 +26,9 @@
#define OPENSUBDIV3_OSD_TBB_EVALUATOR_H
#include "../version.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchTable.h"
#include <cstddef>
@ -37,9 +39,15 @@ namespace Osd {
class TbbEvaluator {
public:
/// \brief Generic static stencil eval function. This function has a same
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static eval stencils function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
/// in the same way from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
@ -55,27 +63,25 @@ public:
///
/// @param stencilTable stencil table to be applied.
///
/// @param instance not used in the tbb kernel
/// @param instance not used in the tbb kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the tbb kernel
///
template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
VertexBufferDescriptor const &srcDesc,
VERTEX_BUFFER *dstVertexBuffer,
VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
TbbEvaluator const *instance = NULL,
void *deviceContext = NULL) {
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
TbbEvaluator const *instance = NULL,
void *deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcVertexBuffer->BindCpuBuffer(),
srcDesc,
dstVertexBuffer->BindCpuBuffer(),
dstDesc,
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
@ -84,19 +90,381 @@ public:
/*end = */ stencilTable->GetNumStencils());
}
static bool EvalStencils(const float *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
const int *sizes,
const int *offsets,
const int *indices,
const float *weights,
int start,
int end);
/// \brief Static eval stencils function which takes raw CPU pointers for
/// input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
const int *sizes,
const int *offsets,
const int *indices,
const float *weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dvBuffer Output V-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied.
///
/// @param instance not used in the tbb kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the tbb kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
const TbbEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output s-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dv Output t-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the u-weights buffer of the stencil table
///
/// @param dvWeights pointer to the v-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(),
srcDesc,
dstBuffer->BindCpuBuffer(),
dstDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output s-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output t-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable Far::PatchTable
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, VertexBufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, VertexBufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, VertexBufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(
srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output s-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dv Output t-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, VertexBufferDescriptor const &srcDesc,
float *dst, VertexBufferDescriptor const &dstDesc,
float *du, VertexBufferDescriptor const &duDesc,
float *dv, VertexBufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// \brief synchronize all asynchronous computation invoked on this device.
static void Synchronize(void *deviceContext = NULL);
/// \brief initialize tbb task schedular
/// (optional: client may use tbb::task_scheduler_init)
///
/// @param numThreads how many threads
///
static void SetNumThreads(int numThreads);
};

View File

@ -24,7 +24,9 @@
#include "../osd/cpuKernel.h"
#include "../osd/tbbKernel.h"
#include "../osd/types.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchBasis.h"
#include <cassert>
#include <cstdlib>
@ -187,6 +189,274 @@ TbbEvalStencils(float const * src,
tbb::parallel_for(range, kernel);
}
void
TbbEvalStencils(float const * src, VertexBufferDescriptor const &srcDesc,
float * dst, VertexBufferDescriptor const &dstDesc,
float * du, VertexBufferDescriptor const &duDesc,
float * dv, VertexBufferDescriptor const &dvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end) {
if (start > 0) {
sizes += start;
indices += offsets[start];
weights += offsets[start];
duWeights += offsets[start];
dvWeights += offsets[start];
}
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
// PERFORMANCE: need to combine 3 launches together
if (dst) {
TBBStencilKernel kernel(src, srcDesc, dst, dstDesc,
sizes, offsets, indices, weights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (du) {
TBBStencilKernel kernel(src, srcDesc, du, duDesc,
sizes, offsets, indices, duWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (dv) {
TBBStencilKernel kernel(src, srcDesc, dv, dvDesc,
sizes, offsets, indices, dvWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
}
// ---------------------------------------------------------------------------
template <typename T>
struct BufferAdapter {
BufferAdapter(T *p, int length, int stride) :
_p(p), _length(length), _stride(stride) { }
void Clear() {
for (int i = 0; i < _length; ++i) _p[i] = 0;
}
void AddWithWeight(T const *src, float w) {
if (_p) {
for (int i = 0; i < _length; ++i) {
_p[i] += src[i] * w;
}
}
}
const T *operator[] (int index) const {
return _p + _stride * index;
}
BufferAdapter<T> & operator ++() {
if (_p) {
_p += _stride;
}
return *this;
}
T *_p;
int _length;
int _stride;
};
class TbbEvalPatchesKernel {
VertexBufferDescriptor _srcDesc;
VertexBufferDescriptor _dstDesc;
VertexBufferDescriptor _dstDuDesc;
VertexBufferDescriptor _dstDvDesc;
float const * _src;
float * _dst;
float * _dstDu;
float * _dstDv;
int _numPatchCoords;
const PatchCoord *_patchCoords;
const PatchArray *_patchArrayBuffer;
const int *_patchIndexBuffer;
const PatchParam *_patchParamBuffer;
public:
TbbEvalPatchesKernel(float const *src,
VertexBufferDescriptor srcDesc,
float *dst,
VertexBufferDescriptor dstDesc,
float *dstDu,
VertexBufferDescriptor dstDuDesc,
float *dstDv,
VertexBufferDescriptor dstDvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) :
_srcDesc(srcDesc), _dstDesc(dstDesc),
_dstDuDesc(dstDuDesc), _dstDvDesc(dstDvDesc),
_src(src), _dst(dst), _dstDu(dstDu), _dstDv(dstDv),
_numPatchCoords(numPatchCoords),
_patchCoords(patchCoords),
_patchArrayBuffer(patchArrayBuffer),
_patchIndexBuffer(patchIndexBuffer),
_patchParamBuffer(patchParamBuffer) {
}
void operator() (tbb::blocked_range<int> const &r) const {
if (_dstDu == NULL && _dstDv == NULL) {
compute(r);
} else {
computeWithDerivative(r);
}
}
void compute(tbb::blocked_range<int> const &r) const {
float wP[20], wDs[20], wDt[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
BufferAdapter<float> dstT(_dst + _dstDesc.offset
+ r.begin() * _dstDesc.stride,
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu,
_dstDuDesc.length,
_dstDuDesc.stride);
BufferAdapter<float> dstDvT(_dstDv,
_dstDvDesc.length,
_dstDvDesc.stride);
for (int i = r.begin(); i < r.end(); ++i) {
PatchCoord const &coord = _patchCoords[i];
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
}
const int *cvs =
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
++dstT;
}
}
void computeWithDerivative(tbb::blocked_range<int> const &r) const {
float wP[20], wDs[20], wDt[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
BufferAdapter<float> dstT(_dst + _dstDesc.offset
+ r.begin() * _dstDesc.stride,
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
+ r.begin() * _dstDuDesc.stride,
_dstDuDesc.length,
_dstDuDesc.stride);
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
+ r.begin() * _dstDvDesc.stride,
_dstDvDesc.length,
_dstDvDesc.stride);
for (int i = r.begin(); i < r.end(); ++i) {
PatchCoord const &coord = _patchCoords[i];
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
int patchType = array.GetPatchType();
Far::PatchParam::BitField patchBits = *(Far::PatchParam::BitField*)
&_patchParamBuffer[coord.handle.patchIndex].patchBits;
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(patchBits,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
}
const int *cvs =
&_patchIndexBuffer[array.indexBase + coord.handle.vertIndex];
dstT.Clear();
dstDuT.Clear();
dstDvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
dstDuT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dstDvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
}
++dstT;
++dstDuT;
++dstDvT;
}
}
};
void
TbbEvalPatches(float const *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDu,
VertexBufferDescriptor const &dstDuDesc,
float *dstDv,
VertexBufferDescriptor const &dstDvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
dstDu, dstDuDesc, dstDv, dstDvDesc,
numPatchCoords, patchCoords,
patchArrayBuffer,
patchIndexBuffer,
patchParamBuffer);
tbb::blocked_range<int> range(0, numPatchCoords, grain_size);
tbb::parallel_for(range, kernel);
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -32,6 +32,9 @@ namespace OPENSUBDIV_VERSION {
namespace Osd {
struct PatchArray;
struct PatchCoord;
struct PatchParam;
struct VertexBufferDescriptor;
void
@ -45,6 +48,38 @@ TbbEvalStencils(float const * src,
float const * weights,
int start, int end);
void
TbbEvalStencils(float const * src,
VertexBufferDescriptor const &srcDesc,
float * dst,
VertexBufferDescriptor const &dstDesc,
float * dstDu,
VertexBufferDescriptor const &dstDuDesc,
float * dstDv,
VertexBufferDescriptor const &dstDvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
int start, int end);
void
TbbEvalPatches(float const *src,
VertexBufferDescriptor const &srcDesc,
float *dst,
VertexBufferDescriptor const &dstDesc,
float *dstDu,
VertexBufferDescriptor const &dstDuDesc,
float *dstDv,
VertexBufferDescriptor const &dstDvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

111
opensubdiv/osd/types.h Normal file
View File

@ -0,0 +1,111 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_TYPES_H
#define OPENSUBDIV3_OSD_TYPES_H
#include "../version.h"
#include "../far/patchTable.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
/// \brief Coordinates set on a patch table
///
/// XXX: this class may be moved into Far
///
struct PatchCoord {
// 5-ints struct.
/// \brief Constructor
///
/// @param handle patch handle
///
/// @param s parametric location on the patch
///
/// @param t parametric location on the patch
///
PatchCoord(Far::PatchTable::PatchHandle handle, float s, float t) :
handle(handle), s(s), t(t) { }
PatchCoord() : s(0), t(0) {
handle.arrayIndex = 0;
handle.patchIndex = 0;
handle.vertIndex = 0;
}
Far::PatchTable::PatchHandle handle; ///< patch handle
float s, t; ///< parametric location on patch
};
struct PatchArray {
// 4-ints struct.
PatchArray(Far::PatchDescriptor desc, int numPatches,
int indexBase, int primitiveIdBase) :
desc(desc), numPatches(numPatches), indexBase(indexBase),
primitiveIdBase(primitiveIdBase) {}
Far::PatchDescriptor const &GetDescriptor() const {
return desc;
}
int GetPatchType() const {
return desc.GetType();
}
int GetNumPatches() const {
return numPatches;
}
int GetIndexBase() const {
return indexBase;
}
int GetPrimitiveIdBase() const {
return primitiveIdBase;
}
Far::PatchDescriptor desc;
int numPatches;
int indexBase; // an offset within the index buffer
int primitiveIdBase; // an offset within the patch param buffer
};
struct PatchParam {
// int3 struct.
int faceIndex;
unsigned int patchBits;
float sharpness;
};
typedef std::vector<PatchArray> PatchArrayVector;
typedef std::vector<PatchParam> PatchParamVector;
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_TYPES_H