mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-12-04 00:41:05 +00:00
Added OsdVertexBuffer class. OsdMesh no longer has vertex buffer and vertex
specification (how many elements exists in the buffer). client will create OsdVertexBuffer and provide it as an argument of OsdMesh::Subdivide() function. It would be more flexible and hopefully matches various use cases. Since each dispatcher has to accept arbitrary vertex buffer, introduced a simple shader registry into glslDispatcher. It will configure shaders for given vertex elements on demand (for now, just works only for varying buffer). Fixed cuda kernel's GL resource leakage. Since cuda GL interop seems one-way, OsdCudaVertexBuffer manages vertex updating instead of just using OsdGpuVertexBuffer. Cleaned up some kernel codes and renamed ambiguous names.
This commit is contained in:
parent
29b85a0e8c
commit
851c00d04c
@ -66,29 +66,33 @@
|
|||||||
namespace OpenSubdiv {
|
namespace OpenSubdiv {
|
||||||
namespace OPENSUBDIV_VERSION {
|
namespace OPENSUBDIV_VERSION {
|
||||||
|
|
||||||
OsdCpuKernelDispatcher::DeviceTable::~DeviceTable() {
|
OsdCpuKernelDispatcher::SubdivisionTable::~SubdivisionTable() {
|
||||||
|
|
||||||
if (devicePtr)
|
if (ptr)
|
||||||
free(devicePtr);
|
free(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::DeviceTable::Copy( int size, const void *table ) {
|
OsdCpuKernelDispatcher::SubdivisionTable::Copy( int size, const void *table ) {
|
||||||
|
|
||||||
if (size > 0) {
|
if (size > 0) {
|
||||||
if (devicePtr)
|
if (ptr)
|
||||||
free(devicePtr);
|
free(ptr);
|
||||||
devicePtr = malloc(size);
|
ptr = malloc(size);
|
||||||
memcpy(devicePtr, table, size);
|
memcpy(ptr, table, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
OsdCpuKernelDispatcher::OsdCpuKernelDispatcher( int levels )
|
OsdCpuKernelDispatcher::OsdCpuKernelDispatcher( int levels )
|
||||||
: OsdKernelDispatcher(levels) {
|
: OsdKernelDispatcher(levels), _currentVertexBuffer(NULL), _currentVaryingBuffer(NULL), _vdesc(NULL) {
|
||||||
_tables.resize(TABLE_MAX);
|
_tables.resize(TABLE_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() { }
|
OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() {
|
||||||
|
|
||||||
|
if (_vdesc)
|
||||||
|
delete _vdesc;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) {
|
OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) {
|
||||||
@ -96,30 +100,37 @@ OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
|
|||||||
_tables[tableIndex].Copy(size, ptr);
|
_tables[tableIndex].Copy(size, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
OsdCpuKernelDispatcher::BeginLaunchKernel() { }
|
|
||||||
|
|
||||||
void
|
|
||||||
OsdCpuKernelDispatcher::EndLaunchKernel() { }
|
|
||||||
|
|
||||||
OsdVertexBuffer *
|
OsdVertexBuffer *
|
||||||
OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
|
OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
|
||||||
{
|
{
|
||||||
return new OsdCpuVertexBuffer(numElements, count);
|
return new OsdCpuVertexBuffer(numElements, numVertices);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
OsdCpuKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
||||||
|
|
||||||
_vertexBuffer = dynamic_cast<OsdCpuVertexBuffer *>(vertex);
|
if (vertex)
|
||||||
_varyingBuffer = dynamic_cast<OsdCpuVertexBuffer *>(varying);
|
_currentVertexBuffer = dynamic_cast<OsdCpuVertexBuffer *>(vertex);
|
||||||
|
else
|
||||||
|
_currentVertexBuffer = NULL;
|
||||||
|
|
||||||
|
if (varying)
|
||||||
|
_currentVaryingBuffer = dynamic_cast<OsdCpuVertexBuffer *>(varying);
|
||||||
|
else
|
||||||
|
_currentVaryingBuffer = NULL;
|
||||||
|
|
||||||
|
_vdesc = new VertexDescriptor(_currentVertexBuffer ? _currentVertexBuffer->GetNumElements() : 0,
|
||||||
|
_currentVaryingBuffer ? _currentVaryingBuffer->GetNumElements() : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::UnbindVertexBuffer()
|
OsdCpuKernelDispatcher::UnbindVertexBuffer()
|
||||||
{
|
{
|
||||||
_vertexBuffer = NULL;
|
delete _vdesc;
|
||||||
_varyingBuffer = NULL;
|
_vdesc = NULL;
|
||||||
|
|
||||||
|
_currentVertexBuffer = NULL;
|
||||||
|
_currentVaryingBuffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -129,21 +140,17 @@ OsdCpuKernelDispatcher::Synchronize() { }
|
|||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyBilinearFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyBilinearFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
|
computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1],
|
||||||
computeFace(&vd, _vbo, _varyingVbo,
|
(int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1],
|
||||||
(int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1],
|
|
||||||
(int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1],
|
|
||||||
offset, start, end);
|
offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
|
computeBilinearEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
|
||||||
computeBilinearEdge(&vd, _vbo, _varyingVbo,
|
|
||||||
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
|
|
||||||
offset,
|
offset,
|
||||||
start, end);
|
start, end);
|
||||||
}
|
}
|
||||||
@ -151,38 +158,26 @@ OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh<OsdVertex> * me
|
|||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyBilinearVertexVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyBilinearVertexVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
|
computeBilinearVertex(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||||
computeBilinearVertex(&vd, _vbo, _varyingVbo,
|
|
||||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
|
||||||
offset, start, end);
|
offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyCatmarkFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyCatmarkFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1],
|
||||||
computeFace(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
(int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1],
|
||||||
(int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1],
|
|
||||||
(int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1],
|
|
||||||
offset, start, end);
|
offset, start, end);
|
||||||
|
|
||||||
float *p = _vertexBuffer->GetCpuBuffer();
|
|
||||||
for(int i = 0; i < 150; i+=3){
|
|
||||||
printf("%f %f %f\n", p[0], p[1], p[2]);
|
|
||||||
p+=3;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
|
||||||
computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
(float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1],
|
||||||
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
|
|
||||||
(float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1],
|
|
||||||
offset,
|
offset,
|
||||||
start, end);
|
start, end);
|
||||||
}
|
}
|
||||||
@ -190,34 +185,28 @@ OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh<OsdVertex> * mes
|
|||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
computeVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||||
computeVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
(int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1],
|
||||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||||
(int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1],
|
|
||||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
|
||||||
offset, start, end);
|
offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||||
computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
|
||||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
|
||||||
offset, start, end, pass);
|
offset, start, end, pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
|
||||||
computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
(float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1],
|
||||||
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
|
|
||||||
(float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1],
|
|
||||||
offset,
|
offset,
|
||||||
start, end);
|
start, end);
|
||||||
}
|
}
|
||||||
@ -225,28 +214,23 @@ OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh<OsdVertex> * mesh,
|
|||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
computeLoopVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||||
computeLoopVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
(int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1],
|
||||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||||
(int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1],
|
|
||||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
|
||||||
offset, start, end);
|
offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||||
|
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||||
computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
|
||||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
|
||||||
offset, start, end, pass);
|
offset, start, end, pass);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace OPENSUBDIV_VERSION
|
} // end namespace OPENSUBDIV_VERSION
|
||||||
using namespace OPENSUBDIV_VERSION;
|
|
||||||
|
|
||||||
} // end namespace OpenSubdiv
|
} // end namespace OpenSubdiv
|
||||||
|
|
||||||
|
@ -70,6 +70,8 @@
|
|||||||
namespace OpenSubdiv {
|
namespace OpenSubdiv {
|
||||||
namespace OPENSUBDIV_VERSION {
|
namespace OPENSUBDIV_VERSION {
|
||||||
|
|
||||||
|
class VertexDescriptor;
|
||||||
|
|
||||||
class OsdCpuKernelDispatcher : public OsdKernelDispatcher
|
class OsdCpuKernelDispatcher : public OsdKernelDispatcher
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -102,11 +104,11 @@ public:
|
|||||||
|
|
||||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
||||||
|
|
||||||
virtual void BeginLaunchKernel();
|
virtual void OnKernelLaunch() {}
|
||||||
|
|
||||||
virtual void EndLaunchKernel();
|
virtual void OnKernelFinish() {}
|
||||||
|
|
||||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
|
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
|
||||||
|
|
||||||
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
||||||
|
|
||||||
@ -123,18 +125,26 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
struct DeviceTable {
|
struct SubdivisionTable {
|
||||||
DeviceTable() : devicePtr(NULL) { }
|
SubdivisionTable() : ptr(NULL) { }
|
||||||
~DeviceTable();
|
|
||||||
|
~SubdivisionTable();
|
||||||
|
|
||||||
void Copy(int size, const void *ptr);
|
void Copy(int size, const void *ptr);
|
||||||
|
|
||||||
void *devicePtr;
|
void *ptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
OsdCpuVertexBuffer *_vertexBuffer, *_varyingBuffer;
|
float *GetVertexBuffer() const { return _currentVertexBuffer ? _currentVertexBuffer->GetCpuBuffer() : NULL; }
|
||||||
|
|
||||||
std::vector<DeviceTable> _tables;
|
float *GetVaryingBuffer() const { return _currentVaryingBuffer ? _currentVaryingBuffer->GetCpuBuffer() : NULL; }
|
||||||
|
|
||||||
|
OsdCpuVertexBuffer *_currentVertexBuffer,
|
||||||
|
*_currentVaryingBuffer;
|
||||||
|
|
||||||
|
VertexDescriptor *_vdesc;
|
||||||
|
|
||||||
|
std::vector<SubdivisionTable> _tables;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace OPENSUBDIV_VERSION
|
} // end namespace OPENSUBDIV_VERSION
|
||||||
|
@ -64,9 +64,6 @@ namespace OPENSUBDIV_VERSION {
|
|||||||
|
|
||||||
void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying, const int *F_IT, const int *F_ITa, int offset, int start, int end) {
|
void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying, const int *F_IT, const int *F_ITa, int offset, int start, int end) {
|
||||||
|
|
||||||
int ve = vdesc->numVertexElements;
|
|
||||||
int vev = vdesc->numVaryingElements;
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
@ -77,23 +74,19 @@ void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying
|
|||||||
float weight = 1.0f/n;
|
float weight = 1.0f/n;
|
||||||
|
|
||||||
// XXX: should use local vertex struct variable instead of accumulating directly into global memory.
|
// XXX: should use local vertex struct variable instead of accumulating directly into global memory.
|
||||||
float *dst = &vertex[(offset + i)*ve];
|
int dstIndex = offset + i;
|
||||||
float *dstVarying = &varying[(offset + i)*vev];
|
vdesc->Clear(vertex, varying, dstIndex);
|
||||||
vdesc->Clear(dst, dstVarying);
|
|
||||||
|
|
||||||
for (int j=0; j<n; ++j) {
|
for (int j=0; j<n; ++j) {
|
||||||
int index = F_IT[h+j];
|
int index = F_IT[h+j];
|
||||||
vdesc->AddWithWeight(dst, &vertex[index*ve], weight);
|
vdesc->AddWithWeight(vertex, dstIndex, index, weight);
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[index*vev], weight);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, const float *E_W, int offset, int start, int end) {
|
void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, const float *E_W, int offset, int start, int end) {
|
||||||
|
|
||||||
int ve = vdesc->numVertexElements;
|
|
||||||
int vev = vdesc->numVaryingElements;
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
@ -105,30 +98,26 @@ void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying,
|
|||||||
|
|
||||||
float vertWeight = E_W[i*2+0];
|
float vertWeight = E_W[i*2+0];
|
||||||
|
|
||||||
float *dst = &vertex[(offset+i)*ve];
|
int dstIndex = offset + i;
|
||||||
float *dstVarying = &varying[(offset+i)*vev];
|
vdesc->Clear(vertex, varying, dstIndex);
|
||||||
vdesc->Clear(dst, dstVarying);
|
|
||||||
|
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], vertWeight);
|
vdesc->AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], vertWeight);
|
vdesc->AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
|
||||||
|
|
||||||
if (eidx2 != -1) {
|
if (eidx2 != -1) {
|
||||||
float faceWeight = E_W[i*2+1];
|
float faceWeight = E_W[i*2+1];
|
||||||
|
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx2*ve], faceWeight);
|
vdesc->AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx3*ve], faceWeight);
|
vdesc->AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const float *V_W, int offset, int start, int end, int pass) {
|
void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const float *V_W, int offset, int start, int end, int pass) {
|
||||||
|
|
||||||
int ve = vdesc->numVertexElements;
|
|
||||||
int vev = vdesc->numVaryingElements;
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
@ -146,29 +135,25 @@ void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying
|
|||||||
if (weight>0.0f && weight<1.0f && n > 0)
|
if (weight>0.0f && weight<1.0f && n > 0)
|
||||||
weight=1.0f-weight;
|
weight=1.0f-weight;
|
||||||
|
|
||||||
float *dst = &vertex[(offset+i)*ve];
|
int dstIndex = offset + i;
|
||||||
float *dstVarying = &varying[(offset+i)*vev];
|
|
||||||
if(not pass)
|
if(not pass)
|
||||||
vdesc->Clear(dst, dstVarying);
|
vdesc->Clear(vertex, varying, dstIndex);
|
||||||
|
|
||||||
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
|
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
|
||||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight);
|
vdesc->AddWithWeight(vertex, dstIndex, p, weight);
|
||||||
} else {
|
} else {
|
||||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * 0.75f);
|
vdesc->AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], weight * 0.125f);
|
vdesc->AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], weight * 0.125f);
|
vdesc->AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (not pass)
|
if (not pass)
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
|
void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
|
||||||
|
|
||||||
int ve = vdesc->numVertexElements;
|
|
||||||
int vev = vdesc->numVaryingElements;
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
@ -181,25 +166,21 @@ void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying
|
|||||||
float wp = 1.0f/float(n*n);
|
float wp = 1.0f/float(n*n);
|
||||||
float wv = (n-2.0f) * n * wp;
|
float wv = (n-2.0f) * n * wp;
|
||||||
|
|
||||||
float *dst = &vertex[(offset+i)*ve];
|
int dstIndex = offset + i;
|
||||||
float *dstVarying = &varying[(offset+i)*vev];
|
vdesc->Clear(vertex, varying, dstIndex);
|
||||||
vdesc->Clear(dst, dstVarying);
|
|
||||||
|
|
||||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * wv);
|
vdesc->AddWithWeight(vertex, dstIndex, p, weight * wv);
|
||||||
|
|
||||||
for (int j = 0; j < n; ++j) {
|
for (int j = 0; j < n; ++j) {
|
||||||
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2]*ve], weight * wp);
|
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
|
||||||
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2+1]*ve], weight * wp);
|
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
|
||||||
}
|
}
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
|
void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
|
||||||
|
|
||||||
int ve = vdesc->numVertexElements;
|
|
||||||
int vev = vdesc->numVaryingElements;
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
@ -214,24 +195,20 @@ void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *var
|
|||||||
beta = beta * beta;
|
beta = beta * beta;
|
||||||
beta = (0.625f - beta) * wp;
|
beta = (0.625f - beta) * wp;
|
||||||
|
|
||||||
float *dst = &vertex[(offset+i)*ve];
|
int dstIndex = offset + i;
|
||||||
float *dstVarying = &varying[(offset+i)*vev];
|
vdesc->Clear(vertex, varying, dstIndex);
|
||||||
vdesc->Clear(dst, dstVarying);
|
|
||||||
|
|
||||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * (1.0f - (beta * n)));
|
vdesc->AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
|
||||||
|
|
||||||
for (int j = 0; j < n; ++j)
|
for (int j = 0; j < n; ++j)
|
||||||
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j]*ve], weight * beta);
|
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
|
||||||
|
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, int offset, int start, int end) {
|
void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, int offset, int start, int end) {
|
||||||
|
|
||||||
int ve = vdesc->numVertexElements;
|
|
||||||
int vev = vdesc->numVaryingElements;
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
@ -239,35 +216,30 @@ void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *va
|
|||||||
int eidx0 = E_IT[2*i+0];
|
int eidx0 = E_IT[2*i+0];
|
||||||
int eidx1 = E_IT[2*i+1];
|
int eidx1 = E_IT[2*i+1];
|
||||||
|
|
||||||
float *dst = &vertex[(offset+i)*ve];
|
int dstIndex = offset + i;
|
||||||
float *dstVarying = &varying[(offset+i)*vev];
|
vdesc->Clear(vertex, varying, dstIndex);
|
||||||
vdesc->Clear(dst, dstVarying);
|
|
||||||
|
vdesc->AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
|
||||||
|
vdesc->AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
|
||||||
|
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], 0.5f);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||||
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], 0.5f);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||||
|
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f);
|
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeBilinearVertex(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, int offset, int start, int end) {
|
void computeBilinearVertex(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, int offset, int start, int end) {
|
||||||
|
|
||||||
int ve = vdesc->numVertexElements;
|
|
||||||
int vev = vdesc->numVaryingElements;
|
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
for (int i = start; i < end; i++) {
|
for (int i = start; i < end; i++) {
|
||||||
int p = V_ITa[i];
|
int p = V_ITa[i];
|
||||||
|
|
||||||
float *dst = &vertex[(offset+i)*ve];
|
int dstIndex = offset + i;
|
||||||
float *dstVarying = &varying[(offset+i)*vev];
|
vdesc->Clear(vertex, varying, dstIndex);
|
||||||
vdesc->Clear(dst, dstVarying);
|
|
||||||
|
|
||||||
vdesc->AddWithWeight(dst, &vertex[p*ve], 1.0f);
|
vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f);
|
||||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f);
|
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,20 +67,28 @@ struct VertexDescriptor {
|
|||||||
VertexDescriptor(int numVertexElem, int numVaryingElem)
|
VertexDescriptor(int numVertexElem, int numVaryingElem)
|
||||||
: numVertexElements(numVertexElem), numVaryingElements(numVaryingElem) { }
|
: numVertexElements(numVertexElem), numVaryingElements(numVaryingElem) { }
|
||||||
|
|
||||||
void Clear(float *vertex, float *varying) const {
|
void Clear(float *vertex, float *varying, int index) const {
|
||||||
for (int i = 0; i < numVertexElements; ++i)
|
if (vertex) {
|
||||||
vertex[i] = 0.0f;
|
for (int i = 0; i < numVertexElements; ++i)
|
||||||
|
vertex[index*numVertexElements+i] = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < numVaryingElements; ++i)
|
if (varying) {
|
||||||
varying[i] = 0.0f;
|
for (int i = 0; i < numVaryingElements; ++i)
|
||||||
|
varying[index*numVaryingElements+i] = 0.0f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
void AddWithWeight(float *vertex, const float *src, float weight) const {
|
void AddWithWeight(float *vertex, int dstIndex, int srcIndex, float weight) const {
|
||||||
|
int d = dstIndex * numVertexElements;
|
||||||
|
int s = srcIndex * numVertexElements;
|
||||||
for (int i = 0; i < numVertexElements; ++i)
|
for (int i = 0; i < numVertexElements; ++i)
|
||||||
vertex[i] += src[i] * weight;
|
vertex[d++] += vertex[s++] * weight;
|
||||||
}
|
}
|
||||||
void AddVaryingWithWeight(float *varying, const float *src, float weight) const {
|
void AddVaryingWithWeight(float *varying, int dstIndex, int srcIndex, float weight) const {
|
||||||
|
int d = dstIndex * numVaryingElements;
|
||||||
|
int s = srcIndex * numVaryingElements;
|
||||||
for (int i = 0; i < numVaryingElements; ++i)
|
for (int i = 0; i < numVaryingElements; ++i)
|
||||||
varying[i] += src[i] * weight;
|
varying[d++] += varying[s++] * weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
int numVertexElements;
|
int numVertexElements;
|
||||||
|
@ -81,7 +81,42 @@ void OsdCudaComputeBilinearVertex(float *vertex, float *varying, int numUserVert
|
|||||||
namespace OpenSubdiv {
|
namespace OpenSubdiv {
|
||||||
namespace OPENSUBDIV_VERSION {
|
namespace OPENSUBDIV_VERSION {
|
||||||
|
|
||||||
|
OsdCudaVertexBuffer::OsdCudaVertexBuffer(int numElements, int numVertices) :
|
||||||
|
OsdGpuVertexBuffer(numElements, numVertices) {
|
||||||
|
|
||||||
|
// register vbo as cuda resource
|
||||||
|
cudaGraphicsGLRegisterBuffer(&_cudaResource, _vbo, cudaGraphicsMapFlagsNone);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdCudaVertexBuffer::UpdateData(const float *src, int numVertices) {
|
||||||
|
|
||||||
|
void *dst = Map();
|
||||||
|
cudaMemcpy(dst, src, _numElements * numVertices * sizeof(float), cudaMemcpyHostToDevice);
|
||||||
|
Unmap();
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
OsdCudaVertexBuffer::Map() {
|
||||||
|
|
||||||
|
size_t num_bytes;
|
||||||
|
void *ptr;
|
||||||
|
|
||||||
|
cudaGraphicsMapResources(1, &_cudaResource, 0);
|
||||||
|
cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource);
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdCudaVertexBuffer::Unmap() {
|
||||||
|
cudaGraphicsUnmapResources(1, &_cudaResource, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
OsdCudaVertexBuffer::~OsdCudaVertexBuffer() {
|
||||||
|
cudaGraphicsUnregisterResource(_cudaResource);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------
|
||||||
OsdCudaKernelDispatcher::DeviceTable::~DeviceTable() {
|
OsdCudaKernelDispatcher::DeviceTable::~DeviceTable() {
|
||||||
|
|
||||||
if (devicePtr) cudaFree(devicePtr);
|
if (devicePtr) cudaFree(devicePtr);
|
||||||
@ -95,18 +130,16 @@ OsdCudaKernelDispatcher::DeviceTable::Copy(int size, const void *ptr) {
|
|||||||
cudaMalloc(&devicePtr, size);
|
cudaMalloc(&devicePtr, size);
|
||||||
cudaMemcpy(devicePtr, ptr, size, cudaMemcpyHostToDevice);
|
cudaMemcpy(devicePtr, ptr, size, cudaMemcpyHostToDevice);
|
||||||
}
|
}
|
||||||
|
// -------------------------------------------------------------------------------
|
||||||
|
|
||||||
OsdCudaKernelDispatcher::OsdCudaKernelDispatcher(int levels)
|
OsdCudaKernelDispatcher::OsdCudaKernelDispatcher(int levels)
|
||||||
: OsdKernelDispatcher(levels),
|
: OsdKernelDispatcher(levels)
|
||||||
_cudaVertexResource(NULL),
|
|
||||||
_cudaVaryingResource(NULL)
|
|
||||||
{
|
{
|
||||||
_tables.resize(TABLE_MAX);
|
_tables.resize(TABLE_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
OsdCudaKernelDispatcher::~OsdCudaKernelDispatcher() {
|
OsdCudaKernelDispatcher::~OsdCudaKernelDispatcher() {
|
||||||
cudaDeviceReset(); // XXX: necessary?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -115,48 +148,52 @@ OsdCudaKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
|
|||||||
_tables[tableIndex].Copy(size, ptr);
|
_tables[tableIndex].Copy(size, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
OsdCudaKernelDispatcher::BeginLaunchKernel() { }
|
|
||||||
|
|
||||||
void
|
|
||||||
OsdCudaKernelDispatcher::EndLaunchKernel() { }
|
|
||||||
|
|
||||||
OsdVertexBuffer *
|
OsdVertexBuffer *
|
||||||
OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
|
OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
|
||||||
{
|
{
|
||||||
return new OsdGpuVertexBuffer(numElements, count);
|
return new OsdCudaVertexBuffer(numElements, numVertices);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCudaKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
OsdCudaKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
||||||
|
|
||||||
OsdGpuVertexBuffer *bVertex = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
|
if (vertex)
|
||||||
OsdGpuVertexBuffer *bVarying = dynamic_cast<OsdGpuVertexBuffer *>(varying);
|
_currentVertexBuffer = dynamic_cast<OsdCudaVertexBuffer *>(vertex);
|
||||||
size_t num_bytes;
|
else
|
||||||
|
_currentVertexBuffer = NULL;
|
||||||
|
|
||||||
if (bVertex) {
|
if (varying)
|
||||||
cudaGraphicsGLRegisterBuffer(&_cudaVertexResource, bVertex->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard);
|
_currentVaryingBuffer = dynamic_cast<OsdCudaVertexBuffer *>(varying);
|
||||||
cudaGraphicsMapResources(1, &_cudaVertexResource, 0);
|
else
|
||||||
cudaGraphicsResourceGetMappedPointer((void **)&_deviceVertices, &num_bytes, _cudaVertexResource);
|
_currentVaryingBuffer = NULL;
|
||||||
|
|
||||||
|
if (_currentVertexBuffer) {
|
||||||
|
_deviceVertices = (float*)_currentVertexBuffer->Map();
|
||||||
|
// XXX todo remove _numVertexElements
|
||||||
|
_numVertexElements = _currentVertexBuffer->GetNumElements();
|
||||||
|
} else {
|
||||||
|
_numVertexElements = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bVarying) {
|
if (_currentVaryingBuffer) {
|
||||||
cudaGraphicsGLRegisterBuffer(&_cudaVaryingResource, bVarying->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard);
|
_deviceVaryings = (float*)_currentVaryingBuffer->Map();
|
||||||
cudaGraphicsMapResources(1, &_cudaVaryingResource, 0);
|
_numVaryingElements = _currentVaryingBuffer->GetNumElements();
|
||||||
cudaGraphicsResourceGetMappedPointer((void **)&_deviceVaryings, &num_bytes, _cudaVaryingResource);
|
} else {
|
||||||
|
_numVaryingElements = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdCudaKernelDispatcher::UnbindVertexBuffer()
|
OsdCudaKernelDispatcher::UnbindVertexBuffer()
|
||||||
{
|
{
|
||||||
if (_cudaVertexResource)
|
if (_currentVertexBuffer){
|
||||||
cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0);
|
_currentVertexBuffer->Unmap();
|
||||||
if (_cudaVaryingResource)
|
}
|
||||||
cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0);
|
if (_currentVaryingBuffer)
|
||||||
|
_currentVaryingBuffer->Unmap();
|
||||||
|
|
||||||
_cudaVertexResource = NULL;
|
_currentVertexBuffer = NULL;
|
||||||
_cudaVaryingResource = NULL;
|
_currentVaryingBuffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -59,6 +59,7 @@
|
|||||||
|
|
||||||
#include "../version.h"
|
#include "../version.h"
|
||||||
#include "../osd/kernelDispatcher.h"
|
#include "../osd/kernelDispatcher.h"
|
||||||
|
#include "../osd/vertexBuffer.h"
|
||||||
|
|
||||||
#include <GL/glew.h>
|
#include <GL/glew.h>
|
||||||
|
|
||||||
@ -72,6 +73,19 @@ namespace OPENSUBDIV_VERSION {
|
|||||||
|
|
||||||
struct DeviceVertex;
|
struct DeviceVertex;
|
||||||
|
|
||||||
|
class OsdCudaVertexBuffer : public OsdGpuVertexBuffer {
|
||||||
|
public:
|
||||||
|
OsdCudaVertexBuffer(int numElements, int numVertices);
|
||||||
|
virtual ~OsdCudaVertexBuffer();
|
||||||
|
|
||||||
|
virtual void UpdateData(const float *src, int numVertices);
|
||||||
|
void * Map();
|
||||||
|
void Unmap();
|
||||||
|
|
||||||
|
protected:
|
||||||
|
cudaGraphicsResource *_cudaResource;
|
||||||
|
};
|
||||||
|
|
||||||
class OsdCudaKernelDispatcher : public OsdKernelDispatcher
|
class OsdCudaKernelDispatcher : public OsdKernelDispatcher
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -106,11 +120,11 @@ public:
|
|||||||
|
|
||||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
||||||
|
|
||||||
virtual void BeginLaunchKernel();
|
virtual void OnKernelLaunch() {}
|
||||||
|
|
||||||
virtual void EndLaunchKernel();
|
|
||||||
|
|
||||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
|
virtual void OnKernelFinish() {}
|
||||||
|
|
||||||
|
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
|
||||||
|
|
||||||
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
||||||
|
|
||||||
@ -138,8 +152,8 @@ protected:
|
|||||||
|
|
||||||
std::vector<DeviceTable> _tables;
|
std::vector<DeviceTable> _tables;
|
||||||
|
|
||||||
cudaGraphicsResource *_cudaVertexResource,
|
OsdCudaVertexBuffer *_currentVertexBuffer,
|
||||||
*_cudaVaryingResource;
|
*_currentVaryingBuffer;
|
||||||
|
|
||||||
float *_deviceVertices,
|
float *_deviceVertices,
|
||||||
*_deviceVaryings;
|
*_deviceVaryings;
|
||||||
|
@ -61,6 +61,8 @@
|
|||||||
#include <GL/glew.h>
|
#include <GL/glew.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <functional>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
#define OPT_E0_IT_VEC4
|
#define OPT_E0_IT_VEC4
|
||||||
#define OPT_E0_S_VEC2
|
#define OPT_E0_S_VEC2
|
||||||
@ -88,65 +90,28 @@ static const char *shaderDefines = ""
|
|||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
|
std::vector<OsdGlslKernelDispatcher::ComputeShader> OsdGlslKernelDispatcher::shaderRegistry;
|
||||||
|
|
||||||
OsdGlslKernelDispatcher::OsdGlslKernelDispatcher(int levels)
|
OsdGlslKernelDispatcher::OsdGlslKernelDispatcher(int levels)
|
||||||
: OsdKernelDispatcher(levels)
|
: OsdKernelDispatcher(levels)
|
||||||
{
|
{
|
||||||
_vertexBuffer = 0;
|
_currentVertexBuffer = 0;
|
||||||
_varyingBuffer = 0;
|
_currentVaryingBuffer = 0;
|
||||||
_prgKernel = 0;
|
_shader = 0;
|
||||||
|
|
||||||
glGenTextures(1, &_vertexTexture);
|
glGenTextures(1, &_vertexTexture);
|
||||||
glGenTextures(1, &_varyingTexture);
|
glGenTextures(1, &_varyingTexture);
|
||||||
|
|
||||||
_tableBuffers.resize(TABLE_MAX);
|
_tableBuffers.resize(TABLE_MAX);
|
||||||
_tableTextures.resize(TABLE_MAX);
|
_tableTextures.resize(TABLE_MAX);
|
||||||
_tableUniforms.resize(TABLE_MAX);
|
|
||||||
_tableOffsetUniforms.resize(TABLE_MAX);
|
|
||||||
|
|
||||||
glGenBuffers(TABLE_MAX, &_tableBuffers[0]);
|
glGenBuffers(TABLE_MAX, &_tableBuffers[0]);
|
||||||
glGenTextures(TABLE_MAX, &_tableTextures[0]);
|
glGenTextures(TABLE_MAX, &_tableTextures[0]);
|
||||||
|
|
||||||
subComputeFace = 0;
|
|
||||||
subComputeEdge = 0;
|
|
||||||
subComputeVertexA = 0;
|
|
||||||
subComputeVertexB = 0;
|
|
||||||
uniformVertexPass = 0;
|
|
||||||
uniformIndexStart = 0;
|
|
||||||
uniformIndexOffset = 0;
|
|
||||||
|
|
||||||
compile(shaderSource, shaderDefines);
|
|
||||||
|
|
||||||
subComputeFace = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeFace");
|
|
||||||
subComputeEdge = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeEdge");
|
|
||||||
subComputeVertexA = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexA");
|
|
||||||
subComputeVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexB");
|
|
||||||
subComputeLoopVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "loopComputeVertexB");
|
|
||||||
|
|
||||||
uniformVertexPass = glGetUniformLocation(_prgKernel, "vertexPass");
|
|
||||||
uniformIndexStart = glGetUniformLocation(_prgKernel, "indexStart");
|
|
||||||
uniformIndexOffset = glGetUniformLocation(_prgKernel, "indexOffset");
|
|
||||||
|
|
||||||
_tableUniforms[F_IT] = glGetUniformLocation(_prgKernel, "_F0_IT");
|
|
||||||
_tableUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "_F0_ITa");
|
|
||||||
_tableUniforms[E_IT] = glGetUniformLocation(_prgKernel, "_E0_IT");
|
|
||||||
_tableUniforms[V_IT] = glGetUniformLocation(_prgKernel, "_V0_IT");
|
|
||||||
_tableUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "_V0_ITa");
|
|
||||||
_tableUniforms[E_W] = glGetUniformLocation(_prgKernel, "_E0_S");
|
|
||||||
_tableUniforms[V_W] = glGetUniformLocation(_prgKernel, "_V0_S");
|
|
||||||
_tableOffsetUniforms[F_IT] = glGetUniformLocation(_prgKernel, "F_IT_ofs");
|
|
||||||
_tableOffsetUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "F_ITa_ofs");
|
|
||||||
_tableOffsetUniforms[E_IT] = glGetUniformLocation(_prgKernel, "E_IT_ofs");
|
|
||||||
_tableOffsetUniforms[V_IT] = glGetUniformLocation(_prgKernel, "V_IT_ofs");
|
|
||||||
_tableOffsetUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "V_ITa_ofs");
|
|
||||||
_tableOffsetUniforms[E_W] = glGetUniformLocation(_prgKernel, "E_W_ofs");
|
|
||||||
_tableOffsetUniforms[V_W] = glGetUniformLocation(_prgKernel, "V_W_ofs");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OsdGlslKernelDispatcher::~OsdGlslKernelDispatcher() {
|
OsdGlslKernelDispatcher::~OsdGlslKernelDispatcher() {
|
||||||
|
|
||||||
if (_prgKernel)
|
|
||||||
glDeleteProgram(_prgKernel);
|
|
||||||
|
|
||||||
glDeleteTextures(1, &_vertexTexture);
|
glDeleteTextures(1, &_vertexTexture);
|
||||||
glDeleteTextures(1, &_varyingTexture);
|
glDeleteTextures(1, &_varyingTexture);
|
||||||
|
|
||||||
@ -163,98 +128,134 @@ OsdGlslKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::BeginLaunchKernel() {
|
OsdGlslKernelDispatcher::OnKernelLaunch() {
|
||||||
|
|
||||||
glUseProgram(_prgKernel);
|
|
||||||
glEnable(GL_RASTERIZER_DISCARD);
|
glEnable(GL_RASTERIZER_DISCARD);
|
||||||
|
_shader->UseProgram();
|
||||||
|
|
||||||
//XXX what if loop..
|
//XXX what if loop..
|
||||||
bindTextureBuffer(_tableUniforms[F_IT], _tableBuffers[F_IT],
|
bindTextureBuffer(_shader->GetTableUniform(F_IT), _tableBuffers[F_IT],
|
||||||
_tableTextures[F_IT], GL_R32UI, 2);
|
_tableTextures[F_IT], GL_R32UI, 2);
|
||||||
bindTextureBuffer(_tableUniforms[F_ITa], _tableBuffers[F_ITa],
|
bindTextureBuffer(_shader->GetTableUniform(F_ITa), _tableBuffers[F_ITa],
|
||||||
_tableTextures[F_ITa], GL_R32I, 3);
|
_tableTextures[F_ITa], GL_R32I, 3);
|
||||||
|
|
||||||
#ifdef OPT_E0_IT_VEC4
|
#ifdef OPT_E0_IT_VEC4
|
||||||
bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT],
|
bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT],
|
||||||
_tableTextures[E_IT], GL_RGBA32UI, 4);
|
_tableTextures[E_IT], GL_RGBA32UI, 4);
|
||||||
#else
|
#else
|
||||||
bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT],
|
bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT],
|
||||||
_tableTextures[E_IT], GL_R32UI, 4);
|
_tableTextures[E_IT], GL_R32UI, 4);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef OPT_CATMARK_V_IT_VEC2
|
#ifdef OPT_CATMARK_V_IT_VEC2
|
||||||
bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT],
|
bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT],
|
||||||
_tableTextures[V_IT], GL_RG32UI, 5);
|
_tableTextures[V_IT], GL_RG32UI, 5);
|
||||||
#else
|
#else
|
||||||
bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT],
|
bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT],
|
||||||
_tableTextures[V_IT], GL_R32UI, 5);
|
_tableTextures[V_IT], GL_R32UI, 5);
|
||||||
#endif
|
#endif
|
||||||
bindTextureBuffer(_tableUniforms[V_ITa], _tableBuffers[V_ITa],
|
bindTextureBuffer(_shader->GetTableUniform(V_ITa), _tableBuffers[V_ITa],
|
||||||
_tableTextures[V_ITa], GL_R32I, 6);
|
_tableTextures[V_ITa], GL_R32I, 6);
|
||||||
#ifdef OPT_E0_S_VEC2
|
#ifdef OPT_E0_S_VEC2
|
||||||
bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W],
|
bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W],
|
||||||
_tableTextures[E_W], GL_RG32F, 7);
|
_tableTextures[E_W], GL_RG32F, 7);
|
||||||
#else
|
#else
|
||||||
bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W],
|
bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W],
|
||||||
_tableTextures[E_W], GL_R32F, 7);
|
_tableTextures[E_W], GL_R32F, 7);
|
||||||
#endif
|
#endif
|
||||||
bindTextureBuffer(_tableUniforms[V_W], _tableBuffers[V_W],
|
bindTextureBuffer(_shader->GetTableUniform(V_W), _tableBuffers[V_W],
|
||||||
_tableTextures[V_W], GL_R32F, 8);
|
_tableTextures[V_W], GL_R32F, 8);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::EndLaunchKernel() {
|
OsdGlslKernelDispatcher::OnKernelFinish() {
|
||||||
|
|
||||||
|
unbindTextureBuffer(2);
|
||||||
|
unbindTextureBuffer(3);
|
||||||
|
unbindTextureBuffer(4);
|
||||||
|
unbindTextureBuffer(5);
|
||||||
|
unbindTextureBuffer(6);
|
||||||
|
unbindTextureBuffer(7);
|
||||||
|
unbindTextureBuffer(8);
|
||||||
|
|
||||||
glDisable(GL_RASTERIZER_DISCARD);
|
glDisable(GL_RASTERIZER_DISCARD);
|
||||||
glUseProgram(0);
|
glUseProgram(0);
|
||||||
|
|
||||||
// XXX Unbind table buffer
|
|
||||||
}
|
}
|
||||||
|
|
||||||
OsdVertexBuffer *
|
OsdVertexBuffer *
|
||||||
OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
|
OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
|
||||||
{
|
{
|
||||||
return new OsdGpuVertexBuffer(numElements, count);
|
return new OsdGpuVertexBuffer(numElements, numVertices);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
OsdGlslKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
||||||
|
|
||||||
OsdGpuVertexBuffer *bVertex = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
|
if (vertex)
|
||||||
OsdGpuVertexBuffer *bVarying = dynamic_cast<OsdGpuVertexBuffer *>(varying);
|
_currentVertexBuffer = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
|
||||||
|
else
|
||||||
|
_currentVertexBuffer = NULL;
|
||||||
|
|
||||||
if (bVertex) {
|
if (varying)
|
||||||
_vertexBuffer = bVertex->GetGpuBuffer();
|
_currentVaryingBuffer = dynamic_cast<OsdGpuVertexBuffer *>(varying);
|
||||||
bindTextureBuffer(_vertexUniform, _vertexBuffer, _vertexTexture, GL_RGB32F, 0);
|
else
|
||||||
|
_currentVaryingBuffer = NULL;
|
||||||
|
|
||||||
|
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||||
|
int numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||||
|
|
||||||
|
// find appropriate shader program from registry (compile it if needed)
|
||||||
|
std::vector<ComputeShader>::iterator it =
|
||||||
|
std::find_if(shaderRegistry.begin(), shaderRegistry.end(),
|
||||||
|
ComputeShader::Match(numVertexElements, numVaryingElements));
|
||||||
|
|
||||||
|
_shader = NULL;
|
||||||
|
if (it != shaderRegistry.end()) {
|
||||||
|
_shader = &(*it);
|
||||||
|
} else {
|
||||||
|
shaderRegistry.push_back(ComputeShader());
|
||||||
|
_shader = &shaderRegistry.back();
|
||||||
|
_shader->Compile(numVertexElements, numVaryingElements);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bVarying) {
|
_shader->UseProgram(); // need to bind textures
|
||||||
_varyingBuffer = bVarying->GetGpuBuffer();
|
|
||||||
bindTextureBuffer(_varyingUniform, _varyingBuffer, _varyingTexture, GL_R32F, 0);
|
// bind vertex texture
|
||||||
|
if (_currentVertexBuffer) {
|
||||||
|
bindTextureBuffer(_shader->GetVertexUniform(), _currentVertexBuffer->GetGpuBuffer(), _vertexTexture, GL_RGB32F, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
glUseProgram(_prgKernel);
|
if (_currentVaryingBuffer) {
|
||||||
glUniform1i(_vertexUniform, 0);
|
bindTextureBuffer(_shader->GetVaryingUniform(), _currentVaryingBuffer->GetGpuBuffer(), _varyingTexture, GL_R32F, 1);
|
||||||
|
}
|
||||||
|
|
||||||
#if 0 // experiment to use image store function
|
#if 0 // experiment to use image store function
|
||||||
glActiveTexture(GL_TEXTURE0 + 0);
|
glActiveTexture(GL_TEXTURE0 + 0);
|
||||||
glBindImageTextureEXT(0, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
|
glBindImageTextureEXT(0, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
|
||||||
|
|
||||||
if (_numVarying > 0) {
|
if (_numVarying > 0) {
|
||||||
glUniform1i(_varyingUniform, 1);
|
|
||||||
glBindImageTextureEXT(1, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
|
glBindImageTextureEXT(1, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
CHECK_GL_ERROR("BindVertexBuffer \n");
|
CHECK_GL_ERROR("BindVertexBuffer \n");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::UnbindVertexBuffer()
|
OsdGlslKernelDispatcher::UnbindVertexBuffer()
|
||||||
{
|
{
|
||||||
|
if (_currentVertexBuffer) {
|
||||||
|
unbindTextureBuffer(0);
|
||||||
|
}
|
||||||
|
if (_currentVaryingBuffer) {
|
||||||
|
unbindTextureBuffer(1);
|
||||||
|
}
|
||||||
|
_currentVertexBuffer = NULL;
|
||||||
|
_currentVaryingBuffer = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::Synchronize() {
|
OsdGlslKernelDispatcher::Synchronize() {
|
||||||
glFinish();
|
glFinish();
|
||||||
@ -263,14 +264,15 @@ OsdGlslKernelDispatcher::Synchronize() {
|
|||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::bindTextureBuffer(
|
OsdGlslKernelDispatcher::bindTextureBuffer(
|
||||||
GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const {
|
GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const {
|
||||||
|
|
||||||
if (sampler == -1) {
|
if (sampler == -1) {
|
||||||
OSD_ERROR("BindTextureError:: sampler = %d\n", sampler);
|
OSD_ERROR("BindTextureError:: sampler = %d\n", sampler);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
OSD_DEBUG("BindTextureBuffer sampler=%d, buffer=%d, texture = %d, E%x\n", sampler, buffer, texture, glGetError());
|
OSD_DEBUG("BindTextureBuffer unit=%d, sampler=%d, buffer=%d, texture = %d, E%x\n", unit, sampler, buffer, texture, glGetError());
|
||||||
|
|
||||||
glUniform1i(sampler, unit);
|
glUniform1i(sampler, unit);
|
||||||
|
CHECK_GL_ERROR("BindTextureBuffer glUniform %d\n", unit);
|
||||||
glActiveTexture(GL_TEXTURE0 + unit);
|
glActiveTexture(GL_TEXTURE0 + unit);
|
||||||
CHECK_GL_ERROR("BindTextureBuffer glActiveTexture %d\n", unit);
|
CHECK_GL_ERROR("BindTextureBuffer glActiveTexture %d\n", unit);
|
||||||
glBindTexture(GL_TEXTURE_BUFFER, texture);
|
glBindTexture(GL_TEXTURE_BUFFER, texture);
|
||||||
@ -280,38 +282,137 @@ OsdGlslKernelDispatcher::bindTextureBuffer(
|
|||||||
glActiveTexture(GL_TEXTURE0);
|
glActiveTexture(GL_TEXTURE0);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
|
||||||
OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDefine) {
|
|
||||||
|
|
||||||
_prgKernel = glCreateProgram();
|
void
|
||||||
|
OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const {
|
||||||
|
|
||||||
|
glActiveTexture(GL_TEXTURE0 + unit);
|
||||||
|
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
|
||||||
|
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
|
_shader->ApplyCatmarkFaceVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
|
||||||
|
_tableOffsets[F_IT][level-1],
|
||||||
|
_tableOffsets[F_ITa][level-1],
|
||||||
|
offset, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
|
||||||
|
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
|
_shader->ApplyCatmarkEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
|
||||||
|
_tableOffsets[E_IT][level-1],
|
||||||
|
_tableOffsets[E_W][level-1],
|
||||||
|
offset, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
|
||||||
|
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
|
_shader->ApplyCatmarkVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer,
|
||||||
|
_tableOffsets[V_IT][level-1],
|
||||||
|
_tableOffsets[V_ITa][level-1],
|
||||||
|
_tableOffsets[V_W][level-1],
|
||||||
|
offset, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
|
||||||
|
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
|
_shader->ApplyCatmarkVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer,
|
||||||
|
_tableOffsets[V_ITa][level-1],
|
||||||
|
_tableOffsets[V_W][level-1],
|
||||||
|
offset, pass, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel(
|
||||||
|
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
|
_shader->ApplyLoopEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
|
||||||
|
_tableOffsets[E_IT][level-1],
|
||||||
|
_tableOffsets[E_W][level-1],
|
||||||
|
offset, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB(
|
||||||
|
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
|
_shader->ApplyLoopVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer,
|
||||||
|
_tableOffsets[V_IT][level-1],
|
||||||
|
_tableOffsets[V_ITa][level-1],
|
||||||
|
_tableOffsets[V_W][level-1],
|
||||||
|
offset, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA(
|
||||||
|
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||||
|
|
||||||
|
_shader->ApplyLoopVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer,
|
||||||
|
_tableOffsets[V_ITa][level-1],
|
||||||
|
_tableOffsets[V_W][level-1],
|
||||||
|
offset, pass, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
OsdGlslKernelDispatcher::ComputeShader::ComputeShader() :
|
||||||
|
_program(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
OsdGlslKernelDispatcher::ComputeShader::~ComputeShader()
|
||||||
|
{
|
||||||
|
if (_program)
|
||||||
|
glDeleteProgram(_program);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
OsdGlslKernelDispatcher::ComputeShader::Compile(int numVertexElements, int numVaryingElements) {
|
||||||
|
|
||||||
|
// XXX: NOTE: current GLSL only supports numVertexElements = 6!!!
|
||||||
|
assert(numVertexElements == 6);
|
||||||
|
|
||||||
|
_numVertexElements = numVertexElements;
|
||||||
|
_numVaryingElements = numVaryingElements;
|
||||||
|
_program = glCreateProgram();
|
||||||
|
|
||||||
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
|
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
|
||||||
|
|
||||||
char constantDefine[256];
|
char constantDefine[256];
|
||||||
snprintf(constantDefine, 256,
|
snprintf(constantDefine, 256,
|
||||||
"#define NUM_VARYING_ELEMENTS %d\n", _numVarying);
|
"#define NUM_VARYING_ELEMENTS %d\n", numVaryingElements);
|
||||||
|
|
||||||
const char *shaderSources[3];
|
const char *shaderSources[3];
|
||||||
shaderSources[0] = constantDefine;
|
shaderSources[0] = constantDefine;
|
||||||
shaderSources[1] = shaderDefine;
|
shaderSources[1] = shaderDefines;
|
||||||
shaderSources[2] = shaderSource;
|
shaderSources[2] = shaderSource;
|
||||||
glShaderSource(shader, 3, shaderSources, NULL);
|
glShaderSource(shader, 3, shaderSources, NULL);
|
||||||
glCompileShader(shader);
|
glCompileShader(shader);
|
||||||
glAttachShader(_prgKernel, shader);
|
glAttachShader(_program, shader);
|
||||||
|
|
||||||
const char *outputs[] = { "outPosition",
|
const char *outputs[] = { "outPosition",
|
||||||
"outNormal",
|
"outNormal",
|
||||||
"gl_NextBuffer",
|
"gl_NextBuffer",
|
||||||
"outVaryingData" };
|
"outVaryingData" };
|
||||||
int nOutputs = _numVarying > 0 ? 4 : 2;
|
|
||||||
|
|
||||||
glTransformFeedbackVaryings(_prgKernel, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS);
|
int nOutputs = numVaryingElements > 0 ? 4 : 2;
|
||||||
|
|
||||||
|
glTransformFeedbackVaryings(_program, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS);
|
||||||
|
|
||||||
CHECK_GL_ERROR("Transform feedback initialize \n");
|
CHECK_GL_ERROR("Transform feedback initialize \n");
|
||||||
|
|
||||||
GLint linked = 0;
|
GLint linked = 0;
|
||||||
glLinkProgram(_prgKernel);
|
glLinkProgram(_program);
|
||||||
glGetProgramiv(_prgKernel, GL_LINK_STATUS, &linked);
|
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
|
||||||
|
|
||||||
if (linked == GL_FALSE) {
|
if (linked == GL_FALSE) {
|
||||||
OSD_ERROR("Fail to link shader\n");
|
OSD_ERROR("Fail to link shader\n");
|
||||||
@ -320,56 +421,79 @@ OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDef
|
|||||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||||
OSD_ERROR(buffer);
|
OSD_ERROR(buffer);
|
||||||
|
|
||||||
glGetProgramInfoLog(_prgKernel, 1024, NULL, buffer);
|
glGetProgramInfoLog(_program, 1024, NULL, buffer);
|
||||||
OSD_ERROR(buffer);
|
OSD_ERROR(buffer);
|
||||||
|
|
||||||
glDeleteProgram(_prgKernel);
|
glDeleteProgram(_program);
|
||||||
_prgKernel = 0;
|
_program = 0;
|
||||||
// XXX ERROR HANDLE
|
// XXX ERROR HANDLE
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
glDeleteShader(shader);
|
glDeleteShader(shader);
|
||||||
|
|
||||||
_vertexUniform = glGetUniformLocation(_prgKernel, "vertex");
|
_vertexUniform = glGetUniformLocation(_program, "vertex");
|
||||||
_varyingUniform = glGetUniformLocation(_prgKernel, "varyingData");
|
_varyingUniform = glGetUniformLocation(_program, "varyingData");
|
||||||
|
|
||||||
|
_subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace");
|
||||||
|
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
|
||||||
|
_subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
|
||||||
|
_subComputeVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexB");
|
||||||
|
_subComputeLoopVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "loopComputeVertexB");
|
||||||
|
|
||||||
|
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
|
||||||
|
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
|
||||||
|
_uniformIndexOffset = glGetUniformLocation(_program, "indexOffset");
|
||||||
|
|
||||||
|
_tableUniforms.resize(TABLE_MAX);
|
||||||
|
_tableOffsetUniforms.resize(TABLE_MAX);
|
||||||
|
|
||||||
|
_tableUniforms[F_IT] = glGetUniformLocation(_program, "_F0_IT");
|
||||||
|
_tableUniforms[F_ITa] = glGetUniformLocation(_program, "_F0_ITa");
|
||||||
|
_tableUniforms[E_IT] = glGetUniformLocation(_program, "_E0_IT");
|
||||||
|
_tableUniforms[V_IT] = glGetUniformLocation(_program, "_V0_IT");
|
||||||
|
_tableUniforms[V_ITa] = glGetUniformLocation(_program, "_V0_ITa");
|
||||||
|
_tableUniforms[E_W] = glGetUniformLocation(_program, "_E0_S");
|
||||||
|
_tableUniforms[V_W] = glGetUniformLocation(_program, "_V0_S");
|
||||||
|
_tableOffsetUniforms[F_IT] = glGetUniformLocation(_program, "F_IT_ofs");
|
||||||
|
_tableOffsetUniforms[F_ITa] = glGetUniformLocation(_program, "F_ITa_ofs");
|
||||||
|
_tableOffsetUniforms[E_IT] = glGetUniformLocation(_program, "E_IT_ofs");
|
||||||
|
_tableOffsetUniforms[V_IT] = glGetUniformLocation(_program, "V_IT_ofs");
|
||||||
|
_tableOffsetUniforms[V_ITa] = glGetUniformLocation(_program, "V_ITa_ofs");
|
||||||
|
_tableOffsetUniforms[E_W] = glGetUniformLocation(_program, "E_W_ofs");
|
||||||
|
_tableOffsetUniforms[V_W] = glGetUniformLocation(_program, "V_W_ofs");
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const {
|
OsdGlslKernelDispatcher::ComputeShader::transformGpuBufferData(OsdGpuVertexBuffer *vertexBuffer, OsdGpuVertexBuffer *varyingBuffer,
|
||||||
glActiveTexture(GL_TEXTURE0 + unit);
|
GLint offset, int start, int end) const {
|
||||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass) const {
|
|
||||||
int count = end - start;
|
int count = end - start;
|
||||||
if (count <= 0) return;
|
if (count <= 0) return;
|
||||||
OSD_DEBUG("_transformGpuBufferData kernel=%d E%x, offset=%d, count=%d\n", kernel, glGetError(), offset, count);
|
OSD_DEBUG("_transformGpuBufferData offset=%d, count=%d\n", glGetError(), offset, count);
|
||||||
|
|
||||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &kernel);
|
|
||||||
glUniform1i(uniformVertexPass, vertexPass); // XXX
|
|
||||||
|
|
||||||
// set batch range
|
// set batch range
|
||||||
glUniform1i(uniformIndexStart, start);
|
glUniform1i(_uniformIndexStart, start);
|
||||||
glUniform1i(uniformIndexOffset, offset);
|
glUniform1i(_uniformIndexOffset, offset);
|
||||||
// XXX: end is not used here now
|
// XXX: end is not used here now
|
||||||
CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n", offset, start);
|
CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n", offset, start);
|
||||||
|
|
||||||
// set transform feedback buffer
|
// set transform feedback buffer
|
||||||
int vertexStride = _numVertexElements*sizeof(float);
|
if (vertexBuffer) {
|
||||||
int varyingStride = _numVarying*sizeof(float);
|
int vertexStride = vertexBuffer->GetNumElements()*sizeof(float);
|
||||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, _vertexBuffer,
|
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer->GetGpuBuffer(),
|
||||||
(start+offset)*vertexStride, count*vertexStride);
|
(start+offset)*vertexStride, count*vertexStride);
|
||||||
CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
|
}
|
||||||
|
|
||||||
if (_numVarying > 0){
|
if (varyingBuffer){
|
||||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, _varyingBuffer,
|
int varyingStride = varyingBuffer->GetNumElements()*sizeof(float);
|
||||||
|
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer->GetGpuBuffer(),
|
||||||
(start+offset)*varyingStride, count*varyingStride);
|
(start+offset)*varyingStride, count*varyingStride);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
|
||||||
|
|
||||||
glBeginTransformFeedback(GL_POINTS);
|
glBeginTransformFeedback(GL_POINTS);
|
||||||
|
|
||||||
CHECK_GL_ERROR("transformGpuBufferData glBeginTransformFeedback\n");
|
CHECK_GL_ERROR("transformGpuBufferData glBeginTransformFeedback\n");
|
||||||
@ -387,68 +511,84 @@ OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
|
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkFaceVerticesKernel(
|
||||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end) {
|
||||||
glUniform1i(_tableOffsetUniforms[F_IT], _tableOffsets[F_IT][level-1]);
|
|
||||||
glUniform1i(_tableOffsetUniforms[F_ITa], _tableOffsets[F_ITa][level-1]);
|
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace);
|
||||||
transformGpuBufferData(subComputeFace, offset, start, end);
|
glUniform1i(_tableOffsetUniforms[F_IT], F_IT_ofs);
|
||||||
|
glUniform1i(_tableOffsetUniforms[F_ITa], F_ITa_ofs);
|
||||||
|
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
|
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkEdgeVerticesKernel(
|
||||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int E_IT_ofs, int E_W_ofs, int offset, int start, int end) {
|
||||||
glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]);
|
|
||||||
glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]);
|
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
|
||||||
transformGpuBufferData(subComputeEdge, offset, start, end);
|
glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs);
|
||||||
|
glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs);
|
||||||
|
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
|
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelB(
|
||||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) {
|
||||||
|
|
||||||
glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]);
|
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexB);
|
||||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs);
|
||||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||||
transformGpuBufferData(subComputeVertexB, offset, start, end);
|
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||||
|
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
|
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelA(
|
||||||
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) {
|
||||||
|
|
||||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
|
||||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
|
||||||
transformGpuBufferData(subComputeVertexA, offset, start, end, pass);
|
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||||
|
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||||
|
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel(
|
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopEdgeVerticesKernel(
|
||||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int E_IT_ofs, int E_W_ofs, int offset, int start, int end) {
|
||||||
|
|
||||||
glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]);
|
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
|
||||||
glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]);
|
glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs);
|
||||||
transformGpuBufferData(subComputeEdge, offset, start, end);
|
glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs);
|
||||||
|
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB(
|
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelB(
|
||||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) {
|
||||||
|
|
||||||
glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]);
|
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeLoopVertexB);
|
||||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs);
|
||||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||||
transformGpuBufferData(subComputeLoopVertexB, offset, start, end);
|
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||||
|
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA(
|
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelA(
|
||||||
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) {
|
||||||
|
|
||||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
|
||||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
|
||||||
transformGpuBufferData(subComputeVertexA, offset, start, end, pass);
|
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||||
|
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||||
|
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end namespace OPENSUBDIV_VERSION
|
} // end namespace OPENSUBDIV_VERSION
|
||||||
|
@ -89,11 +89,11 @@ public:
|
|||||||
|
|
||||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
||||||
|
|
||||||
virtual void BeginLaunchKernel();
|
virtual void OnKernelLaunch();
|
||||||
|
|
||||||
virtual void EndLaunchKernel();
|
|
||||||
|
|
||||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
|
virtual void OnKernelFinish();
|
||||||
|
|
||||||
|
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
|
||||||
|
|
||||||
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
||||||
|
|
||||||
@ -110,42 +110,100 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
|
class ComputeShader {
|
||||||
|
public:
|
||||||
|
ComputeShader();
|
||||||
|
~ComputeShader();
|
||||||
|
|
||||||
|
bool Compile(int numVertexElements, int numVaryingElements);
|
||||||
|
|
||||||
|
GLuint GetTableUniform(int table) const {
|
||||||
|
return _tableUniforms[table];
|
||||||
|
}
|
||||||
|
GLuint GetVertexUniform() const { return _vertexUniform; }
|
||||||
|
GLuint GetVaryingUniform() const { return _varyingUniform; }
|
||||||
|
|
||||||
|
void ApplyCatmarkFaceVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end);
|
||||||
|
|
||||||
|
void ApplyCatmarkEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int E_IT_ofs, int E_W_ofs, int offset, int start, int end);
|
||||||
|
|
||||||
|
void ApplyCatmarkVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end);
|
||||||
|
|
||||||
|
void ApplyCatmarkVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end);
|
||||||
|
|
||||||
|
void ApplyLoopEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int E_IT_ofs, int E_W_ofs, int offset, int start, int end);
|
||||||
|
|
||||||
|
void ApplyLoopVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end);
|
||||||
|
|
||||||
|
void ApplyLoopVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end);
|
||||||
|
|
||||||
|
void UseProgram () const {
|
||||||
|
glUseProgram(_program);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Match {
|
||||||
|
Match(int numVertexElements, int numVaryingElements) :
|
||||||
|
_numVertexElements(numVertexElements), _numVaryingElements(numVaryingElements) {}
|
||||||
|
bool operator() (const ComputeShader &shader) {
|
||||||
|
return (shader._numVertexElements == _numVertexElements
|
||||||
|
&& shader._numVaryingElements == _numVaryingElements);
|
||||||
|
}
|
||||||
|
int _numVertexElements, _numVaryingElements;
|
||||||
|
};
|
||||||
|
|
||||||
|
friend class Match;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void transformGpuBufferData(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||||
|
GLint offset, int start, int end) const;
|
||||||
|
|
||||||
|
int _numVertexElements;
|
||||||
|
int _numVaryingElements;
|
||||||
|
|
||||||
|
GLuint _program;
|
||||||
|
|
||||||
|
GLuint _uniformVertexPass;
|
||||||
|
GLuint _uniformIndexStart;
|
||||||
|
GLuint _uniformIndexOffset;
|
||||||
|
|
||||||
|
GLuint _vertexUniform,
|
||||||
|
_varyingUniform;
|
||||||
|
|
||||||
|
// shader locations
|
||||||
|
GLuint _subComputeFace, _subComputeEdge, _subComputeVertexA, _subComputeVertexB;
|
||||||
|
GLuint _subComputeLoopVertexB;
|
||||||
|
|
||||||
|
std::vector<GLuint> _tableUniforms;
|
||||||
|
std::vector<GLuint> _tableOffsetUniforms;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
void bindTextureBuffer(GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const;
|
void bindTextureBuffer(GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const;
|
||||||
|
|
||||||
void unbindTextureBuffer(int unit) const;
|
void unbindTextureBuffer(int unit) const;
|
||||||
|
|
||||||
void transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass=false) const;
|
ComputeShader * _shader;
|
||||||
|
|
||||||
bool compile(const char *shaderSource, const char *shaderDefine);
|
|
||||||
|
|
||||||
GLuint _prgKernel;
|
|
||||||
|
|
||||||
int _numVertexElements,
|
|
||||||
_numVarying;
|
|
||||||
|
|
||||||
GLuint _vertexBuffer,
|
|
||||||
_varyingBuffer;
|
|
||||||
|
|
||||||
// texture for vertex
|
// texture for vertex
|
||||||
GLuint _vertexTexture,
|
GLuint _vertexTexture,
|
||||||
_varyingTexture;
|
_varyingTexture;
|
||||||
|
|
||||||
GLuint _vertexUniform,
|
OsdGpuVertexBuffer *_currentVertexBuffer,
|
||||||
_varyingUniform;
|
*_currentVaryingBuffer;
|
||||||
|
|
||||||
// table buffers
|
// table buffers
|
||||||
std::vector<GLuint> _tableBuffers;
|
std::vector<GLuint> _tableBuffers;
|
||||||
std::vector<GLuint> _tableTextures;
|
std::vector<GLuint> _tableTextures;
|
||||||
std::vector<GLuint> _tableUniforms;
|
|
||||||
std::vector<GLuint> _tableOffsetUniforms;
|
|
||||||
|
|
||||||
GLuint uniformVertexPass;
|
// static shader registry (XXX tentative..)
|
||||||
GLuint uniformIndexStart;
|
static std::vector<ComputeShader> shaderRegistry;
|
||||||
GLuint uniformIndexOffset;
|
|
||||||
|
|
||||||
// shader locations
|
|
||||||
GLuint subComputeFace, subComputeEdge, subComputeVertexA, subComputeVertexB;
|
|
||||||
GLuint subComputeLoopVertexB;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace OPENSUBDIV_VERSION
|
} // end namespace OPENSUBDIV_VERSION
|
||||||
|
@ -82,9 +82,9 @@ public:
|
|||||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr) = 0;
|
virtual void CopyTable(int tableIndex, size_t size, const void *ptr) = 0;
|
||||||
|
|
||||||
|
|
||||||
virtual void BeginLaunchKernel() = 0;
|
virtual void OnKernelLaunch() = 0;
|
||||||
|
|
||||||
virtual void EndLaunchKernel() = 0;
|
virtual void OnKernelFinish() = 0;
|
||||||
|
|
||||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count) = 0;
|
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count) = 0;
|
||||||
|
|
||||||
|
@ -102,7 +102,6 @@ OsdMesh::Create(OsdHbrMesh *hbrMesh, int level, const std::string &kernel) {
|
|||||||
|
|
||||||
OSD_DEBUG("PREP: NumCoarseVertex = %d\n", _mMesh->GetNumCoarseVertices());
|
OSD_DEBUG("PREP: NumCoarseVertex = %d\n", _mMesh->GetNumCoarseVertices());
|
||||||
OSD_DEBUG("PREP: NumVertex = %d\n", _mMesh->GetNumVertices());
|
OSD_DEBUG("PREP: NumVertex = %d\n", _mMesh->GetNumVertices());
|
||||||
OSD_DEBUG("PREP: NumTables = %d\n", _mMesh->GetNumSubdivisionTables());
|
|
||||||
|
|
||||||
const FarSubdivisionTables<OsdVertex>* table = _mMesh->GetSubdivision();
|
const FarSubdivisionTables<OsdVertex>* table = _mMesh->GetSubdivision();
|
||||||
|
|
||||||
@ -145,11 +144,11 @@ OsdMesh::Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
|||||||
|
|
||||||
_dispatcher->BindVertexBuffer(vertex, varying);
|
_dispatcher->BindVertexBuffer(vertex, varying);
|
||||||
|
|
||||||
_dispatcher->BeginLaunchKernel();
|
_dispatcher->OnKernelLaunch();
|
||||||
|
|
||||||
_mMesh->Subdivide(_level+1);
|
_mMesh->Subdivide(_level+1);
|
||||||
|
|
||||||
_dispatcher->EndLaunchKernel();
|
_dispatcher->OnKernelFinish();
|
||||||
|
|
||||||
_dispatcher->UnbindVertexBuffer();
|
_dispatcher->UnbindVertexBuffer();
|
||||||
}
|
}
|
||||||
|
@ -95,7 +95,13 @@ public:
|
|||||||
|
|
||||||
OsdVertexBuffer *InitializeVertexBuffer(int numElements);
|
OsdVertexBuffer *InitializeVertexBuffer(int numElements);
|
||||||
|
|
||||||
void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
// for non-interleaved vertex data
|
||||||
|
void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying = NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
// for interleaved vertex data ?
|
||||||
|
template <class T> void Subdivide(T *vertex) { }
|
||||||
|
*/
|
||||||
|
|
||||||
void Synchronize();
|
void Synchronize();
|
||||||
|
|
||||||
|
@ -9,69 +9,83 @@ namespace OPENSUBDIV_VERSION {
|
|||||||
|
|
||||||
class OsdVertexBuffer {
|
class OsdVertexBuffer {
|
||||||
public:
|
public:
|
||||||
|
OsdVertexBuffer(int numElements) : _numElements(numElements) {}
|
||||||
virtual ~OsdVertexBuffer() {}
|
virtual ~OsdVertexBuffer() {}
|
||||||
virtual void UpdateData(const float *src, int count) = 0;
|
|
||||||
|
virtual void UpdateData(const float *src, int numVertices) = 0;
|
||||||
|
|
||||||
virtual GLuint GetGpuBuffer() = 0;
|
virtual GLuint GetGpuBuffer() = 0;
|
||||||
|
|
||||||
|
int GetNumElements() const {
|
||||||
|
return _numElements;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int _numElements;
|
||||||
};
|
};
|
||||||
|
|
||||||
class OsdGpuVertexBuffer : public OsdVertexBuffer {
|
class OsdGpuVertexBuffer : public OsdVertexBuffer {
|
||||||
public:
|
public:
|
||||||
OsdGpuVertexBuffer(int numElements, int count) : _vbo(0), _numElements(numElements) {
|
OsdGpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _vbo(0) {
|
||||||
int stride = numElements * count * sizeof(float);
|
int size = numElements * numVertices * sizeof(float);
|
||||||
glGenBuffers(1, &_vbo);
|
glGenBuffers(1, &_vbo);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
||||||
glBufferData(GL_ARRAY_BUFFER, stride, 0, GL_STREAM_DRAW);
|
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_STREAM_DRAW);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||||
}
|
}
|
||||||
virtual ~OsdGpuVertexBuffer() {
|
virtual ~OsdGpuVertexBuffer() {
|
||||||
glDeleteBuffers(1, &_vbo);
|
glDeleteBuffers(1, &_vbo);
|
||||||
}
|
}
|
||||||
virtual void UpdateData(const float *src, int count) {
|
|
||||||
|
virtual void UpdateData(const float *src, int numVertices) {
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
||||||
float * pointer = (float*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
|
float * pointer = (float*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
|
||||||
memcpy(pointer, src, _numElements * count * sizeof(float));
|
memcpy(pointer, src, _numElements * numVertices * sizeof(float));
|
||||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual GLuint GetGpuBuffer() {
|
virtual GLuint GetGpuBuffer() {
|
||||||
return _vbo;
|
return _vbo;
|
||||||
}
|
}
|
||||||
private:
|
|
||||||
|
protected:
|
||||||
GLuint _vbo;
|
GLuint _vbo;
|
||||||
int _numElements;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class OsdCpuVertexBuffer : public OsdVertexBuffer {
|
class OsdCpuVertexBuffer : public OsdVertexBuffer {
|
||||||
public:
|
public:
|
||||||
OsdCpuVertexBuffer(int numElements, int count) : _cpuVbo(NULL), _vboSize(0), _numElements(numElements), _vbo(0) {
|
OsdCpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _cpuVbo(NULL), _vboSize(0), _vbo(0) {
|
||||||
_cpuVbo = new float[numElements * count];
|
_vboSize = numElements * numVertices;
|
||||||
_vboSize = numElements * count;
|
_cpuVbo = new float[numElements * numVertices];
|
||||||
}
|
}
|
||||||
virtual ~OsdCpuVertexBuffer() {
|
virtual ~OsdCpuVertexBuffer() {
|
||||||
if(_cpuVbo) delete[] _cpuVbo;
|
delete [] _cpuVbo;
|
||||||
if(_vbo) glDeleteBuffers(1, &_vbo);
|
if (_vbo)
|
||||||
|
glDeleteBuffers(1, &_vbo);
|
||||||
}
|
}
|
||||||
virtual void UpdateData(const float *src, int count) {
|
|
||||||
memcpy(_cpuVbo, src, _numElements * count * sizeof(float));
|
virtual void UpdateData(const float *src, int numVertices) {
|
||||||
|
memcpy(_cpuVbo, src, _numElements * numVertices * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
float *GetCpuBuffer() {
|
float *GetCpuBuffer() {
|
||||||
return _cpuVbo;
|
return _cpuVbo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XXX: this method name is missleading
|
||||||
virtual GLuint GetGpuBuffer() {
|
virtual GLuint GetGpuBuffer() {
|
||||||
if(!_vbo) glGenBuffers(1, &_vbo);
|
if (!_vbo)
|
||||||
|
glGenBuffers(1, &_vbo);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
||||||
glBufferData(GL_ARRAY_BUFFER, _vboSize * sizeof(float), _cpuVbo, GL_STREAM_DRAW);
|
glBufferData(GL_ARRAY_BUFFER, _vboSize * sizeof(float), _cpuVbo, GL_STREAM_DRAW);
|
||||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||||
return _vbo;
|
return _vbo;
|
||||||
}
|
}
|
||||||
|
|
||||||
int GetNumElements() const {
|
protected:
|
||||||
return _numElements;
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
float *_cpuVbo;
|
float *_cpuVbo;
|
||||||
int _vboSize;
|
int _vboSize;
|
||||||
int _numElements;
|
|
||||||
GLuint _vbo;
|
GLuint _vbo;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user