mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-12-03 16:31:04 +00:00
Added OsdVertexBuffer class. OsdMesh no longer has vertex buffer and vertex
specification (how many elements exists in the buffer). client will create OsdVertexBuffer and provide it as an argument of OsdMesh::Subdivide() function. It would be more flexible and hopefully matches various use cases. Since each dispatcher has to accept arbitrary vertex buffer, introduced a simple shader registry into glslDispatcher. It will configure shaders for given vertex elements on demand (for now, just works only for varying buffer). Fixed cuda kernel's GL resource leakage. Since cuda GL interop seems one-way, OsdCudaVertexBuffer manages vertex updating instead of just using OsdGpuVertexBuffer. Cleaned up some kernel codes and renamed ambiguous names.
This commit is contained in:
parent
29b85a0e8c
commit
851c00d04c
@ -66,29 +66,33 @@
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
OsdCpuKernelDispatcher::DeviceTable::~DeviceTable() {
|
||||
OsdCpuKernelDispatcher::SubdivisionTable::~SubdivisionTable() {
|
||||
|
||||
if (devicePtr)
|
||||
free(devicePtr);
|
||||
if (ptr)
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::DeviceTable::Copy( int size, const void *table ) {
|
||||
OsdCpuKernelDispatcher::SubdivisionTable::Copy( int size, const void *table ) {
|
||||
|
||||
if (size > 0) {
|
||||
if (devicePtr)
|
||||
free(devicePtr);
|
||||
devicePtr = malloc(size);
|
||||
memcpy(devicePtr, table, size);
|
||||
if (ptr)
|
||||
free(ptr);
|
||||
ptr = malloc(size);
|
||||
memcpy(ptr, table, size);
|
||||
}
|
||||
}
|
||||
|
||||
OsdCpuKernelDispatcher::OsdCpuKernelDispatcher( int levels )
|
||||
: OsdKernelDispatcher(levels) {
|
||||
: OsdKernelDispatcher(levels), _currentVertexBuffer(NULL), _currentVaryingBuffer(NULL), _vdesc(NULL) {
|
||||
_tables.resize(TABLE_MAX);
|
||||
}
|
||||
|
||||
OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() { }
|
||||
OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() {
|
||||
|
||||
if (_vdesc)
|
||||
delete _vdesc;
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) {
|
||||
@ -96,30 +100,37 @@ OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
|
||||
_tables[tableIndex].Copy(size, ptr);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::BeginLaunchKernel() { }
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::EndLaunchKernel() { }
|
||||
|
||||
OsdVertexBuffer *
|
||||
OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
|
||||
OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
|
||||
{
|
||||
return new OsdCpuVertexBuffer(numElements, count);
|
||||
return new OsdCpuVertexBuffer(numElements, numVertices);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
||||
|
||||
_vertexBuffer = dynamic_cast<OsdCpuVertexBuffer *>(vertex);
|
||||
_varyingBuffer = dynamic_cast<OsdCpuVertexBuffer *>(varying);
|
||||
if (vertex)
|
||||
_currentVertexBuffer = dynamic_cast<OsdCpuVertexBuffer *>(vertex);
|
||||
else
|
||||
_currentVertexBuffer = NULL;
|
||||
|
||||
if (varying)
|
||||
_currentVaryingBuffer = dynamic_cast<OsdCpuVertexBuffer *>(varying);
|
||||
else
|
||||
_currentVaryingBuffer = NULL;
|
||||
|
||||
_vdesc = new VertexDescriptor(_currentVertexBuffer ? _currentVertexBuffer->GetNumElements() : 0,
|
||||
_currentVaryingBuffer ? _currentVaryingBuffer->GetNumElements() : 0);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::UnbindVertexBuffer()
|
||||
{
|
||||
_vertexBuffer = NULL;
|
||||
_varyingBuffer = NULL;
|
||||
delete _vdesc;
|
||||
_vdesc = NULL;
|
||||
|
||||
_currentVertexBuffer = NULL;
|
||||
_currentVaryingBuffer = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
@ -129,21 +140,17 @@ OsdCpuKernelDispatcher::Synchronize() { }
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyBilinearFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
|
||||
|
||||
computeFace(&vd, _vbo, _varyingVbo,
|
||||
(int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1],
|
||||
(int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1],
|
||||
computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1],
|
||||
(int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
|
||||
|
||||
computeBilinearEdge(&vd, _vbo, _varyingVbo,
|
||||
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
|
||||
computeBilinearEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
|
||||
offset,
|
||||
start, end);
|
||||
}
|
||||
@ -151,38 +158,26 @@ OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh<OsdVertex> * me
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyBilinearVertexVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
|
||||
|
||||
computeBilinearVertex(&vd, _vbo, _varyingVbo,
|
||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
||||
computeBilinearVertex(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyCatmarkFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
||||
|
||||
computeFace(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
||||
(int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1],
|
||||
(int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1],
|
||||
computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1],
|
||||
(int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1],
|
||||
offset, start, end);
|
||||
|
||||
float *p = _vertexBuffer->GetCpuBuffer();
|
||||
for(int i = 0; i < 150; i+=3){
|
||||
printf("%f %f %f\n", p[0], p[1], p[2]);
|
||||
p+=3;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
||||
|
||||
computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
||||
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
|
||||
(float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1],
|
||||
computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
|
||||
(float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1],
|
||||
offset,
|
||||
start, end);
|
||||
}
|
||||
@ -190,34 +185,28 @@ OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh<OsdVertex> * mes
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
||||
|
||||
computeVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
||||
(int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1],
|
||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
||||
computeVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||
(int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1],
|
||||
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
||||
|
||||
computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
||||
computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||
offset, start, end, pass);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
||||
|
||||
computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
||||
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
|
||||
(float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1],
|
||||
computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
|
||||
(float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1],
|
||||
offset,
|
||||
start, end);
|
||||
}
|
||||
@ -225,28 +214,23 @@ OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh<OsdVertex> * mesh,
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
||||
|
||||
computeLoopVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
||||
(int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1],
|
||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
||||
computeLoopVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||
(int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1],
|
||||
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||
|
||||
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
|
||||
|
||||
computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
|
||||
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
|
||||
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
|
||||
computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
|
||||
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
|
||||
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
|
||||
offset, start, end, pass);
|
||||
}
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
|
@ -70,6 +70,8 @@
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
class VertexDescriptor;
|
||||
|
||||
class OsdCpuKernelDispatcher : public OsdKernelDispatcher
|
||||
{
|
||||
public:
|
||||
@ -102,11 +104,11 @@ public:
|
||||
|
||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
||||
|
||||
virtual void BeginLaunchKernel();
|
||||
virtual void OnKernelLaunch() {}
|
||||
|
||||
virtual void EndLaunchKernel();
|
||||
virtual void OnKernelFinish() {}
|
||||
|
||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
|
||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
|
||||
|
||||
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
||||
|
||||
@ -123,18 +125,26 @@ public:
|
||||
|
||||
protected:
|
||||
|
||||
struct DeviceTable {
|
||||
DeviceTable() : devicePtr(NULL) { }
|
||||
~DeviceTable();
|
||||
struct SubdivisionTable {
|
||||
SubdivisionTable() : ptr(NULL) { }
|
||||
|
||||
~SubdivisionTable();
|
||||
|
||||
void Copy(int size, const void *ptr);
|
||||
|
||||
void *devicePtr;
|
||||
void *ptr;
|
||||
};
|
||||
|
||||
OsdCpuVertexBuffer *_vertexBuffer, *_varyingBuffer;
|
||||
float *GetVertexBuffer() const { return _currentVertexBuffer ? _currentVertexBuffer->GetCpuBuffer() : NULL; }
|
||||
|
||||
std::vector<DeviceTable> _tables;
|
||||
float *GetVaryingBuffer() const { return _currentVaryingBuffer ? _currentVaryingBuffer->GetCpuBuffer() : NULL; }
|
||||
|
||||
OsdCpuVertexBuffer *_currentVertexBuffer,
|
||||
*_currentVaryingBuffer;
|
||||
|
||||
VertexDescriptor *_vdesc;
|
||||
|
||||
std::vector<SubdivisionTable> _tables;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -64,9 +64,6 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying, const int *F_IT, const int *F_ITa, int offset, int start, int end) {
|
||||
|
||||
int ve = vdesc->numVertexElements;
|
||||
int vev = vdesc->numVaryingElements;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@ -77,23 +74,19 @@ void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying
|
||||
float weight = 1.0f/n;
|
||||
|
||||
// XXX: should use local vertex struct variable instead of accumulating directly into global memory.
|
||||
float *dst = &vertex[(offset + i)*ve];
|
||||
float *dstVarying = &varying[(offset + i)*vev];
|
||||
vdesc->Clear(dst, dstVarying);
|
||||
int dstIndex = offset + i;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
for (int j=0; j<n; ++j) {
|
||||
int index = F_IT[h+j];
|
||||
vdesc->AddWithWeight(dst, &vertex[index*ve], weight);
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[index*vev], weight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, index, weight);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, const float *E_W, int offset, int start, int end) {
|
||||
|
||||
int ve = vdesc->numVertexElements;
|
||||
int vev = vdesc->numVaryingElements;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@ -105,30 +98,26 @@ void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying,
|
||||
|
||||
float vertWeight = E_W[i*2+0];
|
||||
|
||||
float *dst = &vertex[(offset+i)*ve];
|
||||
float *dstVarying = &varying[(offset+i)*vev];
|
||||
vdesc->Clear(dst, dstVarying);
|
||||
int dstIndex = offset + i;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], vertWeight);
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], vertWeight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
|
||||
|
||||
if (eidx2 != -1) {
|
||||
float faceWeight = E_W[i*2+1];
|
||||
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx2*ve], faceWeight);
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx3*ve], faceWeight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
|
||||
}
|
||||
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f);
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
}
|
||||
}
|
||||
|
||||
void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const float *V_W, int offset, int start, int end, int pass) {
|
||||
|
||||
int ve = vdesc->numVertexElements;
|
||||
int vev = vdesc->numVaryingElements;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@ -146,29 +135,25 @@ void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying
|
||||
if (weight>0.0f && weight<1.0f && n > 0)
|
||||
weight=1.0f-weight;
|
||||
|
||||
float *dst = &vertex[(offset+i)*ve];
|
||||
float *dstVarying = &varying[(offset+i)*vev];
|
||||
int dstIndex = offset + i;
|
||||
if(not pass)
|
||||
vdesc->Clear(dst, dstVarying);
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
|
||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight);
|
||||
} else {
|
||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * 0.75f);
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], weight * 0.125f);
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], weight * 0.125f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
|
||||
}
|
||||
|
||||
if (not pass)
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
|
||||
|
||||
int ve = vdesc->numVertexElements;
|
||||
int vev = vdesc->numVaryingElements;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@ -181,25 +166,21 @@ void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying
|
||||
float wp = 1.0f/float(n*n);
|
||||
float wv = (n-2.0f) * n * wp;
|
||||
|
||||
float *dst = &vertex[(offset+i)*ve];
|
||||
float *dstVarying = &varying[(offset+i)*vev];
|
||||
vdesc->Clear(dst, dstVarying);
|
||||
int dstIndex = offset + i;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * wv);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight * wv);
|
||||
|
||||
for (int j = 0; j < n; ++j) {
|
||||
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2]*ve], weight * wp);
|
||||
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2+1]*ve], weight * wp);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
|
||||
}
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
|
||||
|
||||
int ve = vdesc->numVertexElements;
|
||||
int vev = vdesc->numVaryingElements;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@ -214,24 +195,20 @@ void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *var
|
||||
beta = beta * beta;
|
||||
beta = (0.625f - beta) * wp;
|
||||
|
||||
float *dst = &vertex[(offset+i)*ve];
|
||||
float *dstVarying = &varying[(offset+i)*vev];
|
||||
vdesc->Clear(dst, dstVarying);
|
||||
int dstIndex = offset + i;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * (1.0f - (beta * n)));
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
|
||||
|
||||
for (int j = 0; j < n; ++j)
|
||||
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j]*ve], weight * beta);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
|
||||
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, int offset, int start, int end) {
|
||||
|
||||
int ve = vdesc->numVertexElements;
|
||||
int vev = vdesc->numVaryingElements;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@ -239,35 +216,30 @@ void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *va
|
||||
int eidx0 = E_IT[2*i+0];
|
||||
int eidx1 = E_IT[2*i+1];
|
||||
|
||||
float *dst = &vertex[(offset+i)*ve];
|
||||
float *dstVarying = &varying[(offset+i)*vev];
|
||||
vdesc->Clear(dst, dstVarying);
|
||||
int dstIndex = offset + i;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
|
||||
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], 0.5f);
|
||||
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], 0.5f);
|
||||
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f);
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
|
||||
}
|
||||
}
|
||||
|
||||
void computeBilinearVertex(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, int offset, int start, int end) {
|
||||
|
||||
int ve = vdesc->numVertexElements;
|
||||
int vev = vdesc->numVaryingElements;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i = start; i < end; i++) {
|
||||
int p = V_ITa[i];
|
||||
|
||||
float *dst = &vertex[(offset+i)*ve];
|
||||
float *dstVarying = &varying[(offset+i)*vev];
|
||||
vdesc->Clear(dst, dstVarying);
|
||||
int dstIndex = offset + i;
|
||||
vdesc->Clear(vertex, varying, dstIndex);
|
||||
|
||||
vdesc->AddWithWeight(dst, &vertex[p*ve], 1.0f);
|
||||
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f);
|
||||
vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f);
|
||||
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,20 +67,28 @@ struct VertexDescriptor {
|
||||
VertexDescriptor(int numVertexElem, int numVaryingElem)
|
||||
: numVertexElements(numVertexElem), numVaryingElements(numVaryingElem) { }
|
||||
|
||||
void Clear(float *vertex, float *varying) const {
|
||||
for (int i = 0; i < numVertexElements; ++i)
|
||||
vertex[i] = 0.0f;
|
||||
void Clear(float *vertex, float *varying, int index) const {
|
||||
if (vertex) {
|
||||
for (int i = 0; i < numVertexElements; ++i)
|
||||
vertex[index*numVertexElements+i] = 0.0f;
|
||||
}
|
||||
|
||||
for (int i = 0; i < numVaryingElements; ++i)
|
||||
varying[i] = 0.0f;
|
||||
if (varying) {
|
||||
for (int i = 0; i < numVaryingElements; ++i)
|
||||
varying[index*numVaryingElements+i] = 0.0f;
|
||||
}
|
||||
}
|
||||
void AddWithWeight(float *vertex, const float *src, float weight) const {
|
||||
void AddWithWeight(float *vertex, int dstIndex, int srcIndex, float weight) const {
|
||||
int d = dstIndex * numVertexElements;
|
||||
int s = srcIndex * numVertexElements;
|
||||
for (int i = 0; i < numVertexElements; ++i)
|
||||
vertex[i] += src[i] * weight;
|
||||
vertex[d++] += vertex[s++] * weight;
|
||||
}
|
||||
void AddVaryingWithWeight(float *varying, const float *src, float weight) const {
|
||||
void AddVaryingWithWeight(float *varying, int dstIndex, int srcIndex, float weight) const {
|
||||
int d = dstIndex * numVaryingElements;
|
||||
int s = srcIndex * numVaryingElements;
|
||||
for (int i = 0; i < numVaryingElements; ++i)
|
||||
varying[i] += src[i] * weight;
|
||||
varying[d++] += varying[s++] * weight;
|
||||
}
|
||||
|
||||
int numVertexElements;
|
||||
|
@ -81,7 +81,42 @@ void OsdCudaComputeBilinearVertex(float *vertex, float *varying, int numUserVert
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
OsdCudaVertexBuffer::OsdCudaVertexBuffer(int numElements, int numVertices) :
|
||||
OsdGpuVertexBuffer(numElements, numVertices) {
|
||||
|
||||
// register vbo as cuda resource
|
||||
cudaGraphicsGLRegisterBuffer(&_cudaResource, _vbo, cudaGraphicsMapFlagsNone);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaVertexBuffer::UpdateData(const float *src, int numVertices) {
|
||||
|
||||
void *dst = Map();
|
||||
cudaMemcpy(dst, src, _numElements * numVertices * sizeof(float), cudaMemcpyHostToDevice);
|
||||
Unmap();
|
||||
}
|
||||
|
||||
void *
|
||||
OsdCudaVertexBuffer::Map() {
|
||||
|
||||
size_t num_bytes;
|
||||
void *ptr;
|
||||
|
||||
cudaGraphicsMapResources(1, &_cudaResource, 0);
|
||||
cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaVertexBuffer::Unmap() {
|
||||
cudaGraphicsUnmapResources(1, &_cudaResource, 0);
|
||||
}
|
||||
|
||||
OsdCudaVertexBuffer::~OsdCudaVertexBuffer() {
|
||||
cudaGraphicsUnregisterResource(_cudaResource);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------
|
||||
OsdCudaKernelDispatcher::DeviceTable::~DeviceTable() {
|
||||
|
||||
if (devicePtr) cudaFree(devicePtr);
|
||||
@ -95,18 +130,16 @@ OsdCudaKernelDispatcher::DeviceTable::Copy(int size, const void *ptr) {
|
||||
cudaMalloc(&devicePtr, size);
|
||||
cudaMemcpy(devicePtr, ptr, size, cudaMemcpyHostToDevice);
|
||||
}
|
||||
// -------------------------------------------------------------------------------
|
||||
|
||||
OsdCudaKernelDispatcher::OsdCudaKernelDispatcher(int levels)
|
||||
: OsdKernelDispatcher(levels),
|
||||
_cudaVertexResource(NULL),
|
||||
_cudaVaryingResource(NULL)
|
||||
: OsdKernelDispatcher(levels)
|
||||
{
|
||||
_tables.resize(TABLE_MAX);
|
||||
}
|
||||
|
||||
|
||||
OsdCudaKernelDispatcher::~OsdCudaKernelDispatcher() {
|
||||
cudaDeviceReset(); // XXX: necessary?
|
||||
}
|
||||
|
||||
void
|
||||
@ -115,48 +148,52 @@ OsdCudaKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
|
||||
_tables[tableIndex].Copy(size, ptr);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaKernelDispatcher::BeginLaunchKernel() { }
|
||||
|
||||
void
|
||||
OsdCudaKernelDispatcher::EndLaunchKernel() { }
|
||||
|
||||
OsdVertexBuffer *
|
||||
OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
|
||||
OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
|
||||
{
|
||||
return new OsdGpuVertexBuffer(numElements, count);
|
||||
return new OsdCudaVertexBuffer(numElements, numVertices);
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
||||
|
||||
OsdGpuVertexBuffer *bVertex = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
|
||||
OsdGpuVertexBuffer *bVarying = dynamic_cast<OsdGpuVertexBuffer *>(varying);
|
||||
size_t num_bytes;
|
||||
if (vertex)
|
||||
_currentVertexBuffer = dynamic_cast<OsdCudaVertexBuffer *>(vertex);
|
||||
else
|
||||
_currentVertexBuffer = NULL;
|
||||
|
||||
if (bVertex) {
|
||||
cudaGraphicsGLRegisterBuffer(&_cudaVertexResource, bVertex->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard);
|
||||
cudaGraphicsMapResources(1, &_cudaVertexResource, 0);
|
||||
cudaGraphicsResourceGetMappedPointer((void **)&_deviceVertices, &num_bytes, _cudaVertexResource);
|
||||
if (varying)
|
||||
_currentVaryingBuffer = dynamic_cast<OsdCudaVertexBuffer *>(varying);
|
||||
else
|
||||
_currentVaryingBuffer = NULL;
|
||||
|
||||
if (_currentVertexBuffer) {
|
||||
_deviceVertices = (float*)_currentVertexBuffer->Map();
|
||||
// XXX todo remove _numVertexElements
|
||||
_numVertexElements = _currentVertexBuffer->GetNumElements();
|
||||
} else {
|
||||
_numVertexElements = 0;
|
||||
}
|
||||
|
||||
if (bVarying) {
|
||||
cudaGraphicsGLRegisterBuffer(&_cudaVaryingResource, bVarying->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard);
|
||||
cudaGraphicsMapResources(1, &_cudaVaryingResource, 0);
|
||||
cudaGraphicsResourceGetMappedPointer((void **)&_deviceVaryings, &num_bytes, _cudaVaryingResource);
|
||||
if (_currentVaryingBuffer) {
|
||||
_deviceVaryings = (float*)_currentVaryingBuffer->Map();
|
||||
_numVaryingElements = _currentVaryingBuffer->GetNumElements();
|
||||
} else {
|
||||
_numVaryingElements = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OsdCudaKernelDispatcher::UnbindVertexBuffer()
|
||||
{
|
||||
if (_cudaVertexResource)
|
||||
cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0);
|
||||
if (_cudaVaryingResource)
|
||||
cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0);
|
||||
if (_currentVertexBuffer){
|
||||
_currentVertexBuffer->Unmap();
|
||||
}
|
||||
if (_currentVaryingBuffer)
|
||||
_currentVaryingBuffer->Unmap();
|
||||
|
||||
_cudaVertexResource = NULL;
|
||||
_cudaVaryingResource = NULL;
|
||||
_currentVertexBuffer = NULL;
|
||||
_currentVaryingBuffer = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -59,6 +59,7 @@
|
||||
|
||||
#include "../version.h"
|
||||
#include "../osd/kernelDispatcher.h"
|
||||
#include "../osd/vertexBuffer.h"
|
||||
|
||||
#include <GL/glew.h>
|
||||
|
||||
@ -72,6 +73,19 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
struct DeviceVertex;
|
||||
|
||||
class OsdCudaVertexBuffer : public OsdGpuVertexBuffer {
|
||||
public:
|
||||
OsdCudaVertexBuffer(int numElements, int numVertices);
|
||||
virtual ~OsdCudaVertexBuffer();
|
||||
|
||||
virtual void UpdateData(const float *src, int numVertices);
|
||||
void * Map();
|
||||
void Unmap();
|
||||
|
||||
protected:
|
||||
cudaGraphicsResource *_cudaResource;
|
||||
};
|
||||
|
||||
class OsdCudaKernelDispatcher : public OsdKernelDispatcher
|
||||
{
|
||||
public:
|
||||
@ -106,11 +120,11 @@ public:
|
||||
|
||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
||||
|
||||
virtual void BeginLaunchKernel();
|
||||
|
||||
virtual void EndLaunchKernel();
|
||||
virtual void OnKernelLaunch() {}
|
||||
|
||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
|
||||
virtual void OnKernelFinish() {}
|
||||
|
||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
|
||||
|
||||
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
||||
|
||||
@ -138,8 +152,8 @@ protected:
|
||||
|
||||
std::vector<DeviceTable> _tables;
|
||||
|
||||
cudaGraphicsResource *_cudaVertexResource,
|
||||
*_cudaVaryingResource;
|
||||
OsdCudaVertexBuffer *_currentVertexBuffer,
|
||||
*_currentVaryingBuffer;
|
||||
|
||||
float *_deviceVertices,
|
||||
*_deviceVaryings;
|
||||
|
@ -61,6 +61,8 @@
|
||||
#include <GL/glew.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
|
||||
#define OPT_E0_IT_VEC4
|
||||
#define OPT_E0_S_VEC2
|
||||
@ -88,65 +90,28 @@ static const char *shaderDefines = ""
|
||||
#endif
|
||||
;
|
||||
|
||||
std::vector<OsdGlslKernelDispatcher::ComputeShader> OsdGlslKernelDispatcher::shaderRegistry;
|
||||
|
||||
OsdGlslKernelDispatcher::OsdGlslKernelDispatcher(int levels)
|
||||
: OsdKernelDispatcher(levels)
|
||||
{
|
||||
_vertexBuffer = 0;
|
||||
_varyingBuffer = 0;
|
||||
_prgKernel = 0;
|
||||
_currentVertexBuffer = 0;
|
||||
_currentVaryingBuffer = 0;
|
||||
_shader = 0;
|
||||
|
||||
glGenTextures(1, &_vertexTexture);
|
||||
glGenTextures(1, &_varyingTexture);
|
||||
|
||||
_tableBuffers.resize(TABLE_MAX);
|
||||
_tableTextures.resize(TABLE_MAX);
|
||||
_tableUniforms.resize(TABLE_MAX);
|
||||
_tableOffsetUniforms.resize(TABLE_MAX);
|
||||
|
||||
glGenBuffers(TABLE_MAX, &_tableBuffers[0]);
|
||||
glGenTextures(TABLE_MAX, &_tableTextures[0]);
|
||||
|
||||
subComputeFace = 0;
|
||||
subComputeEdge = 0;
|
||||
subComputeVertexA = 0;
|
||||
subComputeVertexB = 0;
|
||||
uniformVertexPass = 0;
|
||||
uniformIndexStart = 0;
|
||||
uniformIndexOffset = 0;
|
||||
|
||||
compile(shaderSource, shaderDefines);
|
||||
|
||||
subComputeFace = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeFace");
|
||||
subComputeEdge = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeEdge");
|
||||
subComputeVertexA = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexA");
|
||||
subComputeVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexB");
|
||||
subComputeLoopVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "loopComputeVertexB");
|
||||
|
||||
uniformVertexPass = glGetUniformLocation(_prgKernel, "vertexPass");
|
||||
uniformIndexStart = glGetUniformLocation(_prgKernel, "indexStart");
|
||||
uniformIndexOffset = glGetUniformLocation(_prgKernel, "indexOffset");
|
||||
|
||||
_tableUniforms[F_IT] = glGetUniformLocation(_prgKernel, "_F0_IT");
|
||||
_tableUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "_F0_ITa");
|
||||
_tableUniforms[E_IT] = glGetUniformLocation(_prgKernel, "_E0_IT");
|
||||
_tableUniforms[V_IT] = glGetUniformLocation(_prgKernel, "_V0_IT");
|
||||
_tableUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "_V0_ITa");
|
||||
_tableUniforms[E_W] = glGetUniformLocation(_prgKernel, "_E0_S");
|
||||
_tableUniforms[V_W] = glGetUniformLocation(_prgKernel, "_V0_S");
|
||||
_tableOffsetUniforms[F_IT] = glGetUniformLocation(_prgKernel, "F_IT_ofs");
|
||||
_tableOffsetUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "F_ITa_ofs");
|
||||
_tableOffsetUniforms[E_IT] = glGetUniformLocation(_prgKernel, "E_IT_ofs");
|
||||
_tableOffsetUniforms[V_IT] = glGetUniformLocation(_prgKernel, "V_IT_ofs");
|
||||
_tableOffsetUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "V_ITa_ofs");
|
||||
_tableOffsetUniforms[E_W] = glGetUniformLocation(_prgKernel, "E_W_ofs");
|
||||
_tableOffsetUniforms[V_W] = glGetUniformLocation(_prgKernel, "V_W_ofs");
|
||||
}
|
||||
|
||||
OsdGlslKernelDispatcher::~OsdGlslKernelDispatcher() {
|
||||
|
||||
if (_prgKernel)
|
||||
glDeleteProgram(_prgKernel);
|
||||
|
||||
glDeleteTextures(1, &_vertexTexture);
|
||||
glDeleteTextures(1, &_varyingTexture);
|
||||
|
||||
@ -163,98 +128,134 @@ OsdGlslKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::BeginLaunchKernel() {
|
||||
OsdGlslKernelDispatcher::OnKernelLaunch() {
|
||||
|
||||
glUseProgram(_prgKernel);
|
||||
glEnable(GL_RASTERIZER_DISCARD);
|
||||
_shader->UseProgram();
|
||||
|
||||
//XXX what if loop..
|
||||
bindTextureBuffer(_tableUniforms[F_IT], _tableBuffers[F_IT],
|
||||
bindTextureBuffer(_shader->GetTableUniform(F_IT), _tableBuffers[F_IT],
|
||||
_tableTextures[F_IT], GL_R32UI, 2);
|
||||
bindTextureBuffer(_tableUniforms[F_ITa], _tableBuffers[F_ITa],
|
||||
bindTextureBuffer(_shader->GetTableUniform(F_ITa), _tableBuffers[F_ITa],
|
||||
_tableTextures[F_ITa], GL_R32I, 3);
|
||||
|
||||
#ifdef OPT_E0_IT_VEC4
|
||||
bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT],
|
||||
bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT],
|
||||
_tableTextures[E_IT], GL_RGBA32UI, 4);
|
||||
#else
|
||||
bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT],
|
||||
bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT],
|
||||
_tableTextures[E_IT], GL_R32UI, 4);
|
||||
#endif
|
||||
|
||||
#ifdef OPT_CATMARK_V_IT_VEC2
|
||||
bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT],
|
||||
bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT],
|
||||
_tableTextures[V_IT], GL_RG32UI, 5);
|
||||
#else
|
||||
bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT],
|
||||
bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT],
|
||||
_tableTextures[V_IT], GL_R32UI, 5);
|
||||
#endif
|
||||
bindTextureBuffer(_tableUniforms[V_ITa], _tableBuffers[V_ITa],
|
||||
bindTextureBuffer(_shader->GetTableUniform(V_ITa), _tableBuffers[V_ITa],
|
||||
_tableTextures[V_ITa], GL_R32I, 6);
|
||||
#ifdef OPT_E0_S_VEC2
|
||||
bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W],
|
||||
bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W],
|
||||
_tableTextures[E_W], GL_RG32F, 7);
|
||||
#else
|
||||
bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W],
|
||||
bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W],
|
||||
_tableTextures[E_W], GL_R32F, 7);
|
||||
#endif
|
||||
bindTextureBuffer(_tableUniforms[V_W], _tableBuffers[V_W],
|
||||
bindTextureBuffer(_shader->GetTableUniform(V_W), _tableBuffers[V_W],
|
||||
_tableTextures[V_W], GL_R32F, 8);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::EndLaunchKernel() {
|
||||
OsdGlslKernelDispatcher::OnKernelFinish() {
|
||||
|
||||
unbindTextureBuffer(2);
|
||||
unbindTextureBuffer(3);
|
||||
unbindTextureBuffer(4);
|
||||
unbindTextureBuffer(5);
|
||||
unbindTextureBuffer(6);
|
||||
unbindTextureBuffer(7);
|
||||
unbindTextureBuffer(8);
|
||||
|
||||
glDisable(GL_RASTERIZER_DISCARD);
|
||||
glUseProgram(0);
|
||||
|
||||
// XXX Unbind table buffer
|
||||
}
|
||||
|
||||
OsdVertexBuffer *
|
||||
OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
|
||||
OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
|
||||
{
|
||||
return new OsdGpuVertexBuffer(numElements, count);
|
||||
return new OsdGpuVertexBuffer(numElements, numVertices);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
||||
|
||||
OsdGpuVertexBuffer *bVertex = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
|
||||
OsdGpuVertexBuffer *bVarying = dynamic_cast<OsdGpuVertexBuffer *>(varying);
|
||||
if (vertex)
|
||||
_currentVertexBuffer = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
|
||||
else
|
||||
_currentVertexBuffer = NULL;
|
||||
|
||||
if (bVertex) {
|
||||
_vertexBuffer = bVertex->GetGpuBuffer();
|
||||
bindTextureBuffer(_vertexUniform, _vertexBuffer, _vertexTexture, GL_RGB32F, 0);
|
||||
if (varying)
|
||||
_currentVaryingBuffer = dynamic_cast<OsdGpuVertexBuffer *>(varying);
|
||||
else
|
||||
_currentVaryingBuffer = NULL;
|
||||
|
||||
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
|
||||
int numVaryingElements = varying ? varying->GetNumElements() : 0;
|
||||
|
||||
// find appropriate shader program from registry (compile it if needed)
|
||||
std::vector<ComputeShader>::iterator it =
|
||||
std::find_if(shaderRegistry.begin(), shaderRegistry.end(),
|
||||
ComputeShader::Match(numVertexElements, numVaryingElements));
|
||||
|
||||
_shader = NULL;
|
||||
if (it != shaderRegistry.end()) {
|
||||
_shader = &(*it);
|
||||
} else {
|
||||
shaderRegistry.push_back(ComputeShader());
|
||||
_shader = &shaderRegistry.back();
|
||||
_shader->Compile(numVertexElements, numVaryingElements);
|
||||
}
|
||||
|
||||
if (bVarying) {
|
||||
_varyingBuffer = bVarying->GetGpuBuffer();
|
||||
bindTextureBuffer(_varyingUniform, _varyingBuffer, _varyingTexture, GL_R32F, 0);
|
||||
_shader->UseProgram(); // need to bind textures
|
||||
|
||||
// bind vertex texture
|
||||
if (_currentVertexBuffer) {
|
||||
bindTextureBuffer(_shader->GetVertexUniform(), _currentVertexBuffer->GetGpuBuffer(), _vertexTexture, GL_RGB32F, 0);
|
||||
}
|
||||
|
||||
glUseProgram(_prgKernel);
|
||||
glUniform1i(_vertexUniform, 0);
|
||||
if (_currentVaryingBuffer) {
|
||||
bindTextureBuffer(_shader->GetVaryingUniform(), _currentVaryingBuffer->GetGpuBuffer(), _varyingTexture, GL_R32F, 1);
|
||||
}
|
||||
|
||||
#if 0 // experiment to use image store function
|
||||
glActiveTexture(GL_TEXTURE0 + 0);
|
||||
glBindImageTextureEXT(0, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
|
||||
|
||||
if (_numVarying > 0) {
|
||||
glUniform1i(_varyingUniform, 1);
|
||||
glBindImageTextureEXT(1, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
|
||||
}
|
||||
#endif
|
||||
|
||||
CHECK_GL_ERROR("BindVertexBuffer \n");
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::UnbindVertexBuffer()
|
||||
{
|
||||
if (_currentVertexBuffer) {
|
||||
unbindTextureBuffer(0);
|
||||
}
|
||||
if (_currentVaryingBuffer) {
|
||||
unbindTextureBuffer(1);
|
||||
}
|
||||
_currentVertexBuffer = NULL;
|
||||
_currentVaryingBuffer = NULL;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::Synchronize() {
|
||||
glFinish();
|
||||
@ -263,14 +264,15 @@ OsdGlslKernelDispatcher::Synchronize() {
|
||||
void
|
||||
OsdGlslKernelDispatcher::bindTextureBuffer(
|
||||
GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const {
|
||||
|
||||
|
||||
if (sampler == -1) {
|
||||
OSD_ERROR("BindTextureError:: sampler = %d\n", sampler);
|
||||
return;
|
||||
}
|
||||
OSD_DEBUG("BindTextureBuffer sampler=%d, buffer=%d, texture = %d, E%x\n", sampler, buffer, texture, glGetError());
|
||||
OSD_DEBUG("BindTextureBuffer unit=%d, sampler=%d, buffer=%d, texture = %d, E%x\n", unit, sampler, buffer, texture, glGetError());
|
||||
|
||||
glUniform1i(sampler, unit);
|
||||
CHECK_GL_ERROR("BindTextureBuffer glUniform %d\n", unit);
|
||||
glActiveTexture(GL_TEXTURE0 + unit);
|
||||
CHECK_GL_ERROR("BindTextureBuffer glActiveTexture %d\n", unit);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, texture);
|
||||
@ -280,38 +282,137 @@ OsdGlslKernelDispatcher::bindTextureBuffer(
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
}
|
||||
|
||||
bool
|
||||
OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDefine) {
|
||||
|
||||
_prgKernel = glCreateProgram();
|
||||
void
|
||||
OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const {
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + unit);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
_shader->ApplyCatmarkFaceVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_tableOffsets[F_IT][level-1],
|
||||
_tableOffsets[F_ITa][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
_shader->ApplyCatmarkEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_tableOffsets[E_IT][level-1],
|
||||
_tableOffsets[E_W][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
_shader->ApplyCatmarkVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_tableOffsets[V_IT][level-1],
|
||||
_tableOffsets[V_ITa][level-1],
|
||||
_tableOffsets[V_W][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
|
||||
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||
|
||||
_shader->ApplyCatmarkVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_tableOffsets[V_ITa][level-1],
|
||||
_tableOffsets[V_W][level-1],
|
||||
offset, pass, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
_shader->ApplyLoopEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_tableOffsets[E_IT][level-1],
|
||||
_tableOffsets[E_W][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
_shader->ApplyLoopVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_tableOffsets[V_IT][level-1],
|
||||
_tableOffsets[V_ITa][level-1],
|
||||
_tableOffsets[V_W][level-1],
|
||||
offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA(
|
||||
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||
|
||||
_shader->ApplyLoopVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer,
|
||||
_tableOffsets[V_ITa][level-1],
|
||||
_tableOffsets[V_W][level-1],
|
||||
offset, pass, start, end);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------
|
||||
|
||||
OsdGlslKernelDispatcher::ComputeShader::ComputeShader() :
|
||||
_program(0)
|
||||
{
|
||||
}
|
||||
|
||||
OsdGlslKernelDispatcher::ComputeShader::~ComputeShader()
|
||||
{
|
||||
if (_program)
|
||||
glDeleteProgram(_program);
|
||||
}
|
||||
|
||||
bool
|
||||
OsdGlslKernelDispatcher::ComputeShader::Compile(int numVertexElements, int numVaryingElements) {
|
||||
|
||||
// XXX: NOTE: current GLSL only supports numVertexElements = 6!!!
|
||||
assert(numVertexElements == 6);
|
||||
|
||||
_numVertexElements = numVertexElements;
|
||||
_numVaryingElements = numVaryingElements;
|
||||
_program = glCreateProgram();
|
||||
|
||||
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
|
||||
|
||||
char constantDefine[256];
|
||||
snprintf(constantDefine, 256,
|
||||
"#define NUM_VARYING_ELEMENTS %d\n", _numVarying);
|
||||
"#define NUM_VARYING_ELEMENTS %d\n", numVaryingElements);
|
||||
|
||||
const char *shaderSources[3];
|
||||
shaderSources[0] = constantDefine;
|
||||
shaderSources[1] = shaderDefine;
|
||||
shaderSources[1] = shaderDefines;
|
||||
shaderSources[2] = shaderSource;
|
||||
glShaderSource(shader, 3, shaderSources, NULL);
|
||||
glCompileShader(shader);
|
||||
glAttachShader(_prgKernel, shader);
|
||||
glAttachShader(_program, shader);
|
||||
|
||||
const char *outputs[] = { "outPosition",
|
||||
"outNormal",
|
||||
"gl_NextBuffer",
|
||||
"outVaryingData" };
|
||||
int nOutputs = _numVarying > 0 ? 4 : 2;
|
||||
|
||||
glTransformFeedbackVaryings(_prgKernel, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS);
|
||||
int nOutputs = numVaryingElements > 0 ? 4 : 2;
|
||||
|
||||
glTransformFeedbackVaryings(_program, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS);
|
||||
|
||||
CHECK_GL_ERROR("Transform feedback initialize \n");
|
||||
|
||||
GLint linked = 0;
|
||||
glLinkProgram(_prgKernel);
|
||||
glGetProgramiv(_prgKernel, GL_LINK_STATUS, &linked);
|
||||
glLinkProgram(_program);
|
||||
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
|
||||
|
||||
if (linked == GL_FALSE) {
|
||||
OSD_ERROR("Fail to link shader\n");
|
||||
@ -320,56 +421,79 @@ OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDef
|
||||
glGetShaderInfoLog(shader, 1024, NULL, buffer);
|
||||
OSD_ERROR(buffer);
|
||||
|
||||
glGetProgramInfoLog(_prgKernel, 1024, NULL, buffer);
|
||||
glGetProgramInfoLog(_program, 1024, NULL, buffer);
|
||||
OSD_ERROR(buffer);
|
||||
|
||||
glDeleteProgram(_prgKernel);
|
||||
_prgKernel = 0;
|
||||
glDeleteProgram(_program);
|
||||
_program = 0;
|
||||
// XXX ERROR HANDLE
|
||||
return false;
|
||||
}
|
||||
|
||||
glDeleteShader(shader);
|
||||
|
||||
_vertexUniform = glGetUniformLocation(_prgKernel, "vertex");
|
||||
_varyingUniform = glGetUniformLocation(_prgKernel, "varyingData");
|
||||
_vertexUniform = glGetUniformLocation(_program, "vertex");
|
||||
_varyingUniform = glGetUniformLocation(_program, "varyingData");
|
||||
|
||||
_subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace");
|
||||
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
|
||||
_subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
|
||||
_subComputeVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexB");
|
||||
_subComputeLoopVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "loopComputeVertexB");
|
||||
|
||||
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
|
||||
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
|
||||
_uniformIndexOffset = glGetUniformLocation(_program, "indexOffset");
|
||||
|
||||
_tableUniforms.resize(TABLE_MAX);
|
||||
_tableOffsetUniforms.resize(TABLE_MAX);
|
||||
|
||||
_tableUniforms[F_IT] = glGetUniformLocation(_program, "_F0_IT");
|
||||
_tableUniforms[F_ITa] = glGetUniformLocation(_program, "_F0_ITa");
|
||||
_tableUniforms[E_IT] = glGetUniformLocation(_program, "_E0_IT");
|
||||
_tableUniforms[V_IT] = glGetUniformLocation(_program, "_V0_IT");
|
||||
_tableUniforms[V_ITa] = glGetUniformLocation(_program, "_V0_ITa");
|
||||
_tableUniforms[E_W] = glGetUniformLocation(_program, "_E0_S");
|
||||
_tableUniforms[V_W] = glGetUniformLocation(_program, "_V0_S");
|
||||
_tableOffsetUniforms[F_IT] = glGetUniformLocation(_program, "F_IT_ofs");
|
||||
_tableOffsetUniforms[F_ITa] = glGetUniformLocation(_program, "F_ITa_ofs");
|
||||
_tableOffsetUniforms[E_IT] = glGetUniformLocation(_program, "E_IT_ofs");
|
||||
_tableOffsetUniforms[V_IT] = glGetUniformLocation(_program, "V_IT_ofs");
|
||||
_tableOffsetUniforms[V_ITa] = glGetUniformLocation(_program, "V_ITa_ofs");
|
||||
_tableOffsetUniforms[E_W] = glGetUniformLocation(_program, "E_W_ofs");
|
||||
_tableOffsetUniforms[V_W] = glGetUniformLocation(_program, "V_W_ofs");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const {
|
||||
glActiveTexture(GL_TEXTURE0 + unit);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass) const {
|
||||
OsdGlslKernelDispatcher::ComputeShader::transformGpuBufferData(OsdGpuVertexBuffer *vertexBuffer, OsdGpuVertexBuffer *varyingBuffer,
|
||||
GLint offset, int start, int end) const {
|
||||
int count = end - start;
|
||||
if (count <= 0) return;
|
||||
OSD_DEBUG("_transformGpuBufferData kernel=%d E%x, offset=%d, count=%d\n", kernel, glGetError(), offset, count);
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &kernel);
|
||||
glUniform1i(uniformVertexPass, vertexPass); // XXX
|
||||
OSD_DEBUG("_transformGpuBufferData offset=%d, count=%d\n", glGetError(), offset, count);
|
||||
|
||||
// set batch range
|
||||
glUniform1i(uniformIndexStart, start);
|
||||
glUniform1i(uniformIndexOffset, offset);
|
||||
glUniform1i(_uniformIndexStart, start);
|
||||
glUniform1i(_uniformIndexOffset, offset);
|
||||
// XXX: end is not used here now
|
||||
CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n", offset, start);
|
||||
|
||||
// set transform feedback buffer
|
||||
int vertexStride = _numVertexElements*sizeof(float);
|
||||
int varyingStride = _numVarying*sizeof(float);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, _vertexBuffer,
|
||||
(start+offset)*vertexStride, count*vertexStride);
|
||||
CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
|
||||
if (vertexBuffer) {
|
||||
int vertexStride = vertexBuffer->GetNumElements()*sizeof(float);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer->GetGpuBuffer(),
|
||||
(start+offset)*vertexStride, count*vertexStride);
|
||||
}
|
||||
|
||||
if (_numVarying > 0){
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, _varyingBuffer,
|
||||
if (varyingBuffer){
|
||||
int varyingStride = varyingBuffer->GetNumElements()*sizeof(float);
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer->GetGpuBuffer(),
|
||||
(start+offset)*varyingStride, count*varyingStride);
|
||||
}
|
||||
|
||||
CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
|
||||
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
|
||||
CHECK_GL_ERROR("transformGpuBufferData glBeginTransformFeedback\n");
|
||||
@ -387,68 +511,84 @@ OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
glUniform1i(_tableOffsetUniforms[F_IT], _tableOffsets[F_IT][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[F_ITa], _tableOffsets[F_ITa][level-1]);
|
||||
transformGpuBufferData(subComputeFace, offset, start, end);
|
||||
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkFaceVerticesKernel(
|
||||
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace);
|
||||
glUniform1i(_tableOffsetUniforms[F_IT], F_IT_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[F_ITa], F_ITa_ofs);
|
||||
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
|
||||
glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]);
|
||||
transformGpuBufferData(subComputeEdge, offset, start, end);
|
||||
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkEdgeVerticesKernel(
|
||||
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int E_IT_ofs, int E_W_ofs, int offset, int start, int end) {
|
||||
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
|
||||
glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs);
|
||||
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelB(
|
||||
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) {
|
||||
|
||||
glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
||||
transformGpuBufferData(subComputeVertexB, offset, start, end);
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexB);
|
||||
glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
|
||||
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelA(
|
||||
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) {
|
||||
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
||||
transformGpuBufferData(subComputeVertexA, offset, start, end, pass);
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
|
||||
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopEdgeVerticesKernel(
|
||||
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int E_IT_ofs, int E_W_ofs, int offset, int start, int end) {
|
||||
|
||||
glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]);
|
||||
transformGpuBufferData(subComputeEdge, offset, start, end);
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
|
||||
glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs);
|
||||
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB(
|
||||
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
|
||||
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelB(
|
||||
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) {
|
||||
|
||||
glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
||||
transformGpuBufferData(subComputeLoopVertexB, offset, start, end);
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeLoopVertexB);
|
||||
glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||
}
|
||||
|
||||
void
|
||||
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA(
|
||||
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
|
||||
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelA(
|
||||
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) {
|
||||
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
|
||||
transformGpuBufferData(subComputeVertexA, offset, start, end, pass);
|
||||
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
|
||||
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
|
||||
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
|
||||
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
|
||||
transformGpuBufferData(vertex, varying, offset, start, end);
|
||||
}
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -89,11 +89,11 @@ public:
|
||||
|
||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
|
||||
|
||||
virtual void BeginLaunchKernel();
|
||||
|
||||
virtual void EndLaunchKernel();
|
||||
virtual void OnKernelLaunch();
|
||||
|
||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
|
||||
virtual void OnKernelFinish();
|
||||
|
||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
|
||||
|
||||
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
||||
|
||||
@ -110,42 +110,100 @@ public:
|
||||
|
||||
protected:
|
||||
|
||||
class ComputeShader {
|
||||
public:
|
||||
ComputeShader();
|
||||
~ComputeShader();
|
||||
|
||||
bool Compile(int numVertexElements, int numVaryingElements);
|
||||
|
||||
GLuint GetTableUniform(int table) const {
|
||||
return _tableUniforms[table];
|
||||
}
|
||||
GLuint GetVertexUniform() const { return _vertexUniform; }
|
||||
GLuint GetVaryingUniform() const { return _varyingUniform; }
|
||||
|
||||
void ApplyCatmarkFaceVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end);
|
||||
|
||||
void ApplyCatmarkEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int E_IT_ofs, int E_W_ofs, int offset, int start, int end);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end);
|
||||
|
||||
void ApplyCatmarkVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end);
|
||||
|
||||
void ApplyLoopEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int E_IT_ofs, int E_W_ofs, int offset, int start, int end);
|
||||
|
||||
void ApplyLoopVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end);
|
||||
|
||||
void ApplyLoopVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end);
|
||||
|
||||
void UseProgram () const {
|
||||
glUseProgram(_program);
|
||||
}
|
||||
|
||||
struct Match {
|
||||
Match(int numVertexElements, int numVaryingElements) :
|
||||
_numVertexElements(numVertexElements), _numVaryingElements(numVaryingElements) {}
|
||||
bool operator() (const ComputeShader &shader) {
|
||||
return (shader._numVertexElements == _numVertexElements
|
||||
&& shader._numVaryingElements == _numVaryingElements);
|
||||
}
|
||||
int _numVertexElements, _numVaryingElements;
|
||||
};
|
||||
|
||||
friend class Match;
|
||||
|
||||
private:
|
||||
void transformGpuBufferData(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
|
||||
GLint offset, int start, int end) const;
|
||||
|
||||
int _numVertexElements;
|
||||
int _numVaryingElements;
|
||||
|
||||
GLuint _program;
|
||||
|
||||
GLuint _uniformVertexPass;
|
||||
GLuint _uniformIndexStart;
|
||||
GLuint _uniformIndexOffset;
|
||||
|
||||
GLuint _vertexUniform,
|
||||
_varyingUniform;
|
||||
|
||||
// shader locations
|
||||
GLuint _subComputeFace, _subComputeEdge, _subComputeVertexA, _subComputeVertexB;
|
||||
GLuint _subComputeLoopVertexB;
|
||||
|
||||
std::vector<GLuint> _tableUniforms;
|
||||
std::vector<GLuint> _tableOffsetUniforms;
|
||||
|
||||
};
|
||||
|
||||
void bindTextureBuffer(GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const;
|
||||
|
||||
void unbindTextureBuffer(int unit) const;
|
||||
|
||||
void transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass=false) const;
|
||||
|
||||
bool compile(const char *shaderSource, const char *shaderDefine);
|
||||
|
||||
GLuint _prgKernel;
|
||||
|
||||
int _numVertexElements,
|
||||
_numVarying;
|
||||
|
||||
GLuint _vertexBuffer,
|
||||
_varyingBuffer;
|
||||
ComputeShader * _shader;
|
||||
|
||||
// texture for vertex
|
||||
GLuint _vertexTexture,
|
||||
_varyingTexture;
|
||||
|
||||
GLuint _vertexUniform,
|
||||
_varyingUniform;
|
||||
|
||||
OsdGpuVertexBuffer *_currentVertexBuffer,
|
||||
*_currentVaryingBuffer;
|
||||
|
||||
// table buffers
|
||||
std::vector<GLuint> _tableBuffers;
|
||||
std::vector<GLuint> _tableTextures;
|
||||
std::vector<GLuint> _tableUniforms;
|
||||
std::vector<GLuint> _tableOffsetUniforms;
|
||||
|
||||
GLuint uniformVertexPass;
|
||||
GLuint uniformIndexStart;
|
||||
GLuint uniformIndexOffset;
|
||||
|
||||
// shader locations
|
||||
GLuint subComputeFace, subComputeEdge, subComputeVertexA, subComputeVertexB;
|
||||
GLuint subComputeLoopVertexB;
|
||||
// static shader registry (XXX tentative..)
|
||||
static std::vector<ComputeShader> shaderRegistry;
|
||||
};
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -82,9 +82,9 @@ public:
|
||||
virtual void CopyTable(int tableIndex, size_t size, const void *ptr) = 0;
|
||||
|
||||
|
||||
virtual void BeginLaunchKernel() = 0;
|
||||
virtual void OnKernelLaunch() = 0;
|
||||
|
||||
virtual void EndLaunchKernel() = 0;
|
||||
virtual void OnKernelFinish() = 0;
|
||||
|
||||
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count) = 0;
|
||||
|
||||
|
@ -102,7 +102,6 @@ OsdMesh::Create(OsdHbrMesh *hbrMesh, int level, const std::string &kernel) {
|
||||
|
||||
OSD_DEBUG("PREP: NumCoarseVertex = %d\n", _mMesh->GetNumCoarseVertices());
|
||||
OSD_DEBUG("PREP: NumVertex = %d\n", _mMesh->GetNumVertices());
|
||||
OSD_DEBUG("PREP: NumTables = %d\n", _mMesh->GetNumSubdivisionTables());
|
||||
|
||||
const FarSubdivisionTables<OsdVertex>* table = _mMesh->GetSubdivision();
|
||||
|
||||
@ -145,11 +144,11 @@ OsdMesh::Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
|
||||
|
||||
_dispatcher->BindVertexBuffer(vertex, varying);
|
||||
|
||||
_dispatcher->BeginLaunchKernel();
|
||||
_dispatcher->OnKernelLaunch();
|
||||
|
||||
_mMesh->Subdivide(_level+1);
|
||||
|
||||
_dispatcher->EndLaunchKernel();
|
||||
_dispatcher->OnKernelFinish();
|
||||
|
||||
_dispatcher->UnbindVertexBuffer();
|
||||
}
|
||||
|
@ -95,7 +95,13 @@ public:
|
||||
|
||||
OsdVertexBuffer *InitializeVertexBuffer(int numElements);
|
||||
|
||||
void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
|
||||
// for non-interleaved vertex data
|
||||
void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying = NULL);
|
||||
|
||||
/*
|
||||
// for interleaved vertex data ?
|
||||
template <class T> void Subdivide(T *vertex) { }
|
||||
*/
|
||||
|
||||
void Synchronize();
|
||||
|
||||
|
@ -9,69 +9,83 @@ namespace OPENSUBDIV_VERSION {
|
||||
|
||||
class OsdVertexBuffer {
|
||||
public:
|
||||
OsdVertexBuffer(int numElements) : _numElements(numElements) {}
|
||||
virtual ~OsdVertexBuffer() {}
|
||||
virtual void UpdateData(const float *src, int count) = 0;
|
||||
|
||||
virtual void UpdateData(const float *src, int numVertices) = 0;
|
||||
|
||||
virtual GLuint GetGpuBuffer() = 0;
|
||||
|
||||
int GetNumElements() const {
|
||||
return _numElements;
|
||||
}
|
||||
|
||||
protected:
|
||||
int _numElements;
|
||||
};
|
||||
|
||||
class OsdGpuVertexBuffer : public OsdVertexBuffer {
|
||||
public:
|
||||
OsdGpuVertexBuffer(int numElements, int count) : _vbo(0), _numElements(numElements) {
|
||||
int stride = numElements * count * sizeof(float);
|
||||
OsdGpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _vbo(0) {
|
||||
int size = numElements * numVertices * sizeof(float);
|
||||
glGenBuffers(1, &_vbo);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, stride, 0, GL_STREAM_DRAW);
|
||||
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_STREAM_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
}
|
||||
virtual ~OsdGpuVertexBuffer() {
|
||||
glDeleteBuffers(1, &_vbo);
|
||||
}
|
||||
virtual void UpdateData(const float *src, int count) {
|
||||
|
||||
virtual void UpdateData(const float *src, int numVertices) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
||||
float * pointer = (float*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
|
||||
memcpy(pointer, src, _numElements * count * sizeof(float));
|
||||
memcpy(pointer, src, _numElements * numVertices * sizeof(float));
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
}
|
||||
|
||||
virtual GLuint GetGpuBuffer() {
|
||||
return _vbo;
|
||||
}
|
||||
private:
|
||||
|
||||
protected:
|
||||
GLuint _vbo;
|
||||
int _numElements;
|
||||
};
|
||||
|
||||
class OsdCpuVertexBuffer : public OsdVertexBuffer {
|
||||
public:
|
||||
OsdCpuVertexBuffer(int numElements, int count) : _cpuVbo(NULL), _vboSize(0), _numElements(numElements), _vbo(0) {
|
||||
_cpuVbo = new float[numElements * count];
|
||||
_vboSize = numElements * count;
|
||||
OsdCpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _cpuVbo(NULL), _vboSize(0), _vbo(0) {
|
||||
_vboSize = numElements * numVertices;
|
||||
_cpuVbo = new float[numElements * numVertices];
|
||||
}
|
||||
virtual ~OsdCpuVertexBuffer() {
|
||||
if(_cpuVbo) delete[] _cpuVbo;
|
||||
if(_vbo) glDeleteBuffers(1, &_vbo);
|
||||
delete [] _cpuVbo;
|
||||
if (_vbo)
|
||||
glDeleteBuffers(1, &_vbo);
|
||||
}
|
||||
virtual void UpdateData(const float *src, int count) {
|
||||
memcpy(_cpuVbo, src, _numElements * count * sizeof(float));
|
||||
|
||||
virtual void UpdateData(const float *src, int numVertices) {
|
||||
memcpy(_cpuVbo, src, _numElements * numVertices * sizeof(float));
|
||||
}
|
||||
|
||||
float *GetCpuBuffer() {
|
||||
return _cpuVbo;
|
||||
}
|
||||
|
||||
// XXX: this method name is missleading
|
||||
virtual GLuint GetGpuBuffer() {
|
||||
if(!_vbo) glGenBuffers(1, &_vbo);
|
||||
if (!_vbo)
|
||||
glGenBuffers(1, &_vbo);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, _vboSize * sizeof(float), _cpuVbo, GL_STREAM_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
return _vbo;
|
||||
}
|
||||
|
||||
int GetNumElements() const {
|
||||
return _numElements;
|
||||
}
|
||||
private:
|
||||
protected:
|
||||
float *_cpuVbo;
|
||||
int _vboSize;
|
||||
int _numElements;
|
||||
GLuint _vbo;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user