Added OsdVertexBuffer class. OsdMesh no longer has vertex buffer and vertex

specification (how many elements exists in the buffer).
client will create OsdVertexBuffer and provide it as an argument of
OsdMesh::Subdivide() function. It would be more flexible and hopefully matches
various use cases.

Since each dispatcher has to accept arbitrary vertex buffer, introduced a simple
shader registry into glslDispatcher. It will configure shaders for given vertex
elements on demand (for now, just works only for varying buffer).

Fixed cuda kernel's GL resource leakage. Since cuda GL interop seems one-way,
OsdCudaVertexBuffer manages vertex updating instead of just using
OsdGpuVertexBuffer.

Cleaned up some kernel codes and renamed ambiguous names.
This commit is contained in:
Takahito Tejima 2012-06-12 16:28:17 -07:00
parent 29b85a0e8c
commit 851c00d04c
12 changed files with 644 additions and 402 deletions

View File

@ -66,29 +66,33 @@
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
OsdCpuKernelDispatcher::DeviceTable::~DeviceTable() {
OsdCpuKernelDispatcher::SubdivisionTable::~SubdivisionTable() {
if (devicePtr)
free(devicePtr);
if (ptr)
free(ptr);
}
void
OsdCpuKernelDispatcher::DeviceTable::Copy( int size, const void *table ) {
OsdCpuKernelDispatcher::SubdivisionTable::Copy( int size, const void *table ) {
if (size > 0) {
if (devicePtr)
free(devicePtr);
devicePtr = malloc(size);
memcpy(devicePtr, table, size);
if (ptr)
free(ptr);
ptr = malloc(size);
memcpy(ptr, table, size);
}
}
OsdCpuKernelDispatcher::OsdCpuKernelDispatcher( int levels )
: OsdKernelDispatcher(levels) {
: OsdKernelDispatcher(levels), _currentVertexBuffer(NULL), _currentVaryingBuffer(NULL), _vdesc(NULL) {
_tables.resize(TABLE_MAX);
}
OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() { }
OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() {
if (_vdesc)
delete _vdesc;
}
void
OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) {
@ -96,30 +100,37 @@ OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
_tables[tableIndex].Copy(size, ptr);
}
void
OsdCpuKernelDispatcher::BeginLaunchKernel() { }
void
OsdCpuKernelDispatcher::EndLaunchKernel() { }
OsdVertexBuffer *
OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
{
return new OsdCpuVertexBuffer(numElements, count);
return new OsdCpuVertexBuffer(numElements, numVertices);
}
void
OsdCpuKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
_vertexBuffer = dynamic_cast<OsdCpuVertexBuffer *>(vertex);
_varyingBuffer = dynamic_cast<OsdCpuVertexBuffer *>(varying);
if (vertex)
_currentVertexBuffer = dynamic_cast<OsdCpuVertexBuffer *>(vertex);
else
_currentVertexBuffer = NULL;
if (varying)
_currentVaryingBuffer = dynamic_cast<OsdCpuVertexBuffer *>(varying);
else
_currentVaryingBuffer = NULL;
_vdesc = new VertexDescriptor(_currentVertexBuffer ? _currentVertexBuffer->GetNumElements() : 0,
_currentVaryingBuffer ? _currentVaryingBuffer->GetNumElements() : 0);
}
void
OsdCpuKernelDispatcher::UnbindVertexBuffer()
{
_vertexBuffer = NULL;
_varyingBuffer = NULL;
delete _vdesc;
_vdesc = NULL;
_currentVertexBuffer = NULL;
_currentVaryingBuffer = NULL;
}
void
@ -129,21 +140,17 @@ OsdCpuKernelDispatcher::Synchronize() { }
void
OsdCpuKernelDispatcher::ApplyBilinearFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
computeFace(&vd, _vbo, _varyingVbo,
(int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1],
(int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1],
computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1],
(int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1],
offset, start, end);
}
void
OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
computeBilinearEdge(&vd, _vbo, _varyingVbo,
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
computeBilinearEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
offset,
start, end);
}
@ -151,38 +158,26 @@ OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh<OsdVertex> * me
void
OsdCpuKernelDispatcher::ApplyBilinearVertexVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_numVertexElements, _numVaryingElements);
computeBilinearVertex(&vd, _vbo, _varyingVbo,
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
computeBilinearVertex(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
offset, start, end);
}
void
OsdCpuKernelDispatcher::ApplyCatmarkFaceVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
computeFace(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
(int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1],
(int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1],
computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1],
(int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1],
offset, start, end);
float *p = _vertexBuffer->GetCpuBuffer();
for(int i = 0; i < 150; i+=3){
printf("%f %f %f\n", p[0], p[1], p[2]);
p+=3;
}
}
void
OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
(float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1],
computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
(float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1],
offset,
start, end);
}
@ -190,34 +185,28 @@ OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh<OsdVertex> * mes
void
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
computeVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
(int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1],
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
computeVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
(int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1],
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
offset, start, end);
}
void
OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
offset, start, end, pass);
}
void
OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
(int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1],
(float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1],
computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1],
(float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1],
offset,
start, end);
}
@ -225,28 +214,23 @@ OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh<OsdVertex> * mesh,
void
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelB( FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
computeLoopVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
(int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1],
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
computeLoopVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
(int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1],
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
offset, start, end);
}
void
OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelA( FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements());
computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(),
(int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1],
(float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1],
computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(),
(int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1],
(float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1],
offset, start, end, pass);
}
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv

View File

@ -70,6 +70,8 @@
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
class VertexDescriptor;
class OsdCpuKernelDispatcher : public OsdKernelDispatcher
{
public:
@ -102,11 +104,11 @@ public:
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
virtual void BeginLaunchKernel();
virtual void OnKernelLaunch() {}
virtual void EndLaunchKernel();
virtual void OnKernelFinish() {}
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
@ -123,18 +125,26 @@ public:
protected:
struct DeviceTable {
DeviceTable() : devicePtr(NULL) { }
~DeviceTable();
struct SubdivisionTable {
SubdivisionTable() : ptr(NULL) { }
~SubdivisionTable();
void Copy(int size, const void *ptr);
void *devicePtr;
void *ptr;
};
OsdCpuVertexBuffer *_vertexBuffer, *_varyingBuffer;
float *GetVertexBuffer() const { return _currentVertexBuffer ? _currentVertexBuffer->GetCpuBuffer() : NULL; }
std::vector<DeviceTable> _tables;
float *GetVaryingBuffer() const { return _currentVaryingBuffer ? _currentVaryingBuffer->GetCpuBuffer() : NULL; }
OsdCpuVertexBuffer *_currentVertexBuffer,
*_currentVaryingBuffer;
VertexDescriptor *_vdesc;
std::vector<SubdivisionTable> _tables;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -64,9 +64,6 @@ namespace OPENSUBDIV_VERSION {
void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying, const int *F_IT, const int *F_ITa, int offset, int start, int end) {
int ve = vdesc->numVertexElements;
int vev = vdesc->numVaryingElements;
#ifdef _OPENMP
#pragma omp parallel for
#endif
@ -77,23 +74,19 @@ void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying
float weight = 1.0f/n;
// XXX: should use local vertex struct variable instead of accumulating directly into global memory.
float *dst = &vertex[(offset + i)*ve];
float *dstVarying = &varying[(offset + i)*vev];
vdesc->Clear(dst, dstVarying);
int dstIndex = offset + i;
vdesc->Clear(vertex, varying, dstIndex);
for (int j=0; j<n; ++j) {
int index = F_IT[h+j];
vdesc->AddWithWeight(dst, &vertex[index*ve], weight);
vdesc->AddVaryingWithWeight(dstVarying, &varying[index*vev], weight);
vdesc->AddWithWeight(vertex, dstIndex, index, weight);
vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight);
}
}
}
void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, const float *E_W, int offset, int start, int end) {
int ve = vdesc->numVertexElements;
int vev = vdesc->numVaryingElements;
#ifdef _OPENMP
#pragma omp parallel for
#endif
@ -105,30 +98,26 @@ void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying,
float vertWeight = E_W[i*2+0];
float *dst = &vertex[(offset+i)*ve];
float *dstVarying = &varying[(offset+i)*vev];
vdesc->Clear(dst, dstVarying);
int dstIndex = offset + i;
vdesc->Clear(vertex, varying, dstIndex);
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], vertWeight);
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], vertWeight);
vdesc->AddWithWeight(vertex, dstIndex, eidx0, vertWeight);
vdesc->AddWithWeight(vertex, dstIndex, eidx1, vertWeight);
if (eidx2 != -1) {
float faceWeight = E_W[i*2+1];
vdesc->AddWithWeight(dst, &vertex[eidx2*ve], faceWeight);
vdesc->AddWithWeight(dst, &vertex[eidx3*ve], faceWeight);
vdesc->AddWithWeight(vertex, dstIndex, eidx2, faceWeight);
vdesc->AddWithWeight(vertex, dstIndex, eidx3, faceWeight);
}
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f);
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
}
}
void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const float *V_W, int offset, int start, int end, int pass) {
int ve = vdesc->numVertexElements;
int vev = vdesc->numVaryingElements;
#ifdef _OPENMP
#pragma omp parallel for
#endif
@ -146,29 +135,25 @@ void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying
if (weight>0.0f && weight<1.0f && n > 0)
weight=1.0f-weight;
float *dst = &vertex[(offset+i)*ve];
float *dstVarying = &varying[(offset+i)*vev];
int dstIndex = offset + i;
if(not pass)
vdesc->Clear(dst, dstVarying);
vdesc->Clear(vertex, varying, dstIndex);
if (eidx0==-1 || (pass==0 && (n==-1)) ) {
vdesc->AddWithWeight(dst, &vertex[p*ve], weight);
vdesc->AddWithWeight(vertex, dstIndex, p, weight);
} else {
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * 0.75f);
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], weight * 0.125f);
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], weight * 0.125f);
vdesc->AddWithWeight(vertex, dstIndex, p, weight * 0.75f);
vdesc->AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f);
vdesc->AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f);
}
if (not pass)
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
}
void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
int ve = vdesc->numVertexElements;
int vev = vdesc->numVaryingElements;
#ifdef _OPENMP
#pragma omp parallel for
#endif
@ -181,25 +166,21 @@ void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying
float wp = 1.0f/float(n*n);
float wv = (n-2.0f) * n * wp;
float *dst = &vertex[(offset+i)*ve];
float *dstVarying = &varying[(offset+i)*vev];
vdesc->Clear(dst, dstVarying);
int dstIndex = offset + i;
vdesc->Clear(vertex, varying, dstIndex);
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * wv);
vdesc->AddWithWeight(vertex, dstIndex, p, weight * wv);
for (int j = 0; j < n; ++j) {
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2]*ve], weight * wp);
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2+1]*ve], weight * wp);
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp);
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
}
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
}
void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) {
int ve = vdesc->numVertexElements;
int vev = vdesc->numVaryingElements;
#ifdef _OPENMP
#pragma omp parallel for
#endif
@ -214,24 +195,20 @@ void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *var
beta = beta * beta;
beta = (0.625f - beta) * wp;
float *dst = &vertex[(offset+i)*ve];
float *dstVarying = &varying[(offset+i)*vev];
vdesc->Clear(dst, dstVarying);
int dstIndex = offset + i;
vdesc->Clear(vertex, varying, dstIndex);
vdesc->AddWithWeight(dst, &vertex[p*ve], weight * (1.0f - (beta * n)));
vdesc->AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n)));
for (int j = 0; j < n; ++j)
vdesc->AddWithWeight(dst, &vertex[V_IT[h+j]*ve], weight * beta);
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
}
void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, int offset, int start, int end) {
int ve = vdesc->numVertexElements;
int vev = vdesc->numVaryingElements;
#ifdef _OPENMP
#pragma omp parallel for
#endif
@ -239,35 +216,30 @@ void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *va
int eidx0 = E_IT[2*i+0];
int eidx1 = E_IT[2*i+1];
float *dst = &vertex[(offset+i)*ve];
float *dstVarying = &varying[(offset+i)*vev];
vdesc->Clear(dst, dstVarying);
int dstIndex = offset + i;
vdesc->Clear(vertex, varying, dstIndex);
vdesc->AddWithWeight(vertex, dstIndex, eidx0, 0.5f);
vdesc->AddWithWeight(vertex, dstIndex, eidx1, 0.5f);
vdesc->AddWithWeight(dst, &vertex[eidx0*ve], 0.5f);
vdesc->AddWithWeight(dst, &vertex[eidx1*ve], 0.5f);
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f);
vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
}
}
void computeBilinearVertex(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, int offset, int start, int end) {
int ve = vdesc->numVertexElements;
int vev = vdesc->numVaryingElements;
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int i = start; i < end; i++) {
int p = V_ITa[i];
float *dst = &vertex[(offset+i)*ve];
float *dstVarying = &varying[(offset+i)*vev];
vdesc->Clear(dst, dstVarying);
int dstIndex = offset + i;
vdesc->Clear(vertex, varying, dstIndex);
vdesc->AddWithWeight(dst, &vertex[p*ve], 1.0f);
vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f);
vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
}

View File

@ -67,20 +67,28 @@ struct VertexDescriptor {
VertexDescriptor(int numVertexElem, int numVaryingElem)
: numVertexElements(numVertexElem), numVaryingElements(numVaryingElem) { }
void Clear(float *vertex, float *varying) const {
for (int i = 0; i < numVertexElements; ++i)
vertex[i] = 0.0f;
void Clear(float *vertex, float *varying, int index) const {
if (vertex) {
for (int i = 0; i < numVertexElements; ++i)
vertex[index*numVertexElements+i] = 0.0f;
}
for (int i = 0; i < numVaryingElements; ++i)
varying[i] = 0.0f;
if (varying) {
for (int i = 0; i < numVaryingElements; ++i)
varying[index*numVaryingElements+i] = 0.0f;
}
}
void AddWithWeight(float *vertex, const float *src, float weight) const {
void AddWithWeight(float *vertex, int dstIndex, int srcIndex, float weight) const {
int d = dstIndex * numVertexElements;
int s = srcIndex * numVertexElements;
for (int i = 0; i < numVertexElements; ++i)
vertex[i] += src[i] * weight;
vertex[d++] += vertex[s++] * weight;
}
void AddVaryingWithWeight(float *varying, const float *src, float weight) const {
void AddVaryingWithWeight(float *varying, int dstIndex, int srcIndex, float weight) const {
int d = dstIndex * numVaryingElements;
int s = srcIndex * numVaryingElements;
for (int i = 0; i < numVaryingElements; ++i)
varying[i] += src[i] * weight;
varying[d++] += varying[s++] * weight;
}
int numVertexElements;

View File

@ -81,7 +81,42 @@ void OsdCudaComputeBilinearVertex(float *vertex, float *varying, int numUserVert
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
OsdCudaVertexBuffer::OsdCudaVertexBuffer(int numElements, int numVertices) :
OsdGpuVertexBuffer(numElements, numVertices) {
// register vbo as cuda resource
cudaGraphicsGLRegisterBuffer(&_cudaResource, _vbo, cudaGraphicsMapFlagsNone);
}
void
OsdCudaVertexBuffer::UpdateData(const float *src, int numVertices) {
void *dst = Map();
cudaMemcpy(dst, src, _numElements * numVertices * sizeof(float), cudaMemcpyHostToDevice);
Unmap();
}
void *
OsdCudaVertexBuffer::Map() {
size_t num_bytes;
void *ptr;
cudaGraphicsMapResources(1, &_cudaResource, 0);
cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource);
return ptr;
}
void
OsdCudaVertexBuffer::Unmap() {
cudaGraphicsUnmapResources(1, &_cudaResource, 0);
}
OsdCudaVertexBuffer::~OsdCudaVertexBuffer() {
cudaGraphicsUnregisterResource(_cudaResource);
}
// -------------------------------------------------------------------------------
OsdCudaKernelDispatcher::DeviceTable::~DeviceTable() {
if (devicePtr) cudaFree(devicePtr);
@ -95,18 +130,16 @@ OsdCudaKernelDispatcher::DeviceTable::Copy(int size, const void *ptr) {
cudaMalloc(&devicePtr, size);
cudaMemcpy(devicePtr, ptr, size, cudaMemcpyHostToDevice);
}
// -------------------------------------------------------------------------------
OsdCudaKernelDispatcher::OsdCudaKernelDispatcher(int levels)
: OsdKernelDispatcher(levels),
_cudaVertexResource(NULL),
_cudaVaryingResource(NULL)
: OsdKernelDispatcher(levels)
{
_tables.resize(TABLE_MAX);
}
OsdCudaKernelDispatcher::~OsdCudaKernelDispatcher() {
cudaDeviceReset(); // XXX: necessary?
}
void
@ -115,48 +148,52 @@ OsdCudaKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
_tables[tableIndex].Copy(size, ptr);
}
void
OsdCudaKernelDispatcher::BeginLaunchKernel() { }
void
OsdCudaKernelDispatcher::EndLaunchKernel() { }
OsdVertexBuffer *
OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
{
return new OsdGpuVertexBuffer(numElements, count);
return new OsdCudaVertexBuffer(numElements, numVertices);
}
void
OsdCudaKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
OsdGpuVertexBuffer *bVertex = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
OsdGpuVertexBuffer *bVarying = dynamic_cast<OsdGpuVertexBuffer *>(varying);
size_t num_bytes;
if (vertex)
_currentVertexBuffer = dynamic_cast<OsdCudaVertexBuffer *>(vertex);
else
_currentVertexBuffer = NULL;
if (bVertex) {
cudaGraphicsGLRegisterBuffer(&_cudaVertexResource, bVertex->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard);
cudaGraphicsMapResources(1, &_cudaVertexResource, 0);
cudaGraphicsResourceGetMappedPointer((void **)&_deviceVertices, &num_bytes, _cudaVertexResource);
if (varying)
_currentVaryingBuffer = dynamic_cast<OsdCudaVertexBuffer *>(varying);
else
_currentVaryingBuffer = NULL;
if (_currentVertexBuffer) {
_deviceVertices = (float*)_currentVertexBuffer->Map();
// XXX todo remove _numVertexElements
_numVertexElements = _currentVertexBuffer->GetNumElements();
} else {
_numVertexElements = 0;
}
if (bVarying) {
cudaGraphicsGLRegisterBuffer(&_cudaVaryingResource, bVarying->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard);
cudaGraphicsMapResources(1, &_cudaVaryingResource, 0);
cudaGraphicsResourceGetMappedPointer((void **)&_deviceVaryings, &num_bytes, _cudaVaryingResource);
if (_currentVaryingBuffer) {
_deviceVaryings = (float*)_currentVaryingBuffer->Map();
_numVaryingElements = _currentVaryingBuffer->GetNumElements();
} else {
_numVaryingElements = 0;
}
}
void
OsdCudaKernelDispatcher::UnbindVertexBuffer()
{
if (_cudaVertexResource)
cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0);
if (_cudaVaryingResource)
cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0);
if (_currentVertexBuffer){
_currentVertexBuffer->Unmap();
}
if (_currentVaryingBuffer)
_currentVaryingBuffer->Unmap();
_cudaVertexResource = NULL;
_cudaVaryingResource = NULL;
_currentVertexBuffer = NULL;
_currentVaryingBuffer = NULL;
}
void

View File

@ -59,6 +59,7 @@
#include "../version.h"
#include "../osd/kernelDispatcher.h"
#include "../osd/vertexBuffer.h"
#include <GL/glew.h>
@ -72,6 +73,19 @@ namespace OPENSUBDIV_VERSION {
struct DeviceVertex;
class OsdCudaVertexBuffer : public OsdGpuVertexBuffer {
public:
OsdCudaVertexBuffer(int numElements, int numVertices);
virtual ~OsdCudaVertexBuffer();
virtual void UpdateData(const float *src, int numVertices);
void * Map();
void Unmap();
protected:
cudaGraphicsResource *_cudaResource;
};
class OsdCudaKernelDispatcher : public OsdKernelDispatcher
{
public:
@ -106,11 +120,11 @@ public:
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
virtual void BeginLaunchKernel();
virtual void EndLaunchKernel();
virtual void OnKernelLaunch() {}
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
virtual void OnKernelFinish() {}
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
@ -138,8 +152,8 @@ protected:
std::vector<DeviceTable> _tables;
cudaGraphicsResource *_cudaVertexResource,
*_cudaVaryingResource;
OsdCudaVertexBuffer *_currentVertexBuffer,
*_currentVaryingBuffer;
float *_deviceVertices,
*_deviceVaryings;

View File

@ -61,6 +61,8 @@
#include <GL/glew.h>
#include <stdlib.h>
#include <string.h>
#include <functional>
#include <algorithm>
#define OPT_E0_IT_VEC4
#define OPT_E0_S_VEC2
@ -88,65 +90,28 @@ static const char *shaderDefines = ""
#endif
;
std::vector<OsdGlslKernelDispatcher::ComputeShader> OsdGlslKernelDispatcher::shaderRegistry;
OsdGlslKernelDispatcher::OsdGlslKernelDispatcher(int levels)
: OsdKernelDispatcher(levels)
{
_vertexBuffer = 0;
_varyingBuffer = 0;
_prgKernel = 0;
_currentVertexBuffer = 0;
_currentVaryingBuffer = 0;
_shader = 0;
glGenTextures(1, &_vertexTexture);
glGenTextures(1, &_varyingTexture);
_tableBuffers.resize(TABLE_MAX);
_tableTextures.resize(TABLE_MAX);
_tableUniforms.resize(TABLE_MAX);
_tableOffsetUniforms.resize(TABLE_MAX);
glGenBuffers(TABLE_MAX, &_tableBuffers[0]);
glGenTextures(TABLE_MAX, &_tableTextures[0]);
subComputeFace = 0;
subComputeEdge = 0;
subComputeVertexA = 0;
subComputeVertexB = 0;
uniformVertexPass = 0;
uniformIndexStart = 0;
uniformIndexOffset = 0;
compile(shaderSource, shaderDefines);
subComputeFace = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeFace");
subComputeEdge = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeEdge");
subComputeVertexA = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexA");
subComputeVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexB");
subComputeLoopVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "loopComputeVertexB");
uniformVertexPass = glGetUniformLocation(_prgKernel, "vertexPass");
uniformIndexStart = glGetUniformLocation(_prgKernel, "indexStart");
uniformIndexOffset = glGetUniformLocation(_prgKernel, "indexOffset");
_tableUniforms[F_IT] = glGetUniformLocation(_prgKernel, "_F0_IT");
_tableUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "_F0_ITa");
_tableUniforms[E_IT] = glGetUniformLocation(_prgKernel, "_E0_IT");
_tableUniforms[V_IT] = glGetUniformLocation(_prgKernel, "_V0_IT");
_tableUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "_V0_ITa");
_tableUniforms[E_W] = glGetUniformLocation(_prgKernel, "_E0_S");
_tableUniforms[V_W] = glGetUniformLocation(_prgKernel, "_V0_S");
_tableOffsetUniforms[F_IT] = glGetUniformLocation(_prgKernel, "F_IT_ofs");
_tableOffsetUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "F_ITa_ofs");
_tableOffsetUniforms[E_IT] = glGetUniformLocation(_prgKernel, "E_IT_ofs");
_tableOffsetUniforms[V_IT] = glGetUniformLocation(_prgKernel, "V_IT_ofs");
_tableOffsetUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "V_ITa_ofs");
_tableOffsetUniforms[E_W] = glGetUniformLocation(_prgKernel, "E_W_ofs");
_tableOffsetUniforms[V_W] = glGetUniformLocation(_prgKernel, "V_W_ofs");
}
OsdGlslKernelDispatcher::~OsdGlslKernelDispatcher() {
if (_prgKernel)
glDeleteProgram(_prgKernel);
glDeleteTextures(1, &_vertexTexture);
glDeleteTextures(1, &_varyingTexture);
@ -163,98 +128,134 @@ OsdGlslKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr)
}
void
OsdGlslKernelDispatcher::BeginLaunchKernel() {
OsdGlslKernelDispatcher::OnKernelLaunch() {
glUseProgram(_prgKernel);
glEnable(GL_RASTERIZER_DISCARD);
_shader->UseProgram();
//XXX what if loop..
bindTextureBuffer(_tableUniforms[F_IT], _tableBuffers[F_IT],
bindTextureBuffer(_shader->GetTableUniform(F_IT), _tableBuffers[F_IT],
_tableTextures[F_IT], GL_R32UI, 2);
bindTextureBuffer(_tableUniforms[F_ITa], _tableBuffers[F_ITa],
bindTextureBuffer(_shader->GetTableUniform(F_ITa), _tableBuffers[F_ITa],
_tableTextures[F_ITa], GL_R32I, 3);
#ifdef OPT_E0_IT_VEC4
bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT],
bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT],
_tableTextures[E_IT], GL_RGBA32UI, 4);
#else
bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT],
bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT],
_tableTextures[E_IT], GL_R32UI, 4);
#endif
#ifdef OPT_CATMARK_V_IT_VEC2
bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT],
bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT],
_tableTextures[V_IT], GL_RG32UI, 5);
#else
bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT],
bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT],
_tableTextures[V_IT], GL_R32UI, 5);
#endif
bindTextureBuffer(_tableUniforms[V_ITa], _tableBuffers[V_ITa],
bindTextureBuffer(_shader->GetTableUniform(V_ITa), _tableBuffers[V_ITa],
_tableTextures[V_ITa], GL_R32I, 6);
#ifdef OPT_E0_S_VEC2
bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W],
bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W],
_tableTextures[E_W], GL_RG32F, 7);
#else
bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W],
bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W],
_tableTextures[E_W], GL_R32F, 7);
#endif
bindTextureBuffer(_tableUniforms[V_W], _tableBuffers[V_W],
bindTextureBuffer(_shader->GetTableUniform(V_W), _tableBuffers[V_W],
_tableTextures[V_W], GL_R32F, 8);
}
void
OsdGlslKernelDispatcher::EndLaunchKernel() {
OsdGlslKernelDispatcher::OnKernelFinish() {
unbindTextureBuffer(2);
unbindTextureBuffer(3);
unbindTextureBuffer(4);
unbindTextureBuffer(5);
unbindTextureBuffer(6);
unbindTextureBuffer(7);
unbindTextureBuffer(8);
glDisable(GL_RASTERIZER_DISCARD);
glUseProgram(0);
// XXX Unbind table buffer
}
OsdVertexBuffer *
OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int count)
OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices)
{
return new OsdGpuVertexBuffer(numElements, count);
return new OsdGpuVertexBuffer(numElements, numVertices);
}
void
OsdGlslKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
OsdGpuVertexBuffer *bVertex = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
OsdGpuVertexBuffer *bVarying = dynamic_cast<OsdGpuVertexBuffer *>(varying);
if (vertex)
_currentVertexBuffer = dynamic_cast<OsdGpuVertexBuffer *>(vertex);
else
_currentVertexBuffer = NULL;
if (bVertex) {
_vertexBuffer = bVertex->GetGpuBuffer();
bindTextureBuffer(_vertexUniform, _vertexBuffer, _vertexTexture, GL_RGB32F, 0);
if (varying)
_currentVaryingBuffer = dynamic_cast<OsdGpuVertexBuffer *>(varying);
else
_currentVaryingBuffer = NULL;
int numVertexElements = vertex ? vertex->GetNumElements() : 0;
int numVaryingElements = varying ? varying->GetNumElements() : 0;
// find appropriate shader program from registry (compile it if needed)
std::vector<ComputeShader>::iterator it =
std::find_if(shaderRegistry.begin(), shaderRegistry.end(),
ComputeShader::Match(numVertexElements, numVaryingElements));
_shader = NULL;
if (it != shaderRegistry.end()) {
_shader = &(*it);
} else {
shaderRegistry.push_back(ComputeShader());
_shader = &shaderRegistry.back();
_shader->Compile(numVertexElements, numVaryingElements);
}
if (bVarying) {
_varyingBuffer = bVarying->GetGpuBuffer();
bindTextureBuffer(_varyingUniform, _varyingBuffer, _varyingTexture, GL_R32F, 0);
_shader->UseProgram(); // need to bind textures
// bind vertex texture
if (_currentVertexBuffer) {
bindTextureBuffer(_shader->GetVertexUniform(), _currentVertexBuffer->GetGpuBuffer(), _vertexTexture, GL_RGB32F, 0);
}
glUseProgram(_prgKernel);
glUniform1i(_vertexUniform, 0);
if (_currentVaryingBuffer) {
bindTextureBuffer(_shader->GetVaryingUniform(), _currentVaryingBuffer->GetGpuBuffer(), _varyingTexture, GL_R32F, 1);
}
#if 0 // experiment to use image store function
glActiveTexture(GL_TEXTURE0 + 0);
glBindImageTextureEXT(0, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
if (_numVarying > 0) {
glUniform1i(_varyingUniform, 1);
glBindImageTextureEXT(1, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
}
#endif
CHECK_GL_ERROR("BindVertexBuffer \n");
}
void
OsdGlslKernelDispatcher::UnbindVertexBuffer()
{
if (_currentVertexBuffer) {
unbindTextureBuffer(0);
}
if (_currentVaryingBuffer) {
unbindTextureBuffer(1);
}
_currentVertexBuffer = NULL;
_currentVaryingBuffer = NULL;
}
void
OsdGlslKernelDispatcher::Synchronize() {
glFinish();
@ -263,14 +264,15 @@ OsdGlslKernelDispatcher::Synchronize() {
void
OsdGlslKernelDispatcher::bindTextureBuffer(
GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const {
if (sampler == -1) {
OSD_ERROR("BindTextureError:: sampler = %d\n", sampler);
return;
}
OSD_DEBUG("BindTextureBuffer sampler=%d, buffer=%d, texture = %d, E%x\n", sampler, buffer, texture, glGetError());
OSD_DEBUG("BindTextureBuffer unit=%d, sampler=%d, buffer=%d, texture = %d, E%x\n", unit, sampler, buffer, texture, glGetError());
glUniform1i(sampler, unit);
CHECK_GL_ERROR("BindTextureBuffer glUniform %d\n", unit);
glActiveTexture(GL_TEXTURE0 + unit);
CHECK_GL_ERROR("BindTextureBuffer glActiveTexture %d\n", unit);
glBindTexture(GL_TEXTURE_BUFFER, texture);
@ -280,38 +282,137 @@ OsdGlslKernelDispatcher::bindTextureBuffer(
glActiveTexture(GL_TEXTURE0);
}
bool
OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDefine) {
_prgKernel = glCreateProgram();
void
OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const {
glActiveTexture(GL_TEXTURE0 + unit);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
void
OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
_shader->ApplyCatmarkFaceVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
_tableOffsets[F_IT][level-1],
_tableOffsets[F_ITa][level-1],
offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
_shader->ApplyCatmarkEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
_tableOffsets[E_IT][level-1],
_tableOffsets[E_W][level-1],
offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
_shader->ApplyCatmarkVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer,
_tableOffsets[V_IT][level-1],
_tableOffsets[V_ITa][level-1],
_tableOffsets[V_W][level-1],
offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
_shader->ApplyCatmarkVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer,
_tableOffsets[V_ITa][level-1],
_tableOffsets[V_W][level-1],
offset, pass, start, end);
}
void
OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
_shader->ApplyLoopEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer,
_tableOffsets[E_IT][level-1],
_tableOffsets[E_W][level-1],
offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
_shader->ApplyLoopVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer,
_tableOffsets[V_IT][level-1],
_tableOffsets[V_ITa][level-1],
_tableOffsets[V_W][level-1],
offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA(
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
_shader->ApplyLoopVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer,
_tableOffsets[V_ITa][level-1],
_tableOffsets[V_W][level-1],
offset, pass, start, end);
}
// -------------------------------------------------------------------------------
OsdGlslKernelDispatcher::ComputeShader::ComputeShader() :
_program(0)
{
}
OsdGlslKernelDispatcher::ComputeShader::~ComputeShader()
{
if (_program)
glDeleteProgram(_program);
}
bool
OsdGlslKernelDispatcher::ComputeShader::Compile(int numVertexElements, int numVaryingElements) {
// XXX: NOTE: current GLSL only supports numVertexElements = 6!!!
assert(numVertexElements == 6);
_numVertexElements = numVertexElements;
_numVaryingElements = numVaryingElements;
_program = glCreateProgram();
GLuint shader = glCreateShader(GL_VERTEX_SHADER);
char constantDefine[256];
snprintf(constantDefine, 256,
"#define NUM_VARYING_ELEMENTS %d\n", _numVarying);
"#define NUM_VARYING_ELEMENTS %d\n", numVaryingElements);
const char *shaderSources[3];
shaderSources[0] = constantDefine;
shaderSources[1] = shaderDefine;
shaderSources[1] = shaderDefines;
shaderSources[2] = shaderSource;
glShaderSource(shader, 3, shaderSources, NULL);
glCompileShader(shader);
glAttachShader(_prgKernel, shader);
glAttachShader(_program, shader);
const char *outputs[] = { "outPosition",
"outNormal",
"gl_NextBuffer",
"outVaryingData" };
int nOutputs = _numVarying > 0 ? 4 : 2;
glTransformFeedbackVaryings(_prgKernel, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS);
int nOutputs = numVaryingElements > 0 ? 4 : 2;
glTransformFeedbackVaryings(_program, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS);
CHECK_GL_ERROR("Transform feedback initialize \n");
GLint linked = 0;
glLinkProgram(_prgKernel);
glGetProgramiv(_prgKernel, GL_LINK_STATUS, &linked);
glLinkProgram(_program);
glGetProgramiv(_program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
OSD_ERROR("Fail to link shader\n");
@ -320,56 +421,79 @@ OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDef
glGetShaderInfoLog(shader, 1024, NULL, buffer);
OSD_ERROR(buffer);
glGetProgramInfoLog(_prgKernel, 1024, NULL, buffer);
glGetProgramInfoLog(_program, 1024, NULL, buffer);
OSD_ERROR(buffer);
glDeleteProgram(_prgKernel);
_prgKernel = 0;
glDeleteProgram(_program);
_program = 0;
// XXX ERROR HANDLE
return false;
}
glDeleteShader(shader);
_vertexUniform = glGetUniformLocation(_prgKernel, "vertex");
_varyingUniform = glGetUniformLocation(_prgKernel, "varyingData");
_vertexUniform = glGetUniformLocation(_program, "vertex");
_varyingUniform = glGetUniformLocation(_program, "varyingData");
_subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace");
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
_subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA");
_subComputeVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexB");
_subComputeLoopVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "loopComputeVertexB");
_uniformVertexPass = glGetUniformLocation(_program, "vertexPass");
_uniformIndexStart = glGetUniformLocation(_program, "indexStart");
_uniformIndexOffset = glGetUniformLocation(_program, "indexOffset");
_tableUniforms.resize(TABLE_MAX);
_tableOffsetUniforms.resize(TABLE_MAX);
_tableUniforms[F_IT] = glGetUniformLocation(_program, "_F0_IT");
_tableUniforms[F_ITa] = glGetUniformLocation(_program, "_F0_ITa");
_tableUniforms[E_IT] = glGetUniformLocation(_program, "_E0_IT");
_tableUniforms[V_IT] = glGetUniformLocation(_program, "_V0_IT");
_tableUniforms[V_ITa] = glGetUniformLocation(_program, "_V0_ITa");
_tableUniforms[E_W] = glGetUniformLocation(_program, "_E0_S");
_tableUniforms[V_W] = glGetUniformLocation(_program, "_V0_S");
_tableOffsetUniforms[F_IT] = glGetUniformLocation(_program, "F_IT_ofs");
_tableOffsetUniforms[F_ITa] = glGetUniformLocation(_program, "F_ITa_ofs");
_tableOffsetUniforms[E_IT] = glGetUniformLocation(_program, "E_IT_ofs");
_tableOffsetUniforms[V_IT] = glGetUniformLocation(_program, "V_IT_ofs");
_tableOffsetUniforms[V_ITa] = glGetUniformLocation(_program, "V_ITa_ofs");
_tableOffsetUniforms[E_W] = glGetUniformLocation(_program, "E_W_ofs");
_tableOffsetUniforms[V_W] = glGetUniformLocation(_program, "V_W_ofs");
return true;
}
void
OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const {
glActiveTexture(GL_TEXTURE0 + unit);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
void
OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass) const {
OsdGlslKernelDispatcher::ComputeShader::transformGpuBufferData(OsdGpuVertexBuffer *vertexBuffer, OsdGpuVertexBuffer *varyingBuffer,
GLint offset, int start, int end) const {
int count = end - start;
if (count <= 0) return;
OSD_DEBUG("_transformGpuBufferData kernel=%d E%x, offset=%d, count=%d\n", kernel, glGetError(), offset, count);
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &kernel);
glUniform1i(uniformVertexPass, vertexPass); // XXX
OSD_DEBUG("_transformGpuBufferData offset=%d, count=%d\n", glGetError(), offset, count);
// set batch range
glUniform1i(uniformIndexStart, start);
glUniform1i(uniformIndexOffset, offset);
glUniform1i(_uniformIndexStart, start);
glUniform1i(_uniformIndexOffset, offset);
// XXX: end is not used here now
CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n", offset, start);
// set transform feedback buffer
int vertexStride = _numVertexElements*sizeof(float);
int varyingStride = _numVarying*sizeof(float);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, _vertexBuffer,
(start+offset)*vertexStride, count*vertexStride);
CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
if (vertexBuffer) {
int vertexStride = vertexBuffer->GetNumElements()*sizeof(float);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer->GetGpuBuffer(),
(start+offset)*vertexStride, count*vertexStride);
}
if (_numVarying > 0){
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, _varyingBuffer,
if (varyingBuffer){
int varyingStride = varyingBuffer->GetNumElements()*sizeof(float);
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer->GetGpuBuffer(),
(start+offset)*varyingStride, count*varyingStride);
}
CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n");
glBeginTransformFeedback(GL_POINTS);
CHECK_GL_ERROR("transformGpuBufferData glBeginTransformFeedback\n");
@ -387,68 +511,84 @@ OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int
}
void
OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
glUniform1i(_tableOffsetUniforms[F_IT], _tableOffsets[F_IT][level-1]);
glUniform1i(_tableOffsetUniforms[F_ITa], _tableOffsets[F_ITa][level-1]);
transformGpuBufferData(subComputeFace, offset, start, end);
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkFaceVerticesKernel(
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace);
glUniform1i(_tableOffsetUniforms[F_IT], F_IT_ofs);
glUniform1i(_tableOffsetUniforms[F_ITa], F_ITa_ofs);
transformGpuBufferData(vertex, varying, offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]);
glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]);
transformGpuBufferData(subComputeEdge, offset, start, end);
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkEdgeVerticesKernel(
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int E_IT_ofs, int E_W_ofs, int offset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs);
glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs);
transformGpuBufferData(vertex, varying, offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelB(
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) {
glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]);
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
transformGpuBufferData(subComputeVertexB, offset, start, end);
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexB);
glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs);
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
transformGpuBufferData(vertex, varying, offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelA(
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) {
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
transformGpuBufferData(subComputeVertexA, offset, start, end, pass);
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
transformGpuBufferData(vertex, varying, offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopEdgeVerticesKernel(
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int E_IT_ofs, int E_W_ofs, int offset, int start, int end) {
glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]);
glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]);
transformGpuBufferData(subComputeEdge, offset, start, end);
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge);
glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs);
glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs);
transformGpuBufferData(vertex, varying, offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB(
FarMesh<OsdVertex> * mesh, int offset, int level, int start, int end, void * data) const {
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelB(
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) {
glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]);
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
transformGpuBufferData(subComputeLoopVertexB, offset, start, end);
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeLoopVertexB);
glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs);
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
transformGpuBufferData(vertex, varying, offset, start, end);
}
void
OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA(
FarMesh<OsdVertex> * mesh, int offset, bool pass, int level, int start, int end, void * data) const {
OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelA(
OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) {
glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]);
glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]);
transformGpuBufferData(subComputeVertexA, offset, start, end, pass);
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA);
glUniform1i(_uniformVertexPass, pass ? 1 : 0);
glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs);
glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs);
transformGpuBufferData(vertex, varying, offset, start, end);
}
} // end namespace OPENSUBDIV_VERSION

View File

@ -89,11 +89,11 @@ public:
virtual void CopyTable(int tableIndex, size_t size, const void *ptr);
virtual void BeginLaunchKernel();
virtual void EndLaunchKernel();
virtual void OnKernelLaunch();
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count);
virtual void OnKernelFinish();
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices);
virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
@ -110,42 +110,100 @@ public:
protected:
class ComputeShader {
public:
ComputeShader();
~ComputeShader();
bool Compile(int numVertexElements, int numVaryingElements);
GLuint GetTableUniform(int table) const {
return _tableUniforms[table];
}
GLuint GetVertexUniform() const { return _vertexUniform; }
GLuint GetVaryingUniform() const { return _varyingUniform; }
void ApplyCatmarkFaceVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end);
void ApplyCatmarkEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int E_IT_ofs, int E_W_ofs, int offset, int start, int end);
void ApplyCatmarkVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end);
void ApplyCatmarkVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end);
void ApplyLoopEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int E_IT_ofs, int E_W_ofs, int offset, int start, int end);
void ApplyLoopVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end);
void ApplyLoopVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end);
void UseProgram () const {
glUseProgram(_program);
}
struct Match {
Match(int numVertexElements, int numVaryingElements) :
_numVertexElements(numVertexElements), _numVaryingElements(numVaryingElements) {}
bool operator() (const ComputeShader &shader) {
return (shader._numVertexElements == _numVertexElements
&& shader._numVaryingElements == _numVaryingElements);
}
int _numVertexElements, _numVaryingElements;
};
friend class Match;
private:
void transformGpuBufferData(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying,
GLint offset, int start, int end) const;
int _numVertexElements;
int _numVaryingElements;
GLuint _program;
GLuint _uniformVertexPass;
GLuint _uniformIndexStart;
GLuint _uniformIndexOffset;
GLuint _vertexUniform,
_varyingUniform;
// shader locations
GLuint _subComputeFace, _subComputeEdge, _subComputeVertexA, _subComputeVertexB;
GLuint _subComputeLoopVertexB;
std::vector<GLuint> _tableUniforms;
std::vector<GLuint> _tableOffsetUniforms;
};
void bindTextureBuffer(GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const;
void unbindTextureBuffer(int unit) const;
void transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass=false) const;
bool compile(const char *shaderSource, const char *shaderDefine);
GLuint _prgKernel;
int _numVertexElements,
_numVarying;
GLuint _vertexBuffer,
_varyingBuffer;
ComputeShader * _shader;
// texture for vertex
GLuint _vertexTexture,
_varyingTexture;
GLuint _vertexUniform,
_varyingUniform;
OsdGpuVertexBuffer *_currentVertexBuffer,
*_currentVaryingBuffer;
// table buffers
std::vector<GLuint> _tableBuffers;
std::vector<GLuint> _tableTextures;
std::vector<GLuint> _tableUniforms;
std::vector<GLuint> _tableOffsetUniforms;
GLuint uniformVertexPass;
GLuint uniformIndexStart;
GLuint uniformIndexOffset;
// shader locations
GLuint subComputeFace, subComputeEdge, subComputeVertexA, subComputeVertexB;
GLuint subComputeLoopVertexB;
// static shader registry (XXX tentative..)
static std::vector<ComputeShader> shaderRegistry;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -82,9 +82,9 @@ public:
virtual void CopyTable(int tableIndex, size_t size, const void *ptr) = 0;
virtual void BeginLaunchKernel() = 0;
virtual void OnKernelLaunch() = 0;
virtual void EndLaunchKernel() = 0;
virtual void OnKernelFinish() = 0;
virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count) = 0;

View File

@ -102,7 +102,6 @@ OsdMesh::Create(OsdHbrMesh *hbrMesh, int level, const std::string &kernel) {
OSD_DEBUG("PREP: NumCoarseVertex = %d\n", _mMesh->GetNumCoarseVertices());
OSD_DEBUG("PREP: NumVertex = %d\n", _mMesh->GetNumVertices());
OSD_DEBUG("PREP: NumTables = %d\n", _mMesh->GetNumSubdivisionTables());
const FarSubdivisionTables<OsdVertex>* table = _mMesh->GetSubdivision();
@ -145,11 +144,11 @@ OsdMesh::Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) {
_dispatcher->BindVertexBuffer(vertex, varying);
_dispatcher->BeginLaunchKernel();
_dispatcher->OnKernelLaunch();
_mMesh->Subdivide(_level+1);
_dispatcher->EndLaunchKernel();
_dispatcher->OnKernelFinish();
_dispatcher->UnbindVertexBuffer();
}

View File

@ -95,7 +95,13 @@ public:
OsdVertexBuffer *InitializeVertexBuffer(int numElements);
void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying);
// for non-interleaved vertex data
void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying = NULL);
/*
// for interleaved vertex data ?
template <class T> void Subdivide(T *vertex) { }
*/
void Synchronize();

View File

@ -9,69 +9,83 @@ namespace OPENSUBDIV_VERSION {
class OsdVertexBuffer {
public:
OsdVertexBuffer(int numElements) : _numElements(numElements) {}
virtual ~OsdVertexBuffer() {}
virtual void UpdateData(const float *src, int count) = 0;
virtual void UpdateData(const float *src, int numVertices) = 0;
virtual GLuint GetGpuBuffer() = 0;
int GetNumElements() const {
return _numElements;
}
protected:
int _numElements;
};
class OsdGpuVertexBuffer : public OsdVertexBuffer {
public:
OsdGpuVertexBuffer(int numElements, int count) : _vbo(0), _numElements(numElements) {
int stride = numElements * count * sizeof(float);
OsdGpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _vbo(0) {
int size = numElements * numVertices * sizeof(float);
glGenBuffers(1, &_vbo);
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
glBufferData(GL_ARRAY_BUFFER, stride, 0, GL_STREAM_DRAW);
glBufferData(GL_ARRAY_BUFFER, size, 0, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
virtual ~OsdGpuVertexBuffer() {
glDeleteBuffers(1, &_vbo);
}
virtual void UpdateData(const float *src, int count) {
virtual void UpdateData(const float *src, int numVertices) {
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
float * pointer = (float*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
memcpy(pointer, src, _numElements * count * sizeof(float));
memcpy(pointer, src, _numElements * numVertices * sizeof(float));
glUnmapBuffer(GL_ARRAY_BUFFER);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
virtual GLuint GetGpuBuffer() {
return _vbo;
}
private:
protected:
GLuint _vbo;
int _numElements;
};
class OsdCpuVertexBuffer : public OsdVertexBuffer {
public:
OsdCpuVertexBuffer(int numElements, int count) : _cpuVbo(NULL), _vboSize(0), _numElements(numElements), _vbo(0) {
_cpuVbo = new float[numElements * count];
_vboSize = numElements * count;
OsdCpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _cpuVbo(NULL), _vboSize(0), _vbo(0) {
_vboSize = numElements * numVertices;
_cpuVbo = new float[numElements * numVertices];
}
virtual ~OsdCpuVertexBuffer() {
if(_cpuVbo) delete[] _cpuVbo;
if(_vbo) glDeleteBuffers(1, &_vbo);
delete [] _cpuVbo;
if (_vbo)
glDeleteBuffers(1, &_vbo);
}
virtual void UpdateData(const float *src, int count) {
memcpy(_cpuVbo, src, _numElements * count * sizeof(float));
virtual void UpdateData(const float *src, int numVertices) {
memcpy(_cpuVbo, src, _numElements * numVertices * sizeof(float));
}
float *GetCpuBuffer() {
return _cpuVbo;
}
// XXX: this method name is missleading
virtual GLuint GetGpuBuffer() {
if(!_vbo) glGenBuffers(1, &_vbo);
if (!_vbo)
glGenBuffers(1, &_vbo);
glBindBuffer(GL_ARRAY_BUFFER, _vbo);
glBufferData(GL_ARRAY_BUFFER, _vboSize * sizeof(float), _cpuVbo, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
return _vbo;
}
int GetNumElements() const {
return _numElements;
}
private:
protected:
float *_cpuVbo;
int _vboSize;
int _numElements;
GLuint _vbo;
};