diff --git a/opensubdiv/osd/cpuDispatcher.cpp b/opensubdiv/osd/cpuDispatcher.cpp index 222ab39a..02c2f904 100644 --- a/opensubdiv/osd/cpuDispatcher.cpp +++ b/opensubdiv/osd/cpuDispatcher.cpp @@ -66,29 +66,33 @@ namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { -OsdCpuKernelDispatcher::DeviceTable::~DeviceTable() { +OsdCpuKernelDispatcher::SubdivisionTable::~SubdivisionTable() { - if (devicePtr) - free(devicePtr); + if (ptr) + free(ptr); } void -OsdCpuKernelDispatcher::DeviceTable::Copy( int size, const void *table ) { +OsdCpuKernelDispatcher::SubdivisionTable::Copy( int size, const void *table ) { if (size > 0) { - if (devicePtr) - free(devicePtr); - devicePtr = malloc(size); - memcpy(devicePtr, table, size); + if (ptr) + free(ptr); + ptr = malloc(size); + memcpy(ptr, table, size); } } OsdCpuKernelDispatcher::OsdCpuKernelDispatcher( int levels ) - : OsdKernelDispatcher(levels) { + : OsdKernelDispatcher(levels), _currentVertexBuffer(NULL), _currentVaryingBuffer(NULL), _vdesc(NULL) { _tables.resize(TABLE_MAX); } -OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() { } +OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() { + + if (_vdesc) + delete _vdesc; +} void OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) { @@ -96,30 +100,37 @@ OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) _tables[tableIndex].Copy(size, ptr); } -void -OsdCpuKernelDispatcher::BeginLaunchKernel() { } - -void -OsdCpuKernelDispatcher::EndLaunchKernel() { } - OsdVertexBuffer * -OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int count) +OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices) { - return new OsdCpuVertexBuffer(numElements, count); + return new OsdCpuVertexBuffer(numElements, numVertices); } void OsdCpuKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { - _vertexBuffer = dynamic_cast(vertex); - _varyingBuffer = dynamic_cast(varying); + if (vertex) + _currentVertexBuffer = dynamic_cast(vertex); + else + _currentVertexBuffer = NULL; + + if (varying) + _currentVaryingBuffer = dynamic_cast(varying); + else + _currentVaryingBuffer = NULL; + + _vdesc = new VertexDescriptor(_currentVertexBuffer ? _currentVertexBuffer->GetNumElements() : 0, + _currentVaryingBuffer ? _currentVaryingBuffer->GetNumElements() : 0); } void OsdCpuKernelDispatcher::UnbindVertexBuffer() { - _vertexBuffer = NULL; - _varyingBuffer = NULL; + delete _vdesc; + _vdesc = NULL; + + _currentVertexBuffer = NULL; + _currentVaryingBuffer = NULL; } void @@ -129,21 +140,17 @@ OsdCpuKernelDispatcher::Synchronize() { } void OsdCpuKernelDispatcher::ApplyBilinearFaceVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_numVertexElements, _numVaryingElements); - - computeFace(&vd, _vbo, _varyingVbo, - (int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1], - (int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1], + computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1], + (int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_numVertexElements, _numVaryingElements); - - computeBilinearEdge(&vd, _vbo, _varyingVbo, - (int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1], + computeBilinearEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1], offset, start, end); } @@ -151,38 +158,26 @@ OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh * me void OsdCpuKernelDispatcher::ApplyBilinearVertexVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_numVertexElements, _numVaryingElements); - - computeBilinearVertex(&vd, _vbo, _varyingVbo, - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], + computeBilinearVertex(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyCatmarkFaceVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeFace(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1], - (int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1], + computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1], + (int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1], offset, start, end); - - float *p = _vertexBuffer->GetCpuBuffer(); - for(int i = 0; i < 150; i+=3){ - printf("%f %f %f\n", p[0], p[1], p[2]); - p+=3; - } } void OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1], - (float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1], + computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1], + (float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1], offset, start, end); } @@ -190,34 +185,28 @@ OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh * mes void OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end, pass); } void OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1], - (float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1], + computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1], + (float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1], offset, start, end); } @@ -225,28 +214,23 @@ OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh * mesh, void OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelB( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeLoopVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeLoopVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelA( FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end, pass); } } // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; } // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuDispatcher.h b/opensubdiv/osd/cpuDispatcher.h index 6be66766..79105d96 100644 --- a/opensubdiv/osd/cpuDispatcher.h +++ b/opensubdiv/osd/cpuDispatcher.h @@ -70,6 +70,8 @@ namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { +class VertexDescriptor; + class OsdCpuKernelDispatcher : public OsdKernelDispatcher { public: @@ -102,11 +104,11 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr); - virtual void BeginLaunchKernel(); + virtual void OnKernelLaunch() {} - virtual void EndLaunchKernel(); + virtual void OnKernelFinish() {} - virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count); + virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices); virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); @@ -123,18 +125,26 @@ public: protected: - struct DeviceTable { - DeviceTable() : devicePtr(NULL) { } - ~DeviceTable(); + struct SubdivisionTable { + SubdivisionTable() : ptr(NULL) { } + + ~SubdivisionTable(); void Copy(int size, const void *ptr); - void *devicePtr; + void *ptr; }; - OsdCpuVertexBuffer *_vertexBuffer, *_varyingBuffer; + float *GetVertexBuffer() const { return _currentVertexBuffer ? _currentVertexBuffer->GetCpuBuffer() : NULL; } - std::vector _tables; + float *GetVaryingBuffer() const { return _currentVaryingBuffer ? _currentVaryingBuffer->GetCpuBuffer() : NULL; } + + OsdCpuVertexBuffer *_currentVertexBuffer, + *_currentVaryingBuffer; + + VertexDescriptor *_vdesc; + + std::vector _tables; }; } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/cpuKernel.cpp b/opensubdiv/osd/cpuKernel.cpp index 2016b504..074f723d 100644 --- a/opensubdiv/osd/cpuKernel.cpp +++ b/opensubdiv/osd/cpuKernel.cpp @@ -64,9 +64,6 @@ namespace OPENSUBDIV_VERSION { void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying, const int *F_IT, const int *F_ITa, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -77,23 +74,19 @@ void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying float weight = 1.0f/n; // XXX: should use local vertex struct variable instead of accumulating directly into global memory. - float *dst = &vertex[(offset + i)*ve]; - float *dstVarying = &varying[(offset + i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); for (int j=0; jAddWithWeight(dst, &vertex[index*ve], weight); - vdesc->AddVaryingWithWeight(dstVarying, &varying[index*vev], weight); + vdesc->AddWithWeight(vertex, dstIndex, index, weight); + vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight); } } } void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, const float *E_W, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -105,30 +98,26 @@ void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, float vertWeight = E_W[i*2+0]; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[eidx0*ve], vertWeight); - vdesc->AddWithWeight(dst, &vertex[eidx1*ve], vertWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx0, vertWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx1, vertWeight); if (eidx2 != -1) { float faceWeight = E_W[i*2+1]; - vdesc->AddWithWeight(dst, &vertex[eidx2*ve], faceWeight); - vdesc->AddWithWeight(dst, &vertex[eidx3*ve], faceWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx2, faceWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx3, faceWeight); } - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f); - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f); } } void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const float *V_W, int offset, int start, int end, int pass) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -146,29 +135,25 @@ void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying if (weight>0.0f && weight<1.0f && n > 0) weight=1.0f-weight; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; + int dstIndex = offset + i; if(not pass) - vdesc->Clear(dst, dstVarying); + vdesc->Clear(vertex, varying, dstIndex); if (eidx0==-1 || (pass==0 && (n==-1)) ) { - vdesc->AddWithWeight(dst, &vertex[p*ve], weight); + vdesc->AddWithWeight(vertex, dstIndex, p, weight); } else { - vdesc->AddWithWeight(dst, &vertex[p*ve], weight * 0.75f); - vdesc->AddWithWeight(dst, &vertex[eidx0*ve], weight * 0.125f); - vdesc->AddWithWeight(dst, &vertex[eidx1*ve], weight * 0.125f); + vdesc->AddWithWeight(vertex, dstIndex, p, weight * 0.75f); + vdesc->AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f); + vdesc->AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f); } if (not pass) - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -181,25 +166,21 @@ void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying float wp = 1.0f/float(n*n); float wv = (n-2.0f) * n * wp; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[p*ve], weight * wv); + vdesc->AddWithWeight(vertex, dstIndex, p, weight * wv); for (int j = 0; j < n; ++j) { - vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2]*ve], weight * wp); - vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2+1]*ve], weight * wp); + vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp); + vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp); } - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -214,24 +195,20 @@ void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *var beta = beta * beta; beta = (0.625f - beta) * wp; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[p*ve], weight * (1.0f - (beta * n))); + vdesc->AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n))); for (int j = 0; j < n; ++j) - vdesc->AddWithWeight(dst, &vertex[V_IT[h+j]*ve], weight * beta); + vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta); - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -239,35 +216,30 @@ void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *va int eidx0 = E_IT[2*i+0]; int eidx1 = E_IT[2*i+1]; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); + + vdesc->AddWithWeight(vertex, dstIndex, eidx0, 0.5f); + vdesc->AddWithWeight(vertex, dstIndex, eidx1, 0.5f); - vdesc->AddWithWeight(dst, &vertex[eidx0*ve], 0.5f); - vdesc->AddWithWeight(dst, &vertex[eidx1*ve], 0.5f); - - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f); - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f); } } void computeBilinearVertex(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif for (int i = start; i < end; i++) { int p = V_ITa[i]; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[p*ve], 1.0f); - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f); + vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } diff --git a/opensubdiv/osd/cpuKernel.h b/opensubdiv/osd/cpuKernel.h index ba6d9293..c90e9f60 100644 --- a/opensubdiv/osd/cpuKernel.h +++ b/opensubdiv/osd/cpuKernel.h @@ -67,20 +67,28 @@ struct VertexDescriptor { VertexDescriptor(int numVertexElem, int numVaryingElem) : numVertexElements(numVertexElem), numVaryingElements(numVaryingElem) { } - void Clear(float *vertex, float *varying) const { - for (int i = 0; i < numVertexElements; ++i) - vertex[i] = 0.0f; + void Clear(float *vertex, float *varying, int index) const { + if (vertex) { + for (int i = 0; i < numVertexElements; ++i) + vertex[index*numVertexElements+i] = 0.0f; + } - for (int i = 0; i < numVaryingElements; ++i) - varying[i] = 0.0f; + if (varying) { + for (int i = 0; i < numVaryingElements; ++i) + varying[index*numVaryingElements+i] = 0.0f; + } } - void AddWithWeight(float *vertex, const float *src, float weight) const { + void AddWithWeight(float *vertex, int dstIndex, int srcIndex, float weight) const { + int d = dstIndex * numVertexElements; + int s = srcIndex * numVertexElements; for (int i = 0; i < numVertexElements; ++i) - vertex[i] += src[i] * weight; + vertex[d++] += vertex[s++] * weight; } - void AddVaryingWithWeight(float *varying, const float *src, float weight) const { + void AddVaryingWithWeight(float *varying, int dstIndex, int srcIndex, float weight) const { + int d = dstIndex * numVaryingElements; + int s = srcIndex * numVaryingElements; for (int i = 0; i < numVaryingElements; ++i) - varying[i] += src[i] * weight; + varying[d++] += varying[s++] * weight; } int numVertexElements; diff --git a/opensubdiv/osd/cudaDispatcher.cpp b/opensubdiv/osd/cudaDispatcher.cpp index b53e9e39..ce7e2f8d 100644 --- a/opensubdiv/osd/cudaDispatcher.cpp +++ b/opensubdiv/osd/cudaDispatcher.cpp @@ -81,7 +81,42 @@ void OsdCudaComputeBilinearVertex(float *vertex, float *varying, int numUserVert namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { +OsdCudaVertexBuffer::OsdCudaVertexBuffer(int numElements, int numVertices) : + OsdGpuVertexBuffer(numElements, numVertices) { + // register vbo as cuda resource + cudaGraphicsGLRegisterBuffer(&_cudaResource, _vbo, cudaGraphicsMapFlagsNone); +} + +void +OsdCudaVertexBuffer::UpdateData(const float *src, int numVertices) { + + void *dst = Map(); + cudaMemcpy(dst, src, _numElements * numVertices * sizeof(float), cudaMemcpyHostToDevice); + Unmap(); +} + +void * +OsdCudaVertexBuffer::Map() { + + size_t num_bytes; + void *ptr; + + cudaGraphicsMapResources(1, &_cudaResource, 0); + cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource); + return ptr; +} + +void +OsdCudaVertexBuffer::Unmap() { + cudaGraphicsUnmapResources(1, &_cudaResource, 0); +} + +OsdCudaVertexBuffer::~OsdCudaVertexBuffer() { + cudaGraphicsUnregisterResource(_cudaResource); +} + +// ------------------------------------------------------------------------------- OsdCudaKernelDispatcher::DeviceTable::~DeviceTable() { if (devicePtr) cudaFree(devicePtr); @@ -95,18 +130,16 @@ OsdCudaKernelDispatcher::DeviceTable::Copy(int size, const void *ptr) { cudaMalloc(&devicePtr, size); cudaMemcpy(devicePtr, ptr, size, cudaMemcpyHostToDevice); } +// ------------------------------------------------------------------------------- OsdCudaKernelDispatcher::OsdCudaKernelDispatcher(int levels) - : OsdKernelDispatcher(levels), - _cudaVertexResource(NULL), - _cudaVaryingResource(NULL) + : OsdKernelDispatcher(levels) { _tables.resize(TABLE_MAX); } OsdCudaKernelDispatcher::~OsdCudaKernelDispatcher() { - cudaDeviceReset(); // XXX: necessary? } void @@ -115,48 +148,52 @@ OsdCudaKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) _tables[tableIndex].Copy(size, ptr); } -void -OsdCudaKernelDispatcher::BeginLaunchKernel() { } - -void -OsdCudaKernelDispatcher::EndLaunchKernel() { } - OsdVertexBuffer * -OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int count) +OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices) { - return new OsdGpuVertexBuffer(numElements, count); + return new OsdCudaVertexBuffer(numElements, numVertices); } void OsdCudaKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { - OsdGpuVertexBuffer *bVertex = dynamic_cast(vertex); - OsdGpuVertexBuffer *bVarying = dynamic_cast(varying); - size_t num_bytes; + if (vertex) + _currentVertexBuffer = dynamic_cast(vertex); + else + _currentVertexBuffer = NULL; - if (bVertex) { - cudaGraphicsGLRegisterBuffer(&_cudaVertexResource, bVertex->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard); - cudaGraphicsMapResources(1, &_cudaVertexResource, 0); - cudaGraphicsResourceGetMappedPointer((void **)&_deviceVertices, &num_bytes, _cudaVertexResource); + if (varying) + _currentVaryingBuffer = dynamic_cast(varying); + else + _currentVaryingBuffer = NULL; + + if (_currentVertexBuffer) { + _deviceVertices = (float*)_currentVertexBuffer->Map(); + // XXX todo remove _numVertexElements + _numVertexElements = _currentVertexBuffer->GetNumElements(); + } else { + _numVertexElements = 0; } - if (bVarying) { - cudaGraphicsGLRegisterBuffer(&_cudaVaryingResource, bVarying->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard); - cudaGraphicsMapResources(1, &_cudaVaryingResource, 0); - cudaGraphicsResourceGetMappedPointer((void **)&_deviceVaryings, &num_bytes, _cudaVaryingResource); + if (_currentVaryingBuffer) { + _deviceVaryings = (float*)_currentVaryingBuffer->Map(); + _numVaryingElements = _currentVaryingBuffer->GetNumElements(); + } else { + _numVaryingElements = 0; } } void OsdCudaKernelDispatcher::UnbindVertexBuffer() { - if (_cudaVertexResource) - cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0); - if (_cudaVaryingResource) - cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0); + if (_currentVertexBuffer){ + _currentVertexBuffer->Unmap(); + } + if (_currentVaryingBuffer) + _currentVaryingBuffer->Unmap(); - _cudaVertexResource = NULL; - _cudaVaryingResource = NULL; + _currentVertexBuffer = NULL; + _currentVaryingBuffer = NULL; } void diff --git a/opensubdiv/osd/cudaDispatcher.h b/opensubdiv/osd/cudaDispatcher.h index e6673fb0..695d848c 100644 --- a/opensubdiv/osd/cudaDispatcher.h +++ b/opensubdiv/osd/cudaDispatcher.h @@ -59,6 +59,7 @@ #include "../version.h" #include "../osd/kernelDispatcher.h" +#include "../osd/vertexBuffer.h" #include @@ -72,6 +73,19 @@ namespace OPENSUBDIV_VERSION { struct DeviceVertex; +class OsdCudaVertexBuffer : public OsdGpuVertexBuffer { +public: + OsdCudaVertexBuffer(int numElements, int numVertices); + virtual ~OsdCudaVertexBuffer(); + + virtual void UpdateData(const float *src, int numVertices); + void * Map(); + void Unmap(); + +protected: + cudaGraphicsResource *_cudaResource; +}; + class OsdCudaKernelDispatcher : public OsdKernelDispatcher { public: @@ -106,11 +120,11 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr); - virtual void BeginLaunchKernel(); - - virtual void EndLaunchKernel(); + virtual void OnKernelLaunch() {} - virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count); + virtual void OnKernelFinish() {} + + virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices); virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); @@ -138,8 +152,8 @@ protected: std::vector _tables; - cudaGraphicsResource *_cudaVertexResource, - *_cudaVaryingResource; + OsdCudaVertexBuffer *_currentVertexBuffer, + *_currentVaryingBuffer; float *_deviceVertices, *_deviceVaryings; diff --git a/opensubdiv/osd/glslDispatcher.cpp b/opensubdiv/osd/glslDispatcher.cpp index 312f8627..4cabceed 100644 --- a/opensubdiv/osd/glslDispatcher.cpp +++ b/opensubdiv/osd/glslDispatcher.cpp @@ -61,6 +61,8 @@ #include #include #include +#include +#include #define OPT_E0_IT_VEC4 #define OPT_E0_S_VEC2 @@ -88,65 +90,28 @@ static const char *shaderDefines = "" #endif ; +std::vector OsdGlslKernelDispatcher::shaderRegistry; + OsdGlslKernelDispatcher::OsdGlslKernelDispatcher(int levels) : OsdKernelDispatcher(levels) { - _vertexBuffer = 0; - _varyingBuffer = 0; - _prgKernel = 0; + _currentVertexBuffer = 0; + _currentVaryingBuffer = 0; + _shader = 0; glGenTextures(1, &_vertexTexture); glGenTextures(1, &_varyingTexture); _tableBuffers.resize(TABLE_MAX); _tableTextures.resize(TABLE_MAX); - _tableUniforms.resize(TABLE_MAX); - _tableOffsetUniforms.resize(TABLE_MAX); glGenBuffers(TABLE_MAX, &_tableBuffers[0]); glGenTextures(TABLE_MAX, &_tableTextures[0]); - subComputeFace = 0; - subComputeEdge = 0; - subComputeVertexA = 0; - subComputeVertexB = 0; - uniformVertexPass = 0; - uniformIndexStart = 0; - uniformIndexOffset = 0; - - compile(shaderSource, shaderDefines); - - subComputeFace = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeFace"); - subComputeEdge = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeEdge"); - subComputeVertexA = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexA"); - subComputeVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexB"); - subComputeLoopVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "loopComputeVertexB"); - - uniformVertexPass = glGetUniformLocation(_prgKernel, "vertexPass"); - uniformIndexStart = glGetUniformLocation(_prgKernel, "indexStart"); - uniformIndexOffset = glGetUniformLocation(_prgKernel, "indexOffset"); - - _tableUniforms[F_IT] = glGetUniformLocation(_prgKernel, "_F0_IT"); - _tableUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "_F0_ITa"); - _tableUniforms[E_IT] = glGetUniformLocation(_prgKernel, "_E0_IT"); - _tableUniforms[V_IT] = glGetUniformLocation(_prgKernel, "_V0_IT"); - _tableUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "_V0_ITa"); - _tableUniforms[E_W] = glGetUniformLocation(_prgKernel, "_E0_S"); - _tableUniforms[V_W] = glGetUniformLocation(_prgKernel, "_V0_S"); - _tableOffsetUniforms[F_IT] = glGetUniformLocation(_prgKernel, "F_IT_ofs"); - _tableOffsetUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "F_ITa_ofs"); - _tableOffsetUniforms[E_IT] = glGetUniformLocation(_prgKernel, "E_IT_ofs"); - _tableOffsetUniforms[V_IT] = glGetUniformLocation(_prgKernel, "V_IT_ofs"); - _tableOffsetUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "V_ITa_ofs"); - _tableOffsetUniforms[E_W] = glGetUniformLocation(_prgKernel, "E_W_ofs"); - _tableOffsetUniforms[V_W] = glGetUniformLocation(_prgKernel, "V_W_ofs"); } OsdGlslKernelDispatcher::~OsdGlslKernelDispatcher() { - if (_prgKernel) - glDeleteProgram(_prgKernel); - glDeleteTextures(1, &_vertexTexture); glDeleteTextures(1, &_varyingTexture); @@ -163,98 +128,134 @@ OsdGlslKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) } void -OsdGlslKernelDispatcher::BeginLaunchKernel() { +OsdGlslKernelDispatcher::OnKernelLaunch() { - glUseProgram(_prgKernel); glEnable(GL_RASTERIZER_DISCARD); + _shader->UseProgram(); //XXX what if loop.. - bindTextureBuffer(_tableUniforms[F_IT], _tableBuffers[F_IT], + bindTextureBuffer(_shader->GetTableUniform(F_IT), _tableBuffers[F_IT], _tableTextures[F_IT], GL_R32UI, 2); - bindTextureBuffer(_tableUniforms[F_ITa], _tableBuffers[F_ITa], + bindTextureBuffer(_shader->GetTableUniform(F_ITa), _tableBuffers[F_ITa], _tableTextures[F_ITa], GL_R32I, 3); #ifdef OPT_E0_IT_VEC4 - bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT], + bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT], _tableTextures[E_IT], GL_RGBA32UI, 4); #else - bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT], + bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT], _tableTextures[E_IT], GL_R32UI, 4); #endif #ifdef OPT_CATMARK_V_IT_VEC2 - bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT], + bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT], _tableTextures[V_IT], GL_RG32UI, 5); #else - bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT], + bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT], _tableTextures[V_IT], GL_R32UI, 5); #endif - bindTextureBuffer(_tableUniforms[V_ITa], _tableBuffers[V_ITa], + bindTextureBuffer(_shader->GetTableUniform(V_ITa), _tableBuffers[V_ITa], _tableTextures[V_ITa], GL_R32I, 6); #ifdef OPT_E0_S_VEC2 - bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W], + bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W], _tableTextures[E_W], GL_RG32F, 7); #else - bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W], + bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W], _tableTextures[E_W], GL_R32F, 7); #endif - bindTextureBuffer(_tableUniforms[V_W], _tableBuffers[V_W], + bindTextureBuffer(_shader->GetTableUniform(V_W), _tableBuffers[V_W], _tableTextures[V_W], GL_R32F, 8); } void -OsdGlslKernelDispatcher::EndLaunchKernel() { +OsdGlslKernelDispatcher::OnKernelFinish() { + + unbindTextureBuffer(2); + unbindTextureBuffer(3); + unbindTextureBuffer(4); + unbindTextureBuffer(5); + unbindTextureBuffer(6); + unbindTextureBuffer(7); + unbindTextureBuffer(8); glDisable(GL_RASTERIZER_DISCARD); glUseProgram(0); - - // XXX Unbind table buffer } OsdVertexBuffer * -OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int count) +OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices) { - return new OsdGpuVertexBuffer(numElements, count); + return new OsdGpuVertexBuffer(numElements, numVertices); } void OsdGlslKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { - OsdGpuVertexBuffer *bVertex = dynamic_cast(vertex); - OsdGpuVertexBuffer *bVarying = dynamic_cast(varying); + if (vertex) + _currentVertexBuffer = dynamic_cast(vertex); + else + _currentVertexBuffer = NULL; - if (bVertex) { - _vertexBuffer = bVertex->GetGpuBuffer(); - bindTextureBuffer(_vertexUniform, _vertexBuffer, _vertexTexture, GL_RGB32F, 0); + if (varying) + _currentVaryingBuffer = dynamic_cast(varying); + else + _currentVaryingBuffer = NULL; + + int numVertexElements = vertex ? vertex->GetNumElements() : 0; + int numVaryingElements = varying ? varying->GetNumElements() : 0; + + // find appropriate shader program from registry (compile it if needed) + std::vector::iterator it = + std::find_if(shaderRegistry.begin(), shaderRegistry.end(), + ComputeShader::Match(numVertexElements, numVaryingElements)); + + _shader = NULL; + if (it != shaderRegistry.end()) { + _shader = &(*it); + } else { + shaderRegistry.push_back(ComputeShader()); + _shader = &shaderRegistry.back(); + _shader->Compile(numVertexElements, numVaryingElements); } - if (bVarying) { - _varyingBuffer = bVarying->GetGpuBuffer(); - bindTextureBuffer(_varyingUniform, _varyingBuffer, _varyingTexture, GL_R32F, 0); + _shader->UseProgram(); // need to bind textures + + // bind vertex texture + if (_currentVertexBuffer) { + bindTextureBuffer(_shader->GetVertexUniform(), _currentVertexBuffer->GetGpuBuffer(), _vertexTexture, GL_RGB32F, 0); } - glUseProgram(_prgKernel); - glUniform1i(_vertexUniform, 0); + if (_currentVaryingBuffer) { + bindTextureBuffer(_shader->GetVaryingUniform(), _currentVaryingBuffer->GetGpuBuffer(), _varyingTexture, GL_R32F, 1); + } #if 0 // experiment to use image store function glActiveTexture(GL_TEXTURE0 + 0); glBindImageTextureEXT(0, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F); if (_numVarying > 0) { - glUniform1i(_varyingUniform, 1); glBindImageTextureEXT(1, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F); } #endif + CHECK_GL_ERROR("BindVertexBuffer \n"); + } void OsdGlslKernelDispatcher::UnbindVertexBuffer() { + if (_currentVertexBuffer) { + unbindTextureBuffer(0); + } + if (_currentVaryingBuffer) { + unbindTextureBuffer(1); + } + _currentVertexBuffer = NULL; + _currentVaryingBuffer = NULL; } - void OsdGlslKernelDispatcher::Synchronize() { glFinish(); @@ -263,14 +264,15 @@ OsdGlslKernelDispatcher::Synchronize() { void OsdGlslKernelDispatcher::bindTextureBuffer( GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const { - + if (sampler == -1) { OSD_ERROR("BindTextureError:: sampler = %d\n", sampler); return; } - OSD_DEBUG("BindTextureBuffer sampler=%d, buffer=%d, texture = %d, E%x\n", sampler, buffer, texture, glGetError()); + OSD_DEBUG("BindTextureBuffer unit=%d, sampler=%d, buffer=%d, texture = %d, E%x\n", unit, sampler, buffer, texture, glGetError()); glUniform1i(sampler, unit); + CHECK_GL_ERROR("BindTextureBuffer glUniform %d\n", unit); glActiveTexture(GL_TEXTURE0 + unit); CHECK_GL_ERROR("BindTextureBuffer glActiveTexture %d\n", unit); glBindTexture(GL_TEXTURE_BUFFER, texture); @@ -280,38 +282,137 @@ OsdGlslKernelDispatcher::bindTextureBuffer( glActiveTexture(GL_TEXTURE0); } -bool -OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDefine) { - _prgKernel = glCreateProgram(); +void +OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const { + + glActiveTexture(GL_TEXTURE0 + unit); + glBindTexture(GL_TEXTURE_BUFFER, 0); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkFaceVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[F_IT][level-1], + _tableOffsets[F_ITa][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[E_IT][level-1], + _tableOffsets[E_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_IT][level-1], + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( + FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, pass, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyLoopEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[E_IT][level-1], + _tableOffsets[E_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyLoopVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_IT][level-1], + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA( + FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { + + _shader->ApplyLoopVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, pass, start, end); +} + +// ------------------------------------------------------------------------------- + +OsdGlslKernelDispatcher::ComputeShader::ComputeShader() : + _program(0) +{ +} + +OsdGlslKernelDispatcher::ComputeShader::~ComputeShader() +{ + if (_program) + glDeleteProgram(_program); +} + +bool +OsdGlslKernelDispatcher::ComputeShader::Compile(int numVertexElements, int numVaryingElements) { + + // XXX: NOTE: current GLSL only supports numVertexElements = 6!!! + assert(numVertexElements == 6); + + _numVertexElements = numVertexElements; + _numVaryingElements = numVaryingElements; + _program = glCreateProgram(); GLuint shader = glCreateShader(GL_VERTEX_SHADER); char constantDefine[256]; snprintf(constantDefine, 256, - "#define NUM_VARYING_ELEMENTS %d\n", _numVarying); + "#define NUM_VARYING_ELEMENTS %d\n", numVaryingElements); const char *shaderSources[3]; shaderSources[0] = constantDefine; - shaderSources[1] = shaderDefine; + shaderSources[1] = shaderDefines; shaderSources[2] = shaderSource; glShaderSource(shader, 3, shaderSources, NULL); glCompileShader(shader); - glAttachShader(_prgKernel, shader); + glAttachShader(_program, shader); const char *outputs[] = { "outPosition", "outNormal", "gl_NextBuffer", "outVaryingData" }; - int nOutputs = _numVarying > 0 ? 4 : 2; - glTransformFeedbackVaryings(_prgKernel, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS); + int nOutputs = numVaryingElements > 0 ? 4 : 2; + + glTransformFeedbackVaryings(_program, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS); CHECK_GL_ERROR("Transform feedback initialize \n"); GLint linked = 0; - glLinkProgram(_prgKernel); - glGetProgramiv(_prgKernel, GL_LINK_STATUS, &linked); + glLinkProgram(_program); + glGetProgramiv(_program, GL_LINK_STATUS, &linked); if (linked == GL_FALSE) { OSD_ERROR("Fail to link shader\n"); @@ -320,56 +421,79 @@ OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDef glGetShaderInfoLog(shader, 1024, NULL, buffer); OSD_ERROR(buffer); - glGetProgramInfoLog(_prgKernel, 1024, NULL, buffer); + glGetProgramInfoLog(_program, 1024, NULL, buffer); OSD_ERROR(buffer); - glDeleteProgram(_prgKernel); - _prgKernel = 0; + glDeleteProgram(_program); + _program = 0; // XXX ERROR HANDLE return false; } glDeleteShader(shader); - _vertexUniform = glGetUniformLocation(_prgKernel, "vertex"); - _varyingUniform = glGetUniformLocation(_prgKernel, "varyingData"); + _vertexUniform = glGetUniformLocation(_program, "vertex"); + _varyingUniform = glGetUniformLocation(_program, "varyingData"); + + _subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace"); + _subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge"); + _subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA"); + _subComputeVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexB"); + _subComputeLoopVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "loopComputeVertexB"); + + _uniformVertexPass = glGetUniformLocation(_program, "vertexPass"); + _uniformIndexStart = glGetUniformLocation(_program, "indexStart"); + _uniformIndexOffset = glGetUniformLocation(_program, "indexOffset"); + + _tableUniforms.resize(TABLE_MAX); + _tableOffsetUniforms.resize(TABLE_MAX); + + _tableUniforms[F_IT] = glGetUniformLocation(_program, "_F0_IT"); + _tableUniforms[F_ITa] = glGetUniformLocation(_program, "_F0_ITa"); + _tableUniforms[E_IT] = glGetUniformLocation(_program, "_E0_IT"); + _tableUniforms[V_IT] = glGetUniformLocation(_program, "_V0_IT"); + _tableUniforms[V_ITa] = glGetUniformLocation(_program, "_V0_ITa"); + _tableUniforms[E_W] = glGetUniformLocation(_program, "_E0_S"); + _tableUniforms[V_W] = glGetUniformLocation(_program, "_V0_S"); + _tableOffsetUniforms[F_IT] = glGetUniformLocation(_program, "F_IT_ofs"); + _tableOffsetUniforms[F_ITa] = glGetUniformLocation(_program, "F_ITa_ofs"); + _tableOffsetUniforms[E_IT] = glGetUniformLocation(_program, "E_IT_ofs"); + _tableOffsetUniforms[V_IT] = glGetUniformLocation(_program, "V_IT_ofs"); + _tableOffsetUniforms[V_ITa] = glGetUniformLocation(_program, "V_ITa_ofs"); + _tableOffsetUniforms[E_W] = glGetUniformLocation(_program, "E_W_ofs"); + _tableOffsetUniforms[V_W] = glGetUniformLocation(_program, "V_W_ofs"); return true; } void -OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const { - glActiveTexture(GL_TEXTURE0 + unit); - glBindTexture(GL_TEXTURE_BUFFER, 0); -} - -void -OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass) const { +OsdGlslKernelDispatcher::ComputeShader::transformGpuBufferData(OsdGpuVertexBuffer *vertexBuffer, OsdGpuVertexBuffer *varyingBuffer, + GLint offset, int start, int end) const { int count = end - start; if (count <= 0) return; - OSD_DEBUG("_transformGpuBufferData kernel=%d E%x, offset=%d, count=%d\n", kernel, glGetError(), offset, count); - - glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &kernel); - glUniform1i(uniformVertexPass, vertexPass); // XXX + OSD_DEBUG("_transformGpuBufferData offset=%d, count=%d\n", glGetError(), offset, count); // set batch range - glUniform1i(uniformIndexStart, start); - glUniform1i(uniformIndexOffset, offset); + glUniform1i(_uniformIndexStart, start); + glUniform1i(_uniformIndexOffset, offset); // XXX: end is not used here now CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n", offset, start); // set transform feedback buffer - int vertexStride = _numVertexElements*sizeof(float); - int varyingStride = _numVarying*sizeof(float); - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, _vertexBuffer, - (start+offset)*vertexStride, count*vertexStride); - CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n"); + if (vertexBuffer) { + int vertexStride = vertexBuffer->GetNumElements()*sizeof(float); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer->GetGpuBuffer(), + (start+offset)*vertexStride, count*vertexStride); + } - if (_numVarying > 0){ - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, _varyingBuffer, + if (varyingBuffer){ + int varyingStride = varyingBuffer->GetNumElements()*sizeof(float); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer->GetGpuBuffer(), (start+offset)*varyingStride, count*varyingStride); } + CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n"); + glBeginTransformFeedback(GL_POINTS); CHECK_GL_ERROR("transformGpuBufferData glBeginTransformFeedback\n"); @@ -387,68 +511,84 @@ OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int } void -OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - - glUniform1i(_tableOffsetUniforms[F_IT], _tableOffsets[F_IT][level-1]); - glUniform1i(_tableOffsetUniforms[F_ITa], _tableOffsets[F_ITa][level-1]); - transformGpuBufferData(subComputeFace, offset, start, end); +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkFaceVerticesKernel( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end) { + + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace); + glUniform1i(_tableOffsetUniforms[F_IT], F_IT_ofs); + glUniform1i(_tableOffsetUniforms[F_ITa], F_ITa_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - - glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]); - glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]); - transformGpuBufferData(subComputeEdge, offset, start, end); +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkEdgeVerticesKernel( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end) { + + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge); + glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs); + glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelB( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]); - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeVertexB, offset, start, end); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexB); + glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( - FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelA( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeVertexA, offset, start, end, pass); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA); + glUniform1i(_uniformVertexPass, pass ? 1 : 0); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyLoopEdgeVerticesKernel( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end) { - glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]); - glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]); - transformGpuBufferData(subComputeEdge, offset, start, end); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge); + glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs); + glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelB( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]); - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeLoopVertexB, offset, start, end); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeLoopVertexB); + glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA( - FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelA( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeVertexA, offset, start, end, pass); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA); + glUniform1i(_uniformVertexPass, pass ? 1 : 0); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/glslDispatcher.h b/opensubdiv/osd/glslDispatcher.h index 7e2c007f..eefef98b 100644 --- a/opensubdiv/osd/glslDispatcher.h +++ b/opensubdiv/osd/glslDispatcher.h @@ -89,11 +89,11 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr); - virtual void BeginLaunchKernel(); - - virtual void EndLaunchKernel(); + virtual void OnKernelLaunch(); - virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count); + virtual void OnKernelFinish(); + + virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices); virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); @@ -110,42 +110,100 @@ public: protected: + class ComputeShader { + public: + ComputeShader(); + ~ComputeShader(); + + bool Compile(int numVertexElements, int numVaryingElements); + + GLuint GetTableUniform(int table) const { + return _tableUniforms[table]; + } + GLuint GetVertexUniform() const { return _vertexUniform; } + GLuint GetVaryingUniform() const { return _varyingUniform; } + + void ApplyCatmarkFaceVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end); + + void ApplyCatmarkEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end); + + void ApplyCatmarkVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end); + + void ApplyCatmarkVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end); + + void ApplyLoopEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end); + + void ApplyLoopVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end); + + void ApplyLoopVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end); + + void UseProgram () const { + glUseProgram(_program); + } + + struct Match { + Match(int numVertexElements, int numVaryingElements) : + _numVertexElements(numVertexElements), _numVaryingElements(numVaryingElements) {} + bool operator() (const ComputeShader &shader) { + return (shader._numVertexElements == _numVertexElements + && shader._numVaryingElements == _numVaryingElements); + } + int _numVertexElements, _numVaryingElements; + }; + + friend class Match; + + private: + void transformGpuBufferData(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + GLint offset, int start, int end) const; + + int _numVertexElements; + int _numVaryingElements; + + GLuint _program; + + GLuint _uniformVertexPass; + GLuint _uniformIndexStart; + GLuint _uniformIndexOffset; + + GLuint _vertexUniform, + _varyingUniform; + + // shader locations + GLuint _subComputeFace, _subComputeEdge, _subComputeVertexA, _subComputeVertexB; + GLuint _subComputeLoopVertexB; + + std::vector _tableUniforms; + std::vector _tableOffsetUniforms; + + }; + void bindTextureBuffer(GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const; void unbindTextureBuffer(int unit) const; - void transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass=false) const; - - bool compile(const char *shaderSource, const char *shaderDefine); - - GLuint _prgKernel; - - int _numVertexElements, - _numVarying; - - GLuint _vertexBuffer, - _varyingBuffer; + ComputeShader * _shader; // texture for vertex GLuint _vertexTexture, _varyingTexture; - - GLuint _vertexUniform, - _varyingUniform; + + OsdGpuVertexBuffer *_currentVertexBuffer, + *_currentVaryingBuffer; // table buffers std::vector _tableBuffers; std::vector _tableTextures; - std::vector _tableUniforms; - std::vector _tableOffsetUniforms; - GLuint uniformVertexPass; - GLuint uniformIndexStart; - GLuint uniformIndexOffset; - - // shader locations - GLuint subComputeFace, subComputeEdge, subComputeVertexA, subComputeVertexB; - GLuint subComputeLoopVertexB; + // static shader registry (XXX tentative..) + static std::vector shaderRegistry; }; } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/kernelDispatcher.h b/opensubdiv/osd/kernelDispatcher.h index a6b815ff..5c34cd2c 100644 --- a/opensubdiv/osd/kernelDispatcher.h +++ b/opensubdiv/osd/kernelDispatcher.h @@ -82,9 +82,9 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr) = 0; - virtual void BeginLaunchKernel() = 0; + virtual void OnKernelLaunch() = 0; - virtual void EndLaunchKernel() = 0; + virtual void OnKernelFinish() = 0; virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count) = 0; diff --git a/opensubdiv/osd/mesh.cpp b/opensubdiv/osd/mesh.cpp index 74417de8..757b4649 100644 --- a/opensubdiv/osd/mesh.cpp +++ b/opensubdiv/osd/mesh.cpp @@ -102,7 +102,6 @@ OsdMesh::Create(OsdHbrMesh *hbrMesh, int level, const std::string &kernel) { OSD_DEBUG("PREP: NumCoarseVertex = %d\n", _mMesh->GetNumCoarseVertices()); OSD_DEBUG("PREP: NumVertex = %d\n", _mMesh->GetNumVertices()); - OSD_DEBUG("PREP: NumTables = %d\n", _mMesh->GetNumSubdivisionTables()); const FarSubdivisionTables* table = _mMesh->GetSubdivision(); @@ -145,11 +144,11 @@ OsdMesh::Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { _dispatcher->BindVertexBuffer(vertex, varying); - _dispatcher->BeginLaunchKernel(); + _dispatcher->OnKernelLaunch(); _mMesh->Subdivide(_level+1); - _dispatcher->EndLaunchKernel(); + _dispatcher->OnKernelFinish(); _dispatcher->UnbindVertexBuffer(); } diff --git a/opensubdiv/osd/mesh.h b/opensubdiv/osd/mesh.h index 6558f2f9..b2790231 100644 --- a/opensubdiv/osd/mesh.h +++ b/opensubdiv/osd/mesh.h @@ -95,7 +95,13 @@ public: OsdVertexBuffer *InitializeVertexBuffer(int numElements); - void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); + // for non-interleaved vertex data + void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying = NULL); + +/* + // for interleaved vertex data ? + template void Subdivide(T *vertex) { } +*/ void Synchronize(); diff --git a/opensubdiv/osd/vertexBuffer.h b/opensubdiv/osd/vertexBuffer.h index 9111dc16..dce37f68 100644 --- a/opensubdiv/osd/vertexBuffer.h +++ b/opensubdiv/osd/vertexBuffer.h @@ -9,69 +9,83 @@ namespace OPENSUBDIV_VERSION { class OsdVertexBuffer { public: + OsdVertexBuffer(int numElements) : _numElements(numElements) {} virtual ~OsdVertexBuffer() {} - virtual void UpdateData(const float *src, int count) = 0; + + virtual void UpdateData(const float *src, int numVertices) = 0; + virtual GLuint GetGpuBuffer() = 0; + + int GetNumElements() const { + return _numElements; + } + +protected: + int _numElements; }; class OsdGpuVertexBuffer : public OsdVertexBuffer { public: - OsdGpuVertexBuffer(int numElements, int count) : _vbo(0), _numElements(numElements) { - int stride = numElements * count * sizeof(float); + OsdGpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _vbo(0) { + int size = numElements * numVertices * sizeof(float); glGenBuffers(1, &_vbo); glBindBuffer(GL_ARRAY_BUFFER, _vbo); - glBufferData(GL_ARRAY_BUFFER, stride, 0, GL_STREAM_DRAW); + glBufferData(GL_ARRAY_BUFFER, size, 0, GL_STREAM_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); } virtual ~OsdGpuVertexBuffer() { glDeleteBuffers(1, &_vbo); } - virtual void UpdateData(const float *src, int count) { + + virtual void UpdateData(const float *src, int numVertices) { glBindBuffer(GL_ARRAY_BUFFER, _vbo); float * pointer = (float*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); - memcpy(pointer, src, _numElements * count * sizeof(float)); + memcpy(pointer, src, _numElements * numVertices * sizeof(float)); glUnmapBuffer(GL_ARRAY_BUFFER); glBindBuffer(GL_ARRAY_BUFFER, 0); } + virtual GLuint GetGpuBuffer() { return _vbo; } -private: + +protected: GLuint _vbo; - int _numElements; }; class OsdCpuVertexBuffer : public OsdVertexBuffer { public: - OsdCpuVertexBuffer(int numElements, int count) : _cpuVbo(NULL), _vboSize(0), _numElements(numElements), _vbo(0) { - _cpuVbo = new float[numElements * count]; - _vboSize = numElements * count; + OsdCpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _cpuVbo(NULL), _vboSize(0), _vbo(0) { + _vboSize = numElements * numVertices; + _cpuVbo = new float[numElements * numVertices]; } virtual ~OsdCpuVertexBuffer() { - if(_cpuVbo) delete[] _cpuVbo; - if(_vbo) glDeleteBuffers(1, &_vbo); + delete [] _cpuVbo; + if (_vbo) + glDeleteBuffers(1, &_vbo); } - virtual void UpdateData(const float *src, int count) { - memcpy(_cpuVbo, src, _numElements * count * sizeof(float)); + + virtual void UpdateData(const float *src, int numVertices) { + memcpy(_cpuVbo, src, _numElements * numVertices * sizeof(float)); } + float *GetCpuBuffer() { return _cpuVbo; } + + // XXX: this method name is missleading virtual GLuint GetGpuBuffer() { - if(!_vbo) glGenBuffers(1, &_vbo); + if (!_vbo) + glGenBuffers(1, &_vbo); glBindBuffer(GL_ARRAY_BUFFER, _vbo); glBufferData(GL_ARRAY_BUFFER, _vboSize * sizeof(float), _cpuVbo, GL_STREAM_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); return _vbo; } - int GetNumElements() const { - return _numElements; - } -private: +protected: float *_cpuVbo; int _vboSize; - int _numElements; GLuint _vbo; };