From 851c00d04cba3ad8250f437d3cff55124f915d64 Mon Sep 17 00:00:00 2001 From: Takahito Tejima Date: Tue, 12 Jun 2012 16:28:17 -0700 Subject: [PATCH] Added OsdVertexBuffer class. OsdMesh no longer has vertex buffer and vertex specification (how many elements exists in the buffer). client will create OsdVertexBuffer and provide it as an argument of OsdMesh::Subdivide() function. It would be more flexible and hopefully matches various use cases. Since each dispatcher has to accept arbitrary vertex buffer, introduced a simple shader registry into glslDispatcher. It will configure shaders for given vertex elements on demand (for now, just works only for varying buffer). Fixed cuda kernel's GL resource leakage. Since cuda GL interop seems one-way, OsdCudaVertexBuffer manages vertex updating instead of just using OsdGpuVertexBuffer. Cleaned up some kernel codes and renamed ambiguous names. --- opensubdiv/osd/cpuDispatcher.cpp | 142 +++++----- opensubdiv/osd/cpuDispatcher.h | 28 +- opensubdiv/osd/cpuKernel.cpp | 110 +++----- opensubdiv/osd/cpuKernel.h | 26 +- opensubdiv/osd/cudaDispatcher.cpp | 95 +++++-- opensubdiv/osd/cudaDispatcher.h | 26 +- opensubdiv/osd/glslDispatcher.cpp | 434 ++++++++++++++++++++---------- opensubdiv/osd/glslDispatcher.h | 112 ++++++-- opensubdiv/osd/kernelDispatcher.h | 4 +- opensubdiv/osd/mesh.cpp | 5 +- opensubdiv/osd/mesh.h | 8 +- opensubdiv/osd/vertexBuffer.h | 56 ++-- 12 files changed, 644 insertions(+), 402 deletions(-) diff --git a/opensubdiv/osd/cpuDispatcher.cpp b/opensubdiv/osd/cpuDispatcher.cpp index 222ab39a..02c2f904 100644 --- a/opensubdiv/osd/cpuDispatcher.cpp +++ b/opensubdiv/osd/cpuDispatcher.cpp @@ -66,29 +66,33 @@ namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { -OsdCpuKernelDispatcher::DeviceTable::~DeviceTable() { +OsdCpuKernelDispatcher::SubdivisionTable::~SubdivisionTable() { - if (devicePtr) - free(devicePtr); + if (ptr) + free(ptr); } void -OsdCpuKernelDispatcher::DeviceTable::Copy( int size, const void *table ) { +OsdCpuKernelDispatcher::SubdivisionTable::Copy( int size, const void *table ) { if (size > 0) { - if (devicePtr) - free(devicePtr); - devicePtr = malloc(size); - memcpy(devicePtr, table, size); + if (ptr) + free(ptr); + ptr = malloc(size); + memcpy(ptr, table, size); } } OsdCpuKernelDispatcher::OsdCpuKernelDispatcher( int levels ) - : OsdKernelDispatcher(levels) { + : OsdKernelDispatcher(levels), _currentVertexBuffer(NULL), _currentVaryingBuffer(NULL), _vdesc(NULL) { _tables.resize(TABLE_MAX); } -OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() { } +OsdCpuKernelDispatcher::~OsdCpuKernelDispatcher() { + + if (_vdesc) + delete _vdesc; +} void OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) { @@ -96,30 +100,37 @@ OsdCpuKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) _tables[tableIndex].Copy(size, ptr); } -void -OsdCpuKernelDispatcher::BeginLaunchKernel() { } - -void -OsdCpuKernelDispatcher::EndLaunchKernel() { } - OsdVertexBuffer * -OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int count) +OsdCpuKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices) { - return new OsdCpuVertexBuffer(numElements, count); + return new OsdCpuVertexBuffer(numElements, numVertices); } void OsdCpuKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { - _vertexBuffer = dynamic_cast(vertex); - _varyingBuffer = dynamic_cast(varying); + if (vertex) + _currentVertexBuffer = dynamic_cast(vertex); + else + _currentVertexBuffer = NULL; + + if (varying) + _currentVaryingBuffer = dynamic_cast(varying); + else + _currentVaryingBuffer = NULL; + + _vdesc = new VertexDescriptor(_currentVertexBuffer ? _currentVertexBuffer->GetNumElements() : 0, + _currentVaryingBuffer ? _currentVaryingBuffer->GetNumElements() : 0); } void OsdCpuKernelDispatcher::UnbindVertexBuffer() { - _vertexBuffer = NULL; - _varyingBuffer = NULL; + delete _vdesc; + _vdesc = NULL; + + _currentVertexBuffer = NULL; + _currentVaryingBuffer = NULL; } void @@ -129,21 +140,17 @@ OsdCpuKernelDispatcher::Synchronize() { } void OsdCpuKernelDispatcher::ApplyBilinearFaceVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_numVertexElements, _numVaryingElements); - - computeFace(&vd, _vbo, _varyingVbo, - (int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1], - (int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1], + computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1], + (int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_numVertexElements, _numVaryingElements); - - computeBilinearEdge(&vd, _vbo, _varyingVbo, - (int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1], + computeBilinearEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1], offset, start, end); } @@ -151,38 +158,26 @@ OsdCpuKernelDispatcher::ApplyBilinearEdgeVerticesKernel( FarMesh * me void OsdCpuKernelDispatcher::ApplyBilinearVertexVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_numVertexElements, _numVaryingElements); - - computeBilinearVertex(&vd, _vbo, _varyingVbo, - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], + computeBilinearVertex(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyCatmarkFaceVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeFace(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[F_IT].devicePtr + _tableOffsets[F_IT][level-1], - (int*)_tables[F_ITa].devicePtr + _tableOffsets[F_ITa][level-1], + computeFace(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[F_IT].ptr + _tableOffsets[F_IT][level-1], + (int*)_tables[F_ITa].ptr + _tableOffsets[F_ITa][level-1], offset, start, end); - - float *p = _vertexBuffer->GetCpuBuffer(); - for(int i = 0; i < 150; i+=3){ - printf("%f %f %f\n", p[0], p[1], p[2]); - p+=3; - } } void OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1], - (float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1], + computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1], + (float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1], offset, start, end); } @@ -190,34 +185,28 @@ OsdCpuKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( FarMesh * mes void OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end, pass); } void OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeEdge(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[E_IT].devicePtr + _tableOffsets[E_IT][level-1], - (float*)_tables[E_W].devicePtr + _tableOffsets[E_W][level-1], + computeEdge(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[E_IT].ptr + _tableOffsets[E_IT][level-1], + (float*)_tables[E_W].ptr + _tableOffsets[E_W][level-1], offset, start, end); } @@ -225,28 +214,23 @@ OsdCpuKernelDispatcher::ApplyLoopEdgeVerticesKernel( FarMesh * mesh, void OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelB( FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeLoopVertexB(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (int*)_tables[V_IT].devicePtr + _tableOffsets[V_IT][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeLoopVertexB(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (int*)_tables[V_IT].ptr + _tableOffsets[V_IT][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end); } void OsdCpuKernelDispatcher::ApplyLoopVertexVerticesKernelA( FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { - VertexDescriptor vd(_vertexBuffer->GetNumElements(), _varyingBuffer->GetNumElements()); - - computeVertexA(&vd, _vertexBuffer->GetCpuBuffer(), _varyingBuffer->GetCpuBuffer(), - (int*)_tables[V_ITa].devicePtr + _tableOffsets[V_ITa][level-1], - (float*)_tables[V_W].devicePtr + _tableOffsets[V_W][level-1], + computeVertexA(_vdesc, GetVertexBuffer(), GetVaryingBuffer(), + (int*)_tables[V_ITa].ptr + _tableOffsets[V_ITa][level-1], + (float*)_tables[V_W].ptr + _tableOffsets[V_W][level-1], offset, start, end, pass); } } // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; } // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuDispatcher.h b/opensubdiv/osd/cpuDispatcher.h index 6be66766..79105d96 100644 --- a/opensubdiv/osd/cpuDispatcher.h +++ b/opensubdiv/osd/cpuDispatcher.h @@ -70,6 +70,8 @@ namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { +class VertexDescriptor; + class OsdCpuKernelDispatcher : public OsdKernelDispatcher { public: @@ -102,11 +104,11 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr); - virtual void BeginLaunchKernel(); + virtual void OnKernelLaunch() {} - virtual void EndLaunchKernel(); + virtual void OnKernelFinish() {} - virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count); + virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices); virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); @@ -123,18 +125,26 @@ public: protected: - struct DeviceTable { - DeviceTable() : devicePtr(NULL) { } - ~DeviceTable(); + struct SubdivisionTable { + SubdivisionTable() : ptr(NULL) { } + + ~SubdivisionTable(); void Copy(int size, const void *ptr); - void *devicePtr; + void *ptr; }; - OsdCpuVertexBuffer *_vertexBuffer, *_varyingBuffer; + float *GetVertexBuffer() const { return _currentVertexBuffer ? _currentVertexBuffer->GetCpuBuffer() : NULL; } - std::vector _tables; + float *GetVaryingBuffer() const { return _currentVaryingBuffer ? _currentVaryingBuffer->GetCpuBuffer() : NULL; } + + OsdCpuVertexBuffer *_currentVertexBuffer, + *_currentVaryingBuffer; + + VertexDescriptor *_vdesc; + + std::vector _tables; }; } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/cpuKernel.cpp b/opensubdiv/osd/cpuKernel.cpp index 2016b504..074f723d 100644 --- a/opensubdiv/osd/cpuKernel.cpp +++ b/opensubdiv/osd/cpuKernel.cpp @@ -64,9 +64,6 @@ namespace OPENSUBDIV_VERSION { void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying, const int *F_IT, const int *F_ITa, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -77,23 +74,19 @@ void computeFace( const VertexDescriptor *vdesc, float * vertex, float * varying float weight = 1.0f/n; // XXX: should use local vertex struct variable instead of accumulating directly into global memory. - float *dst = &vertex[(offset + i)*ve]; - float *dstVarying = &varying[(offset + i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); for (int j=0; jAddWithWeight(dst, &vertex[index*ve], weight); - vdesc->AddVaryingWithWeight(dstVarying, &varying[index*vev], weight); + vdesc->AddWithWeight(vertex, dstIndex, index, weight); + vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight); } } } void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, const float *E_W, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -105,30 +98,26 @@ void computeEdge( const VertexDescriptor *vdesc, float *vertex, float *varying, float vertWeight = E_W[i*2+0]; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[eidx0*ve], vertWeight); - vdesc->AddWithWeight(dst, &vertex[eidx1*ve], vertWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx0, vertWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx1, vertWeight); if (eidx2 != -1) { float faceWeight = E_W[i*2+1]; - vdesc->AddWithWeight(dst, &vertex[eidx2*ve], faceWeight); - vdesc->AddWithWeight(dst, &vertex[eidx3*ve], faceWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx2, faceWeight); + vdesc->AddWithWeight(vertex, dstIndex, eidx3, faceWeight); } - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f); - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f); } } void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const float *V_W, int offset, int start, int end, int pass) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -146,29 +135,25 @@ void computeVertexA(const VertexDescriptor *vdesc, float *vertex, float *varying if (weight>0.0f && weight<1.0f && n > 0) weight=1.0f-weight; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; + int dstIndex = offset + i; if(not pass) - vdesc->Clear(dst, dstVarying); + vdesc->Clear(vertex, varying, dstIndex); if (eidx0==-1 || (pass==0 && (n==-1)) ) { - vdesc->AddWithWeight(dst, &vertex[p*ve], weight); + vdesc->AddWithWeight(vertex, dstIndex, p, weight); } else { - vdesc->AddWithWeight(dst, &vertex[p*ve], weight * 0.75f); - vdesc->AddWithWeight(dst, &vertex[eidx0*ve], weight * 0.125f); - vdesc->AddWithWeight(dst, &vertex[eidx1*ve], weight * 0.125f); + vdesc->AddWithWeight(vertex, dstIndex, p, weight * 0.75f); + vdesc->AddWithWeight(vertex, dstIndex, eidx0, weight * 0.125f); + vdesc->AddWithWeight(vertex, dstIndex, eidx1, weight * 0.125f); } if (not pass) - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -181,25 +166,21 @@ void computeVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying float wp = 1.0f/float(n*n); float wv = (n-2.0f) * n * wp; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[p*ve], weight * wv); + vdesc->AddWithWeight(vertex, dstIndex, p, weight * wv); for (int j = 0; j < n; ++j) { - vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2]*ve], weight * wp); - vdesc->AddWithWeight(dst, &vertex[V_IT[h+j*2+1]*ve], weight * wp); + vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2], weight * wp); + vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp); } - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, const int *V_IT, const float *V_W, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -214,24 +195,20 @@ void computeLoopVertexB(const VertexDescriptor *vdesc, float *vertex, float *var beta = beta * beta; beta = (0.625f - beta) * wp; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[p*ve], weight * (1.0f - (beta * n))); + vdesc->AddWithWeight(vertex, dstIndex, p, weight * (1.0f - (beta * n))); for (int j = 0; j < n; ++j) - vdesc->AddWithWeight(dst, &vertex[V_IT[h+j]*ve], weight * beta); + vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta); - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *E_IT, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif @@ -239,35 +216,30 @@ void computeBilinearEdge(const VertexDescriptor *vdesc, float *vertex, float *va int eidx0 = E_IT[2*i+0]; int eidx1 = E_IT[2*i+1]; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); + + vdesc->AddWithWeight(vertex, dstIndex, eidx0, 0.5f); + vdesc->AddWithWeight(vertex, dstIndex, eidx1, 0.5f); - vdesc->AddWithWeight(dst, &vertex[eidx0*ve], 0.5f); - vdesc->AddWithWeight(dst, &vertex[eidx1*ve], 0.5f); - - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx0*vev], 0.5f); - vdesc->AddVaryingWithWeight(dstVarying, &varying[eidx1*vev], 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f); + vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f); } } void computeBilinearVertex(const VertexDescriptor *vdesc, float *vertex, float *varying, const int *V_ITa, int offset, int start, int end) { - int ve = vdesc->numVertexElements; - int vev = vdesc->numVaryingElements; - #ifdef _OPENMP #pragma omp parallel for #endif for (int i = start; i < end; i++) { int p = V_ITa[i]; - float *dst = &vertex[(offset+i)*ve]; - float *dstVarying = &varying[(offset+i)*vev]; - vdesc->Clear(dst, dstVarying); + int dstIndex = offset + i; + vdesc->Clear(vertex, varying, dstIndex); - vdesc->AddWithWeight(dst, &vertex[p*ve], 1.0f); - vdesc->AddVaryingWithWeight(dstVarying, &varying[p*vev], 1.0f); + vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f); + vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); } } diff --git a/opensubdiv/osd/cpuKernel.h b/opensubdiv/osd/cpuKernel.h index ba6d9293..c90e9f60 100644 --- a/opensubdiv/osd/cpuKernel.h +++ b/opensubdiv/osd/cpuKernel.h @@ -67,20 +67,28 @@ struct VertexDescriptor { VertexDescriptor(int numVertexElem, int numVaryingElem) : numVertexElements(numVertexElem), numVaryingElements(numVaryingElem) { } - void Clear(float *vertex, float *varying) const { - for (int i = 0; i < numVertexElements; ++i) - vertex[i] = 0.0f; + void Clear(float *vertex, float *varying, int index) const { + if (vertex) { + for (int i = 0; i < numVertexElements; ++i) + vertex[index*numVertexElements+i] = 0.0f; + } - for (int i = 0; i < numVaryingElements; ++i) - varying[i] = 0.0f; + if (varying) { + for (int i = 0; i < numVaryingElements; ++i) + varying[index*numVaryingElements+i] = 0.0f; + } } - void AddWithWeight(float *vertex, const float *src, float weight) const { + void AddWithWeight(float *vertex, int dstIndex, int srcIndex, float weight) const { + int d = dstIndex * numVertexElements; + int s = srcIndex * numVertexElements; for (int i = 0; i < numVertexElements; ++i) - vertex[i] += src[i] * weight; + vertex[d++] += vertex[s++] * weight; } - void AddVaryingWithWeight(float *varying, const float *src, float weight) const { + void AddVaryingWithWeight(float *varying, int dstIndex, int srcIndex, float weight) const { + int d = dstIndex * numVaryingElements; + int s = srcIndex * numVaryingElements; for (int i = 0; i < numVaryingElements; ++i) - varying[i] += src[i] * weight; + varying[d++] += varying[s++] * weight; } int numVertexElements; diff --git a/opensubdiv/osd/cudaDispatcher.cpp b/opensubdiv/osd/cudaDispatcher.cpp index b53e9e39..ce7e2f8d 100644 --- a/opensubdiv/osd/cudaDispatcher.cpp +++ b/opensubdiv/osd/cudaDispatcher.cpp @@ -81,7 +81,42 @@ void OsdCudaComputeBilinearVertex(float *vertex, float *varying, int numUserVert namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { +OsdCudaVertexBuffer::OsdCudaVertexBuffer(int numElements, int numVertices) : + OsdGpuVertexBuffer(numElements, numVertices) { + // register vbo as cuda resource + cudaGraphicsGLRegisterBuffer(&_cudaResource, _vbo, cudaGraphicsMapFlagsNone); +} + +void +OsdCudaVertexBuffer::UpdateData(const float *src, int numVertices) { + + void *dst = Map(); + cudaMemcpy(dst, src, _numElements * numVertices * sizeof(float), cudaMemcpyHostToDevice); + Unmap(); +} + +void * +OsdCudaVertexBuffer::Map() { + + size_t num_bytes; + void *ptr; + + cudaGraphicsMapResources(1, &_cudaResource, 0); + cudaGraphicsResourceGetMappedPointer(&ptr, &num_bytes, _cudaResource); + return ptr; +} + +void +OsdCudaVertexBuffer::Unmap() { + cudaGraphicsUnmapResources(1, &_cudaResource, 0); +} + +OsdCudaVertexBuffer::~OsdCudaVertexBuffer() { + cudaGraphicsUnregisterResource(_cudaResource); +} + +// ------------------------------------------------------------------------------- OsdCudaKernelDispatcher::DeviceTable::~DeviceTable() { if (devicePtr) cudaFree(devicePtr); @@ -95,18 +130,16 @@ OsdCudaKernelDispatcher::DeviceTable::Copy(int size, const void *ptr) { cudaMalloc(&devicePtr, size); cudaMemcpy(devicePtr, ptr, size, cudaMemcpyHostToDevice); } +// ------------------------------------------------------------------------------- OsdCudaKernelDispatcher::OsdCudaKernelDispatcher(int levels) - : OsdKernelDispatcher(levels), - _cudaVertexResource(NULL), - _cudaVaryingResource(NULL) + : OsdKernelDispatcher(levels) { _tables.resize(TABLE_MAX); } OsdCudaKernelDispatcher::~OsdCudaKernelDispatcher() { - cudaDeviceReset(); // XXX: necessary? } void @@ -115,48 +148,52 @@ OsdCudaKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) _tables[tableIndex].Copy(size, ptr); } -void -OsdCudaKernelDispatcher::BeginLaunchKernel() { } - -void -OsdCudaKernelDispatcher::EndLaunchKernel() { } - OsdVertexBuffer * -OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int count) +OsdCudaKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices) { - return new OsdGpuVertexBuffer(numElements, count); + return new OsdCudaVertexBuffer(numElements, numVertices); } void OsdCudaKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { - OsdGpuVertexBuffer *bVertex = dynamic_cast(vertex); - OsdGpuVertexBuffer *bVarying = dynamic_cast(varying); - size_t num_bytes; + if (vertex) + _currentVertexBuffer = dynamic_cast(vertex); + else + _currentVertexBuffer = NULL; - if (bVertex) { - cudaGraphicsGLRegisterBuffer(&_cudaVertexResource, bVertex->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard); - cudaGraphicsMapResources(1, &_cudaVertexResource, 0); - cudaGraphicsResourceGetMappedPointer((void **)&_deviceVertices, &num_bytes, _cudaVertexResource); + if (varying) + _currentVaryingBuffer = dynamic_cast(varying); + else + _currentVaryingBuffer = NULL; + + if (_currentVertexBuffer) { + _deviceVertices = (float*)_currentVertexBuffer->Map(); + // XXX todo remove _numVertexElements + _numVertexElements = _currentVertexBuffer->GetNumElements(); + } else { + _numVertexElements = 0; } - if (bVarying) { - cudaGraphicsGLRegisterBuffer(&_cudaVaryingResource, bVarying->GetGpuBuffer(), cudaGraphicsMapFlagsWriteDiscard); - cudaGraphicsMapResources(1, &_cudaVaryingResource, 0); - cudaGraphicsResourceGetMappedPointer((void **)&_deviceVaryings, &num_bytes, _cudaVaryingResource); + if (_currentVaryingBuffer) { + _deviceVaryings = (float*)_currentVaryingBuffer->Map(); + _numVaryingElements = _currentVaryingBuffer->GetNumElements(); + } else { + _numVaryingElements = 0; } } void OsdCudaKernelDispatcher::UnbindVertexBuffer() { - if (_cudaVertexResource) - cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0); - if (_cudaVaryingResource) - cudaGraphicsUnmapResources(1, &_cudaVertexResource, 0); + if (_currentVertexBuffer){ + _currentVertexBuffer->Unmap(); + } + if (_currentVaryingBuffer) + _currentVaryingBuffer->Unmap(); - _cudaVertexResource = NULL; - _cudaVaryingResource = NULL; + _currentVertexBuffer = NULL; + _currentVaryingBuffer = NULL; } void diff --git a/opensubdiv/osd/cudaDispatcher.h b/opensubdiv/osd/cudaDispatcher.h index e6673fb0..695d848c 100644 --- a/opensubdiv/osd/cudaDispatcher.h +++ b/opensubdiv/osd/cudaDispatcher.h @@ -59,6 +59,7 @@ #include "../version.h" #include "../osd/kernelDispatcher.h" +#include "../osd/vertexBuffer.h" #include @@ -72,6 +73,19 @@ namespace OPENSUBDIV_VERSION { struct DeviceVertex; +class OsdCudaVertexBuffer : public OsdGpuVertexBuffer { +public: + OsdCudaVertexBuffer(int numElements, int numVertices); + virtual ~OsdCudaVertexBuffer(); + + virtual void UpdateData(const float *src, int numVertices); + void * Map(); + void Unmap(); + +protected: + cudaGraphicsResource *_cudaResource; +}; + class OsdCudaKernelDispatcher : public OsdKernelDispatcher { public: @@ -106,11 +120,11 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr); - virtual void BeginLaunchKernel(); - - virtual void EndLaunchKernel(); + virtual void OnKernelLaunch() {} - virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count); + virtual void OnKernelFinish() {} + + virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices); virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); @@ -138,8 +152,8 @@ protected: std::vector _tables; - cudaGraphicsResource *_cudaVertexResource, - *_cudaVaryingResource; + OsdCudaVertexBuffer *_currentVertexBuffer, + *_currentVaryingBuffer; float *_deviceVertices, *_deviceVaryings; diff --git a/opensubdiv/osd/glslDispatcher.cpp b/opensubdiv/osd/glslDispatcher.cpp index 312f8627..4cabceed 100644 --- a/opensubdiv/osd/glslDispatcher.cpp +++ b/opensubdiv/osd/glslDispatcher.cpp @@ -61,6 +61,8 @@ #include #include #include +#include +#include #define OPT_E0_IT_VEC4 #define OPT_E0_S_VEC2 @@ -88,65 +90,28 @@ static const char *shaderDefines = "" #endif ; +std::vector OsdGlslKernelDispatcher::shaderRegistry; + OsdGlslKernelDispatcher::OsdGlslKernelDispatcher(int levels) : OsdKernelDispatcher(levels) { - _vertexBuffer = 0; - _varyingBuffer = 0; - _prgKernel = 0; + _currentVertexBuffer = 0; + _currentVaryingBuffer = 0; + _shader = 0; glGenTextures(1, &_vertexTexture); glGenTextures(1, &_varyingTexture); _tableBuffers.resize(TABLE_MAX); _tableTextures.resize(TABLE_MAX); - _tableUniforms.resize(TABLE_MAX); - _tableOffsetUniforms.resize(TABLE_MAX); glGenBuffers(TABLE_MAX, &_tableBuffers[0]); glGenTextures(TABLE_MAX, &_tableTextures[0]); - subComputeFace = 0; - subComputeEdge = 0; - subComputeVertexA = 0; - subComputeVertexB = 0; - uniformVertexPass = 0; - uniformIndexStart = 0; - uniformIndexOffset = 0; - - compile(shaderSource, shaderDefines); - - subComputeFace = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeFace"); - subComputeEdge = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeEdge"); - subComputeVertexA = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexA"); - subComputeVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "catmarkComputeVertexB"); - subComputeLoopVertexB = glGetSubroutineIndex(_prgKernel, GL_VERTEX_SHADER, "loopComputeVertexB"); - - uniformVertexPass = glGetUniformLocation(_prgKernel, "vertexPass"); - uniformIndexStart = glGetUniformLocation(_prgKernel, "indexStart"); - uniformIndexOffset = glGetUniformLocation(_prgKernel, "indexOffset"); - - _tableUniforms[F_IT] = glGetUniformLocation(_prgKernel, "_F0_IT"); - _tableUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "_F0_ITa"); - _tableUniforms[E_IT] = glGetUniformLocation(_prgKernel, "_E0_IT"); - _tableUniforms[V_IT] = glGetUniformLocation(_prgKernel, "_V0_IT"); - _tableUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "_V0_ITa"); - _tableUniforms[E_W] = glGetUniformLocation(_prgKernel, "_E0_S"); - _tableUniforms[V_W] = glGetUniformLocation(_prgKernel, "_V0_S"); - _tableOffsetUniforms[F_IT] = glGetUniformLocation(_prgKernel, "F_IT_ofs"); - _tableOffsetUniforms[F_ITa] = glGetUniformLocation(_prgKernel, "F_ITa_ofs"); - _tableOffsetUniforms[E_IT] = glGetUniformLocation(_prgKernel, "E_IT_ofs"); - _tableOffsetUniforms[V_IT] = glGetUniformLocation(_prgKernel, "V_IT_ofs"); - _tableOffsetUniforms[V_ITa] = glGetUniformLocation(_prgKernel, "V_ITa_ofs"); - _tableOffsetUniforms[E_W] = glGetUniformLocation(_prgKernel, "E_W_ofs"); - _tableOffsetUniforms[V_W] = glGetUniformLocation(_prgKernel, "V_W_ofs"); } OsdGlslKernelDispatcher::~OsdGlslKernelDispatcher() { - if (_prgKernel) - glDeleteProgram(_prgKernel); - glDeleteTextures(1, &_vertexTexture); glDeleteTextures(1, &_varyingTexture); @@ -163,98 +128,134 @@ OsdGlslKernelDispatcher::CopyTable(int tableIndex, size_t size, const void *ptr) } void -OsdGlslKernelDispatcher::BeginLaunchKernel() { +OsdGlslKernelDispatcher::OnKernelLaunch() { - glUseProgram(_prgKernel); glEnable(GL_RASTERIZER_DISCARD); + _shader->UseProgram(); //XXX what if loop.. - bindTextureBuffer(_tableUniforms[F_IT], _tableBuffers[F_IT], + bindTextureBuffer(_shader->GetTableUniform(F_IT), _tableBuffers[F_IT], _tableTextures[F_IT], GL_R32UI, 2); - bindTextureBuffer(_tableUniforms[F_ITa], _tableBuffers[F_ITa], + bindTextureBuffer(_shader->GetTableUniform(F_ITa), _tableBuffers[F_ITa], _tableTextures[F_ITa], GL_R32I, 3); #ifdef OPT_E0_IT_VEC4 - bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT], + bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT], _tableTextures[E_IT], GL_RGBA32UI, 4); #else - bindTextureBuffer(_tableUniforms[E_IT], _tableBuffers[E_IT], + bindTextureBuffer(_shader->GetTableUniform(E_IT), _tableBuffers[E_IT], _tableTextures[E_IT], GL_R32UI, 4); #endif #ifdef OPT_CATMARK_V_IT_VEC2 - bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT], + bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT], _tableTextures[V_IT], GL_RG32UI, 5); #else - bindTextureBuffer(_tableUniforms[V_IT], _tableBuffers[V_IT], + bindTextureBuffer(_shader->GetTableUniform(V_IT), _tableBuffers[V_IT], _tableTextures[V_IT], GL_R32UI, 5); #endif - bindTextureBuffer(_tableUniforms[V_ITa], _tableBuffers[V_ITa], + bindTextureBuffer(_shader->GetTableUniform(V_ITa), _tableBuffers[V_ITa], _tableTextures[V_ITa], GL_R32I, 6); #ifdef OPT_E0_S_VEC2 - bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W], + bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W], _tableTextures[E_W], GL_RG32F, 7); #else - bindTextureBuffer(_tableUniforms[E_W], _tableBuffers[E_W], + bindTextureBuffer(_shader->GetTableUniform(E_W), _tableBuffers[E_W], _tableTextures[E_W], GL_R32F, 7); #endif - bindTextureBuffer(_tableUniforms[V_W], _tableBuffers[V_W], + bindTextureBuffer(_shader->GetTableUniform(V_W), _tableBuffers[V_W], _tableTextures[V_W], GL_R32F, 8); } void -OsdGlslKernelDispatcher::EndLaunchKernel() { +OsdGlslKernelDispatcher::OnKernelFinish() { + + unbindTextureBuffer(2); + unbindTextureBuffer(3); + unbindTextureBuffer(4); + unbindTextureBuffer(5); + unbindTextureBuffer(6); + unbindTextureBuffer(7); + unbindTextureBuffer(8); glDisable(GL_RASTERIZER_DISCARD); glUseProgram(0); - - // XXX Unbind table buffer } OsdVertexBuffer * -OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int count) +OsdGlslKernelDispatcher::InitializeVertexBuffer(int numElements, int numVertices) { - return new OsdGpuVertexBuffer(numElements, count); + return new OsdGpuVertexBuffer(numElements, numVertices); } void OsdGlslKernelDispatcher::BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { - OsdGpuVertexBuffer *bVertex = dynamic_cast(vertex); - OsdGpuVertexBuffer *bVarying = dynamic_cast(varying); + if (vertex) + _currentVertexBuffer = dynamic_cast(vertex); + else + _currentVertexBuffer = NULL; - if (bVertex) { - _vertexBuffer = bVertex->GetGpuBuffer(); - bindTextureBuffer(_vertexUniform, _vertexBuffer, _vertexTexture, GL_RGB32F, 0); + if (varying) + _currentVaryingBuffer = dynamic_cast(varying); + else + _currentVaryingBuffer = NULL; + + int numVertexElements = vertex ? vertex->GetNumElements() : 0; + int numVaryingElements = varying ? varying->GetNumElements() : 0; + + // find appropriate shader program from registry (compile it if needed) + std::vector::iterator it = + std::find_if(shaderRegistry.begin(), shaderRegistry.end(), + ComputeShader::Match(numVertexElements, numVaryingElements)); + + _shader = NULL; + if (it != shaderRegistry.end()) { + _shader = &(*it); + } else { + shaderRegistry.push_back(ComputeShader()); + _shader = &shaderRegistry.back(); + _shader->Compile(numVertexElements, numVaryingElements); } - if (bVarying) { - _varyingBuffer = bVarying->GetGpuBuffer(); - bindTextureBuffer(_varyingUniform, _varyingBuffer, _varyingTexture, GL_R32F, 0); + _shader->UseProgram(); // need to bind textures + + // bind vertex texture + if (_currentVertexBuffer) { + bindTextureBuffer(_shader->GetVertexUniform(), _currentVertexBuffer->GetGpuBuffer(), _vertexTexture, GL_RGB32F, 0); } - glUseProgram(_prgKernel); - glUniform1i(_vertexUniform, 0); + if (_currentVaryingBuffer) { + bindTextureBuffer(_shader->GetVaryingUniform(), _currentVaryingBuffer->GetGpuBuffer(), _varyingTexture, GL_R32F, 1); + } #if 0 // experiment to use image store function glActiveTexture(GL_TEXTURE0 + 0); glBindImageTextureEXT(0, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F); if (_numVarying > 0) { - glUniform1i(_varyingUniform, 1); glBindImageTextureEXT(1, _vertexTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F); } #endif + CHECK_GL_ERROR("BindVertexBuffer \n"); + } void OsdGlslKernelDispatcher::UnbindVertexBuffer() { + if (_currentVertexBuffer) { + unbindTextureBuffer(0); + } + if (_currentVaryingBuffer) { + unbindTextureBuffer(1); + } + _currentVertexBuffer = NULL; + _currentVaryingBuffer = NULL; } - void OsdGlslKernelDispatcher::Synchronize() { glFinish(); @@ -263,14 +264,15 @@ OsdGlslKernelDispatcher::Synchronize() { void OsdGlslKernelDispatcher::bindTextureBuffer( GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const { - + if (sampler == -1) { OSD_ERROR("BindTextureError:: sampler = %d\n", sampler); return; } - OSD_DEBUG("BindTextureBuffer sampler=%d, buffer=%d, texture = %d, E%x\n", sampler, buffer, texture, glGetError()); + OSD_DEBUG("BindTextureBuffer unit=%d, sampler=%d, buffer=%d, texture = %d, E%x\n", unit, sampler, buffer, texture, glGetError()); glUniform1i(sampler, unit); + CHECK_GL_ERROR("BindTextureBuffer glUniform %d\n", unit); glActiveTexture(GL_TEXTURE0 + unit); CHECK_GL_ERROR("BindTextureBuffer glActiveTexture %d\n", unit); glBindTexture(GL_TEXTURE_BUFFER, texture); @@ -280,38 +282,137 @@ OsdGlslKernelDispatcher::bindTextureBuffer( glActiveTexture(GL_TEXTURE0); } -bool -OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDefine) { - _prgKernel = glCreateProgram(); +void +OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const { + + glActiveTexture(GL_TEXTURE0 + unit); + glBindTexture(GL_TEXTURE_BUFFER, 0); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkFaceVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[F_IT][level-1], + _tableOffsets[F_ITa][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[E_IT][level-1], + _tableOffsets[E_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_IT][level-1], + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( + FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { + + _shader->ApplyCatmarkVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, pass, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyLoopEdgeVerticesKernel(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[E_IT][level-1], + _tableOffsets[E_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB( + FarMesh * mesh, int offset, int level, int start, int end, void * data) const { + + _shader->ApplyLoopVertexVerticesKernelB(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_IT][level-1], + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, start, end); +} + +void +OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA( + FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { + + _shader->ApplyLoopVertexVerticesKernelA(_currentVertexBuffer, _currentVaryingBuffer, + _tableOffsets[V_ITa][level-1], + _tableOffsets[V_W][level-1], + offset, pass, start, end); +} + +// ------------------------------------------------------------------------------- + +OsdGlslKernelDispatcher::ComputeShader::ComputeShader() : + _program(0) +{ +} + +OsdGlslKernelDispatcher::ComputeShader::~ComputeShader() +{ + if (_program) + glDeleteProgram(_program); +} + +bool +OsdGlslKernelDispatcher::ComputeShader::Compile(int numVertexElements, int numVaryingElements) { + + // XXX: NOTE: current GLSL only supports numVertexElements = 6!!! + assert(numVertexElements == 6); + + _numVertexElements = numVertexElements; + _numVaryingElements = numVaryingElements; + _program = glCreateProgram(); GLuint shader = glCreateShader(GL_VERTEX_SHADER); char constantDefine[256]; snprintf(constantDefine, 256, - "#define NUM_VARYING_ELEMENTS %d\n", _numVarying); + "#define NUM_VARYING_ELEMENTS %d\n", numVaryingElements); const char *shaderSources[3]; shaderSources[0] = constantDefine; - shaderSources[1] = shaderDefine; + shaderSources[1] = shaderDefines; shaderSources[2] = shaderSource; glShaderSource(shader, 3, shaderSources, NULL); glCompileShader(shader); - glAttachShader(_prgKernel, shader); + glAttachShader(_program, shader); const char *outputs[] = { "outPosition", "outNormal", "gl_NextBuffer", "outVaryingData" }; - int nOutputs = _numVarying > 0 ? 4 : 2; - glTransformFeedbackVaryings(_prgKernel, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS); + int nOutputs = numVaryingElements > 0 ? 4 : 2; + + glTransformFeedbackVaryings(_program, nOutputs, outputs, GL_INTERLEAVED_ATTRIBS); CHECK_GL_ERROR("Transform feedback initialize \n"); GLint linked = 0; - glLinkProgram(_prgKernel); - glGetProgramiv(_prgKernel, GL_LINK_STATUS, &linked); + glLinkProgram(_program); + glGetProgramiv(_program, GL_LINK_STATUS, &linked); if (linked == GL_FALSE) { OSD_ERROR("Fail to link shader\n"); @@ -320,56 +421,79 @@ OsdGlslKernelDispatcher::compile(const char *shaderSource, const char *shaderDef glGetShaderInfoLog(shader, 1024, NULL, buffer); OSD_ERROR(buffer); - glGetProgramInfoLog(_prgKernel, 1024, NULL, buffer); + glGetProgramInfoLog(_program, 1024, NULL, buffer); OSD_ERROR(buffer); - glDeleteProgram(_prgKernel); - _prgKernel = 0; + glDeleteProgram(_program); + _program = 0; // XXX ERROR HANDLE return false; } glDeleteShader(shader); - _vertexUniform = glGetUniformLocation(_prgKernel, "vertex"); - _varyingUniform = glGetUniformLocation(_prgKernel, "varyingData"); + _vertexUniform = glGetUniformLocation(_program, "vertex"); + _varyingUniform = glGetUniformLocation(_program, "varyingData"); + + _subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace"); + _subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge"); + _subComputeVertexA = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexA"); + _subComputeVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeVertexB"); + _subComputeLoopVertexB = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "loopComputeVertexB"); + + _uniformVertexPass = glGetUniformLocation(_program, "vertexPass"); + _uniformIndexStart = glGetUniformLocation(_program, "indexStart"); + _uniformIndexOffset = glGetUniformLocation(_program, "indexOffset"); + + _tableUniforms.resize(TABLE_MAX); + _tableOffsetUniforms.resize(TABLE_MAX); + + _tableUniforms[F_IT] = glGetUniformLocation(_program, "_F0_IT"); + _tableUniforms[F_ITa] = glGetUniformLocation(_program, "_F0_ITa"); + _tableUniforms[E_IT] = glGetUniformLocation(_program, "_E0_IT"); + _tableUniforms[V_IT] = glGetUniformLocation(_program, "_V0_IT"); + _tableUniforms[V_ITa] = glGetUniformLocation(_program, "_V0_ITa"); + _tableUniforms[E_W] = glGetUniformLocation(_program, "_E0_S"); + _tableUniforms[V_W] = glGetUniformLocation(_program, "_V0_S"); + _tableOffsetUniforms[F_IT] = glGetUniformLocation(_program, "F_IT_ofs"); + _tableOffsetUniforms[F_ITa] = glGetUniformLocation(_program, "F_ITa_ofs"); + _tableOffsetUniforms[E_IT] = glGetUniformLocation(_program, "E_IT_ofs"); + _tableOffsetUniforms[V_IT] = glGetUniformLocation(_program, "V_IT_ofs"); + _tableOffsetUniforms[V_ITa] = glGetUniformLocation(_program, "V_ITa_ofs"); + _tableOffsetUniforms[E_W] = glGetUniformLocation(_program, "E_W_ofs"); + _tableOffsetUniforms[V_W] = glGetUniformLocation(_program, "V_W_ofs"); return true; } void -OsdGlslKernelDispatcher::unbindTextureBuffer(int unit) const { - glActiveTexture(GL_TEXTURE0 + unit); - glBindTexture(GL_TEXTURE_BUFFER, 0); -} - -void -OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass) const { +OsdGlslKernelDispatcher::ComputeShader::transformGpuBufferData(OsdGpuVertexBuffer *vertexBuffer, OsdGpuVertexBuffer *varyingBuffer, + GLint offset, int start, int end) const { int count = end - start; if (count <= 0) return; - OSD_DEBUG("_transformGpuBufferData kernel=%d E%x, offset=%d, count=%d\n", kernel, glGetError(), offset, count); - - glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &kernel); - glUniform1i(uniformVertexPass, vertexPass); // XXX + OSD_DEBUG("_transformGpuBufferData offset=%d, count=%d\n", glGetError(), offset, count); // set batch range - glUniform1i(uniformIndexStart, start); - glUniform1i(uniformIndexOffset, offset); + glUniform1i(_uniformIndexStart, start); + glUniform1i(_uniformIndexOffset, offset); // XXX: end is not used here now CHECK_GL_ERROR("Uniform index set at offset=%d. start=%d\n", offset, start); // set transform feedback buffer - int vertexStride = _numVertexElements*sizeof(float); - int varyingStride = _numVarying*sizeof(float); - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, _vertexBuffer, - (start+offset)*vertexStride, count*vertexStride); - CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n"); + if (vertexBuffer) { + int vertexStride = vertexBuffer->GetNumElements()*sizeof(float); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vertexBuffer->GetGpuBuffer(), + (start+offset)*vertexStride, count*vertexStride); + } - if (_numVarying > 0){ - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, _varyingBuffer, + if (varyingBuffer){ + int varyingStride = varyingBuffer->GetNumElements()*sizeof(float); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 1, varyingBuffer->GetGpuBuffer(), (start+offset)*varyingStride, count*varyingStride); } + CHECK_GL_ERROR("transformGpuBufferData glBindBufferRange\n"); + glBeginTransformFeedback(GL_POINTS); CHECK_GL_ERROR("transformGpuBufferData glBeginTransformFeedback\n"); @@ -387,68 +511,84 @@ OsdGlslKernelDispatcher::transformGpuBufferData(GLuint kernel, GLint offset, int } void -OsdGlslKernelDispatcher::ApplyCatmarkFaceVerticesKernel( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - - glUniform1i(_tableOffsetUniforms[F_IT], _tableOffsets[F_IT][level-1]); - glUniform1i(_tableOffsetUniforms[F_ITa], _tableOffsets[F_ITa][level-1]); - transformGpuBufferData(subComputeFace, offset, start, end); +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkFaceVerticesKernel( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end) { + + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeFace); + glUniform1i(_tableOffsetUniforms[F_IT], F_IT_ofs); + glUniform1i(_tableOffsetUniforms[F_ITa], F_ITa_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyCatmarkEdgeVerticesKernel( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { - - glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]); - glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]); - transformGpuBufferData(subComputeEdge, offset, start, end); +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkEdgeVerticesKernel( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end) { + + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge); + glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs); + glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelB( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelB( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]); - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeVertexB, offset, start, end); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexB); + glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyCatmarkVertexVerticesKernelA( - FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyCatmarkVertexVerticesKernelA( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeVertexA, offset, start, end, pass); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA); + glUniform1i(_uniformVertexPass, pass ? 1 : 0); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyLoopEdgeVerticesKernel( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyLoopEdgeVerticesKernel( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end) { - glUniform1i(_tableOffsetUniforms[E_IT], _tableOffsets[E_IT][level-1]); - glUniform1i(_tableOffsetUniforms[E_W], _tableOffsets[E_W][level-1]); - transformGpuBufferData(subComputeEdge, offset, start, end); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeEdge); + glUniform1i(_tableOffsetUniforms[E_IT], E_IT_ofs); + glUniform1i(_tableOffsetUniforms[E_W], E_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelB( - FarMesh * mesh, int offset, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelB( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_IT], _tableOffsets[V_IT][level-1]); - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeLoopVertexB, offset, start, end); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeLoopVertexB); + glUniform1i(_tableOffsetUniforms[V_IT], V_IT_ofs); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } void -OsdGlslKernelDispatcher::ApplyLoopVertexVerticesKernelA( - FarMesh * mesh, int offset, bool pass, int level, int start, int end, void * data) const { +OsdGlslKernelDispatcher::ComputeShader::ApplyLoopVertexVerticesKernelA( + OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end) { - glUniform1i(_tableOffsetUniforms[V_ITa], _tableOffsets[V_ITa][level-1]); - glUniform1i(_tableOffsetUniforms[V_W], _tableOffsets[V_W][level-1]); - transformGpuBufferData(subComputeVertexA, offset, start, end, pass); + glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeVertexA); + glUniform1i(_uniformVertexPass, pass ? 1 : 0); + glUniform1i(_tableOffsetUniforms[V_ITa], V_ITa_ofs); + glUniform1i(_tableOffsetUniforms[V_W], V_W_ofs); + transformGpuBufferData(vertex, varying, offset, start, end); } } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/glslDispatcher.h b/opensubdiv/osd/glslDispatcher.h index 7e2c007f..eefef98b 100644 --- a/opensubdiv/osd/glslDispatcher.h +++ b/opensubdiv/osd/glslDispatcher.h @@ -89,11 +89,11 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr); - virtual void BeginLaunchKernel(); - - virtual void EndLaunchKernel(); + virtual void OnKernelLaunch(); - virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count); + virtual void OnKernelFinish(); + + virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int numVertices); virtual void BindVertexBuffer(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); @@ -110,42 +110,100 @@ public: protected: + class ComputeShader { + public: + ComputeShader(); + ~ComputeShader(); + + bool Compile(int numVertexElements, int numVaryingElements); + + GLuint GetTableUniform(int table) const { + return _tableUniforms[table]; + } + GLuint GetVertexUniform() const { return _vertexUniform; } + GLuint GetVaryingUniform() const { return _varyingUniform; } + + void ApplyCatmarkFaceVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int F_IT_ofs, int F_ITa_ofs, int offset, int start, int end); + + void ApplyCatmarkEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end); + + void ApplyCatmarkVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end); + + void ApplyCatmarkVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end); + + void ApplyLoopEdgeVerticesKernel(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int E_IT_ofs, int E_W_ofs, int offset, int start, int end); + + void ApplyLoopVertexVerticesKernelB(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_IT_ofs, int V_ITa_ofs, int V_W_ofs, int offset, int start, int end); + + void ApplyLoopVertexVerticesKernelA(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + int V_ITa_ofs, int V_W_ofs, int offset, bool pass, int start, int end); + + void UseProgram () const { + glUseProgram(_program); + } + + struct Match { + Match(int numVertexElements, int numVaryingElements) : + _numVertexElements(numVertexElements), _numVaryingElements(numVaryingElements) {} + bool operator() (const ComputeShader &shader) { + return (shader._numVertexElements == _numVertexElements + && shader._numVaryingElements == _numVaryingElements); + } + int _numVertexElements, _numVaryingElements; + }; + + friend class Match; + + private: + void transformGpuBufferData(OsdGpuVertexBuffer *vertex, OsdGpuVertexBuffer *varying, + GLint offset, int start, int end) const; + + int _numVertexElements; + int _numVaryingElements; + + GLuint _program; + + GLuint _uniformVertexPass; + GLuint _uniformIndexStart; + GLuint _uniformIndexOffset; + + GLuint _vertexUniform, + _varyingUniform; + + // shader locations + GLuint _subComputeFace, _subComputeEdge, _subComputeVertexA, _subComputeVertexB; + GLuint _subComputeLoopVertexB; + + std::vector _tableUniforms; + std::vector _tableOffsetUniforms; + + }; + void bindTextureBuffer(GLuint sampler, GLuint buffer, GLuint texture, GLenum type, int unit) const; void unbindTextureBuffer(int unit) const; - void transformGpuBufferData(GLuint kernel, GLint offset, int start, int end, bool vertexPass=false) const; - - bool compile(const char *shaderSource, const char *shaderDefine); - - GLuint _prgKernel; - - int _numVertexElements, - _numVarying; - - GLuint _vertexBuffer, - _varyingBuffer; + ComputeShader * _shader; // texture for vertex GLuint _vertexTexture, _varyingTexture; - - GLuint _vertexUniform, - _varyingUniform; + + OsdGpuVertexBuffer *_currentVertexBuffer, + *_currentVaryingBuffer; // table buffers std::vector _tableBuffers; std::vector _tableTextures; - std::vector _tableUniforms; - std::vector _tableOffsetUniforms; - GLuint uniformVertexPass; - GLuint uniformIndexStart; - GLuint uniformIndexOffset; - - // shader locations - GLuint subComputeFace, subComputeEdge, subComputeVertexA, subComputeVertexB; - GLuint subComputeLoopVertexB; + // static shader registry (XXX tentative..) + static std::vector shaderRegistry; }; } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/kernelDispatcher.h b/opensubdiv/osd/kernelDispatcher.h index a6b815ff..5c34cd2c 100644 --- a/opensubdiv/osd/kernelDispatcher.h +++ b/opensubdiv/osd/kernelDispatcher.h @@ -82,9 +82,9 @@ public: virtual void CopyTable(int tableIndex, size_t size, const void *ptr) = 0; - virtual void BeginLaunchKernel() = 0; + virtual void OnKernelLaunch() = 0; - virtual void EndLaunchKernel() = 0; + virtual void OnKernelFinish() = 0; virtual OsdVertexBuffer *InitializeVertexBuffer(int numElements, int count) = 0; diff --git a/opensubdiv/osd/mesh.cpp b/opensubdiv/osd/mesh.cpp index 74417de8..757b4649 100644 --- a/opensubdiv/osd/mesh.cpp +++ b/opensubdiv/osd/mesh.cpp @@ -102,7 +102,6 @@ OsdMesh::Create(OsdHbrMesh *hbrMesh, int level, const std::string &kernel) { OSD_DEBUG("PREP: NumCoarseVertex = %d\n", _mMesh->GetNumCoarseVertices()); OSD_DEBUG("PREP: NumVertex = %d\n", _mMesh->GetNumVertices()); - OSD_DEBUG("PREP: NumTables = %d\n", _mMesh->GetNumSubdivisionTables()); const FarSubdivisionTables* table = _mMesh->GetSubdivision(); @@ -145,11 +144,11 @@ OsdMesh::Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying) { _dispatcher->BindVertexBuffer(vertex, varying); - _dispatcher->BeginLaunchKernel(); + _dispatcher->OnKernelLaunch(); _mMesh->Subdivide(_level+1); - _dispatcher->EndLaunchKernel(); + _dispatcher->OnKernelFinish(); _dispatcher->UnbindVertexBuffer(); } diff --git a/opensubdiv/osd/mesh.h b/opensubdiv/osd/mesh.h index 6558f2f9..b2790231 100644 --- a/opensubdiv/osd/mesh.h +++ b/opensubdiv/osd/mesh.h @@ -95,7 +95,13 @@ public: OsdVertexBuffer *InitializeVertexBuffer(int numElements); - void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying); + // for non-interleaved vertex data + void Subdivide(OsdVertexBuffer *vertex, OsdVertexBuffer *varying = NULL); + +/* + // for interleaved vertex data ? + template void Subdivide(T *vertex) { } +*/ void Synchronize(); diff --git a/opensubdiv/osd/vertexBuffer.h b/opensubdiv/osd/vertexBuffer.h index 9111dc16..dce37f68 100644 --- a/opensubdiv/osd/vertexBuffer.h +++ b/opensubdiv/osd/vertexBuffer.h @@ -9,69 +9,83 @@ namespace OPENSUBDIV_VERSION { class OsdVertexBuffer { public: + OsdVertexBuffer(int numElements) : _numElements(numElements) {} virtual ~OsdVertexBuffer() {} - virtual void UpdateData(const float *src, int count) = 0; + + virtual void UpdateData(const float *src, int numVertices) = 0; + virtual GLuint GetGpuBuffer() = 0; + + int GetNumElements() const { + return _numElements; + } + +protected: + int _numElements; }; class OsdGpuVertexBuffer : public OsdVertexBuffer { public: - OsdGpuVertexBuffer(int numElements, int count) : _vbo(0), _numElements(numElements) { - int stride = numElements * count * sizeof(float); + OsdGpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _vbo(0) { + int size = numElements * numVertices * sizeof(float); glGenBuffers(1, &_vbo); glBindBuffer(GL_ARRAY_BUFFER, _vbo); - glBufferData(GL_ARRAY_BUFFER, stride, 0, GL_STREAM_DRAW); + glBufferData(GL_ARRAY_BUFFER, size, 0, GL_STREAM_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); } virtual ~OsdGpuVertexBuffer() { glDeleteBuffers(1, &_vbo); } - virtual void UpdateData(const float *src, int count) { + + virtual void UpdateData(const float *src, int numVertices) { glBindBuffer(GL_ARRAY_BUFFER, _vbo); float * pointer = (float*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); - memcpy(pointer, src, _numElements * count * sizeof(float)); + memcpy(pointer, src, _numElements * numVertices * sizeof(float)); glUnmapBuffer(GL_ARRAY_BUFFER); glBindBuffer(GL_ARRAY_BUFFER, 0); } + virtual GLuint GetGpuBuffer() { return _vbo; } -private: + +protected: GLuint _vbo; - int _numElements; }; class OsdCpuVertexBuffer : public OsdVertexBuffer { public: - OsdCpuVertexBuffer(int numElements, int count) : _cpuVbo(NULL), _vboSize(0), _numElements(numElements), _vbo(0) { - _cpuVbo = new float[numElements * count]; - _vboSize = numElements * count; + OsdCpuVertexBuffer(int numElements, int numVertices) : OsdVertexBuffer(numElements), _cpuVbo(NULL), _vboSize(0), _vbo(0) { + _vboSize = numElements * numVertices; + _cpuVbo = new float[numElements * numVertices]; } virtual ~OsdCpuVertexBuffer() { - if(_cpuVbo) delete[] _cpuVbo; - if(_vbo) glDeleteBuffers(1, &_vbo); + delete [] _cpuVbo; + if (_vbo) + glDeleteBuffers(1, &_vbo); } - virtual void UpdateData(const float *src, int count) { - memcpy(_cpuVbo, src, _numElements * count * sizeof(float)); + + virtual void UpdateData(const float *src, int numVertices) { + memcpy(_cpuVbo, src, _numElements * numVertices * sizeof(float)); } + float *GetCpuBuffer() { return _cpuVbo; } + + // XXX: this method name is missleading virtual GLuint GetGpuBuffer() { - if(!_vbo) glGenBuffers(1, &_vbo); + if (!_vbo) + glGenBuffers(1, &_vbo); glBindBuffer(GL_ARRAY_BUFFER, _vbo); glBufferData(GL_ARRAY_BUFFER, _vboSize * sizeof(float), _cpuVbo, GL_STREAM_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); return _vbo; } - int GetNumElements() const { - return _numElements; - } -private: +protected: float *_cpuVbo; int _vboSize; - int _numElements; GLuint _vbo; };