From 7954fbab37a775a13fc7d04d95ab461c4de71524 Mon Sep 17 00:00:00 2001 From: manuelk Date: Thu, 25 Dec 2014 13:03:53 -0800 Subject: [PATCH] Fix tangents in Osd::EvalLimitController - don't rotate (s,t) coordinates but rotate the patch instead ! - refactor osd/cpuEvalLimitKernels to share Far::PatchTables cubic spline interpolation functions : this replaces tensor product formulation with weight matrices, which does not really impact performance here, but would have to be replaced when implementing regular gridding functions. - fix OsdCpuEvalLimitController to not rotate coordinates and pass the rotation bitfields - expose Far::PatchTables spline interpolation API (protected -> public) - fix glEvalLimit tangent buffers (remove empty padding - see below) - change policy for tangent buffers : the output buffer descriptor is **NO LONGER APPLIED** to tangent output buffers. Tangent primvar data buffers are no longer applying the offset and stride from the descriptor (because it doesn't make sense to share it). If more flexibility is required, we will consider adding independent descriptors for the tangent buffers. This change will impact existing code that generates tangents with the EvalLimit controller. 
fixes #370 --- examples/glEvalLimit/glEvalLimit.cpp | 24 +- opensubdiv/far/patchTables.cpp | 4 +- opensubdiv/far/patchTables.h | 26 +- opensubdiv/osd/cpuEvalLimitController.cpp | 71 ++- opensubdiv/osd/cpuEvalLimitController.h | 12 +- opensubdiv/osd/cpuEvalLimitKernel.cpp | 529 ++++++++-------------- opensubdiv/osd/cpuEvalLimitKernel.h | 16 +- 7 files changed, 277 insertions(+), 405 deletions(-) diff --git a/examples/glEvalLimit/glEvalLimit.cpp b/examples/glEvalLimit/glEvalLimit.cpp index d2fa1ac1..6facc01c 100644 --- a/examples/glEvalLimit/glEvalLimit.cpp +++ b/examples/glEvalLimit/glEvalLimit.cpp @@ -415,12 +415,12 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { } delete g_dQs; - g_dQs = Osd::CpuGLVertexBuffer::Create(6,g_nparticles); - memset( g_dQs->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float)); + g_dQs = Osd::CpuGLVertexBuffer::Create(3,g_nparticles); + memset( g_dQs->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float)); delete g_dQt; - g_dQt = Osd::CpuGLVertexBuffer::Create(6,g_nparticles); - memset( g_dQt->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float)); + g_dQt = Osd::CpuGLVertexBuffer::Create(3,g_nparticles); + memset( g_dQt->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float)); } updateGeom(); @@ -439,8 +439,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { } //------------------------------------------------------------------------------ -struct Program -{ +struct Program { GLuint program; GLuint uniformModelViewProjectionMatrix; GLuint attrPosition; @@ -449,8 +448,7 @@ struct Program //------------------------------------------------------------------------------ static void -checkGLErrors(std::string const & where = "") -{ +checkGLErrors(std::string const & where = "") { GLuint err; while ((err = glGetError()) != GL_NO_ERROR) { @@ -462,8 +460,7 @@ checkGLErrors(std::string const & where = "") //------------------------------------------------------------------------------ static GLuint -compileShader(GLenum shaderType, const char 
*source) -{ +compileShader(GLenum shaderType, const char *source) { GLuint shader = glCreateShader(shaderType); glShaderSource(shader, 1, &source, NULL); glCompileShader(shader); @@ -473,8 +470,8 @@ compileShader(GLenum shaderType, const char *source) //------------------------------------------------------------------------------ static bool -linkDefaultProgram() -{ +linkDefaultProgram() { + #if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0) #define GLSL_VERSION_DEFINE "#version 400\n" #else @@ -537,8 +534,7 @@ linkDefaultProgram() //------------------------------------------------------------------------------ static inline void -setSharpnessColor(float s, float *r, float *g, float *b) -{ +setSharpnessColor(float s, float *r, float *g, float *b) { // 0.0 2.0 4.0 // green --- yellow --- red *r = std::min(1.0f, s * 0.5f); diff --git a/opensubdiv/far/patchTables.cpp b/opensubdiv/far/patchTables.cpp index 8a39e894..01c9c26d 100644 --- a/opensubdiv/far/patchTables.cpp +++ b/opensubdiv/far/patchTables.cpp @@ -144,10 +144,10 @@ getBoxSplineWeights(float v, float w, float B[12]) { } void -PatchTables::getBasisWeights(TensorBasis basis, PatchParam::BitField bits, +PatchTables::GetBasisWeights(TensorBasis basis, PatchParam::BitField bits, float s, float t, float point[16], float deriv1[16], float deriv2[16]) { - int const rots[4][16] = + static int const rots[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 12, 8, 4, 0, 13, 9, 5, 1, 14, 10, 6, 2, 15, 11, 7, 3 }, { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }, diff --git a/opensubdiv/far/patchTables.h b/opensubdiv/far/patchTables.h index b7a6e20b..cd233732 100644 --- a/opensubdiv/far/patchTables.h +++ b/opensubdiv/far/patchTables.h @@ -412,6 +412,16 @@ public: template void Limit(PatchHandle const & handle, float s, float t, T const & src, U & dst) const; + enum TensorBasis { + BASIS_BEZIER, ///< Bi-cubic bezier patch basis + BASIS_BSPLINE ///< Bi-cubic bspline patch basis + 
}; + + /// \brief Returns bi-cubic weights matrix for a given (s,t) location + /// on the patch + static void GetBasisWeights(TensorBasis basis, PatchParam::BitField bits, + float s, float t, float point[16], float deriv1[16], float deriv2[16]); + protected: friend class PatchTablesFactory; @@ -419,18 +429,6 @@ protected: // Factory constructor PatchTables(int maxvalence); - enum TensorBasis { - BASIS_BEZIER, - BASIS_BSPLINE - }; - - // Returns bi-cubic interpolation coefficients for a given (s,t) location - // on a b-spline patch - static void getBasisWeights(TensorBasis basis, PatchParam::BitField bits, - float s, float t, float point[16], float deriv1[16], float deriv2[16]); - -protected: - void reservePatchArrays(int numPatchArrays); void pushPatchArray(PatchDescriptor desc, @@ -752,7 +750,7 @@ PatchTables::Limit(PatchHandle const & handle, float s, float t, if (ptype>=PatchDescriptor::REGULAR and ptype<=PatchDescriptor::CORNER) { - getBasisWeights(BASIS_BSPLINE, bits, s, t, Q, Qd1, Qd2); + GetBasisWeights(BASIS_BSPLINE, bits, s, t, Q, Qd1, Qd2); ConstIndexArray cvs = GetPatchVertices(handle); @@ -781,7 +779,7 @@ PatchTables::Limit(PatchHandle const & handle, float s, float t, assert(_endcapStencilTables); - getBasisWeights(BASIS_BEZIER, bits, s, t, Q, Qd1, Qd2); + GetBasisWeights(BASIS_BEZIER, bits, s, t, Q, Qd1, Qd2); InterpolateGregoryPatch(_endcapStencilTables, handle.vertIndex, s, t, Q, Qd1, Qd2, src, dst); diff --git a/opensubdiv/osd/cpuEvalLimitController.cpp b/opensubdiv/osd/cpuEvalLimitController.cpp index 5bd6e348..ae72fade 100644 --- a/opensubdiv/osd/cpuEvalLimitController.cpp +++ b/opensubdiv/osd/cpuEvalLimitController.cpp @@ -67,68 +67,62 @@ CpuEvalLimitController::EvalLimitSample( LimitLocation const & coord, if (vertexData.in) { - float * out = outQ ? outQ + outDesc.offset : 0, - * outDu = outDQU ? outDQU + outDesc.offset : 0, - * outDv = outDQV ? 
outDQV + outDesc.offset : 0; - Far::PatchTables const & ptables = context->GetPatchTables(); - computeSubPatchCoords(ptables.GetPatchParam(*handle), s, t); + Far::PatchParam pparam = ptables.GetPatchParam(*handle); + pparam.bitField.Normalize(s, t); Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle); Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle); - switch( desc.GetType() ) { - case Desc::REGULAR : evalBSpline( t, s, cvs.begin(), + switch (desc.GetType()) { + case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(), vertexData.inDesc, vertexData.in, outDesc, - out, outDu, outDv ); + outQ, outDQU, outDQV ); break; - - case Desc::BOUNDARY : evalBoundary( t, s, cvs.begin(), + case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(), vertexData.inDesc, vertexData.in, outDesc, - out, outDu, outDv ); + outQ, outDQU, outDQV ); break; - - case Desc::CORNER : evalCorner( t, s, cvs.begin(), + case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(), vertexData.inDesc, vertexData.in, outDesc, - out, outDu, outDv ); + outQ, outDQU, outDQV ); break; - case Desc::GREGORY : evalGregory( t, s, cvs.begin(), + case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(), &ptables.GetVertexValenceTable()[0], ptables.GetPatchQuadOffsets(*handle).begin(), ptables.GetMaxValence(), vertexData.inDesc, vertexData.in, outDesc, - out, outDu, outDv ); + outQ, outDQU, outDQV ); break; - - case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( t, s, cvs.begin(), + case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(), &ptables.GetVertexValenceTable()[0], ptables.GetPatchQuadOffsets(*handle).begin(), ptables.GetMaxValence(), vertexData.inDesc, vertexData.in, outDesc, - out, outDu, outDv ); + outQ, outDQU, outDQV ); break; case Desc::GREGORY_BASIS : { Far::StencilTables const * stencils = ptables.GetEndCapStencilTables(); assert(stencils and stencils->GetNumStencils()>0); - evalGregoryBasis( t, s, + 
evalGregoryBasis( pparam.bitField, s, t, *stencils, ptables.GetEndCapStencilIndex(*handle), vertexData.inDesc, vertexData.in, vertexData.outDesc, - out, outDu, outDv ); + outQ, outDQU, outDQV ); } break; default: assert(0); @@ -157,44 +151,46 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords, Far::PatchTables const & ptables = context->GetPatchTables(); + Far::PatchParam pparam = ptables.GetPatchParam(*handle); + pparam.bitField.Normalize(s, t); + Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle); Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle); if (vertexData.in) { - int offset = vertexData.outDesc.stride * index; + int offset = vertexData.outDesc.stride * index, + doffset = vertexData.outDesc.length * index; if (vertexData.out) { + // note : don't apply outDesc.offset here, it's done inside patch + // evaluation float * out = vertexData.out+offset, - * outDu = vertexData.outDu ? vertexData.outDu+offset : 0, - * outDv = vertexData.outDv ? vertexData.outDv+offset : 0; + * outDu = vertexData.outDu ? vertexData.outDu+doffset : 0, + * outDv = vertexData.outDv ? 
vertexData.outDv+doffset : 0; - computeSubPatchCoords(ptables.GetPatchParam(*handle), s, t); - - switch(desc.GetType()) { - case Desc::REGULAR : evalBSpline( t, s, cvs.begin(), + switch (desc.GetType()) { + case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(), vertexData.inDesc, vertexData.in, vertexData.outDesc, out, outDu, outDv ); break; - - case Desc::BOUNDARY : evalBoundary( t, s, cvs.begin(), + case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(), vertexData.inDesc, vertexData.in, vertexData.outDesc, out, outDu, outDv ); break; - - case Desc::CORNER : evalCorner( t, s, cvs.begin(), + case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(), vertexData.inDesc, vertexData.in, vertexData.outDesc, out, outDu, outDv ); break; - case Desc::GREGORY : evalGregory( t, s, cvs.begin(), + case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(), &ptables.GetVertexValenceTable()[0], ptables.GetPatchQuadOffsets(*handle).begin(), ptables.GetMaxValence(), @@ -203,8 +199,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords, vertexData.outDesc, out, outDu, outDv ); break; - - case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( t, s, cvs.begin(), + case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(), &ptables.GetVertexValenceTable()[0], ptables.GetPatchQuadOffsets(*handle).begin(), ptables.GetMaxValence(), @@ -217,7 +212,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords, Far::StencilTables const * stencils = ptables.GetEndCapStencilTables(); assert(stencils and stencils->GetNumStencils()>0); - evalGregoryBasis( s, t, + evalGregoryBasis( pparam.bitField, s, t, *stencils, ptables.GetEndCapStencilIndex(*handle), vertexData.inDesc, @@ -231,6 +226,8 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords, } } + pparam.bitField.Rotate(s, t); + VaryingData const & varyingData = _currentBindState.varyingData; if (varyingData.in and 
varyingData.out) { @@ -282,7 +279,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords, // XXXX manuelk this assumes FVar data is ordered with 4 CVs / patch : // bi-cubic FVar interpolation will require proper topology // accessors in Far::PatchTables and this code will change - evalBilinear( t, s, zeroRing, + evalBilinear( s, t, zeroRing, facevaryingData.inDesc, &facevaryingData.in[handle->patchIndex*4*facevaryingData.outDesc.stride], facevaryingData.outDesc, diff --git a/opensubdiv/osd/cpuEvalLimitController.h b/opensubdiv/osd/cpuEvalLimitController.h index e3d290e4..432fe8ba 100644 --- a/opensubdiv/osd/cpuEvalLimitController.h +++ b/opensubdiv/osd/cpuEvalLimitController.h @@ -74,7 +74,9 @@ public: /// /// @param inQ input vertex data /// - /// @param oDesc data descriptor shared by all output data buffers + /// @param oDesc data descriptor for the outQ data buffer + /// -- derivative buffers do not have a descriptor and + /// cannot be offset or padded with a stride (yet ?) /// /// @param outQ output vertex data /// @@ -102,7 +104,7 @@ public: /// /// @param inQ input varying data /// - /// @param oDesc data descriptor shared by all output data buffers + /// @param oDesc data descriptor for the outQ data buffer /// /// @param outQ output varying data /// @@ -127,7 +129,7 @@ public: /// /// @param inQ input face-varying data /// - /// @param oDesc data descriptor shared by all output data buffers + /// @param oDesc data descriptor for the outQ data buffer /// /// @param outQ output face-varying data /// @@ -153,7 +155,9 @@ public: /// /// @param context the EvalLimitContext that the controller will evaluate /// - /// @param outDesc data descriptor (offset, length, stride) + /// @param outDesc data descriptor for the outQ data buffer + /// -- derivative buffers do not have a descriptor and + /// cannot be offset or padded with a stride (yet ?) 
/// /// @param outQ output vertex data /// diff --git a/opensubdiv/osd/cpuEvalLimitKernel.cpp b/opensubdiv/osd/cpuEvalLimitKernel.cpp index 69dc3448..ed8df6a9 100644 --- a/opensubdiv/osd/cpuEvalLimitKernel.cpp +++ b/opensubdiv/osd/cpuEvalLimitKernel.cpp @@ -23,6 +23,7 @@ // #include "../osd/cpuEvalLimitKernel.h" +#include "../far/patchTables.h" #include "../far/stencilTables.h" #include @@ -68,6 +69,30 @@ evalBilinear(float u, float v, } } +#ifdef TENSOR_PRODUCT_CUBIC_SPLINES + +// manuelk code was refactored to use the matrix formulation of cubic splines +// exposed in Far::PatchTables for consistency. I am keeping these temporarily +// for reference. + +inline void +evalCubicBezier(float u, float B[4], float BU[3]) { + float u2 = u*u, + w0 = 1.0f - u, + w2 = w0 * w0; + + B[0] = w0*w2; + B[1] = 3.0f * u * w2; + B[2] = 3.0f * u2 * w0; + B[3] = u*u2; + + if (BU) { + BU[0] = w2; + BU[1] = 2.0f * u * w0; + BU[2] = u2; + } +} + inline void evalCubicBSpline(float u, float B[4], float BU[4]) { float t = u; @@ -90,101 +115,107 @@ evalCubicBSpline(float u, float B[4], float BU[4]) { } } +inline void +univar4x4(float u, float B[4], float D[4]) { + float t = u; + float s = 1.0f - u; + + float A0 = s * s; + float A1 = 2 * s * t; + float A2 = t * t; + + B[0] = s * A0; + B[1] = t * A0 + s * A1; + B[2] = t * A1 + s * A2; + B[3] = t * A2; + + if (D) { + D[0] = - A0; + D[1] = A0 - A1; + D[2] = A1 - A2; + D[3] = A2; + } +} + +#endif void -evalBSpline(float u, float v, +evalBSpline(Far::PatchParam::BitField bits, + float s, float t, Far::Index const * vertexIndices, VertexBufferDescriptor const & inDesc, float const * inQ, VertexBufferDescriptor const & outDesc, float * outQ, - float * outDQU, - float * outDQV ) { + float * outDQ1, + float * outDQ2 ) { // make sure that we have enough space to store results assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) ); - bool evalDeriv = (outDQU or outDQV); - - float B[4], D[4], - 
*BU=(float*)alloca(inDesc.length*4*sizeof(float)), - *DU=(float*)alloca(inDesc.length*4*sizeof(float)); - - memset(BU, 0, inDesc.length*4*sizeof(float)); - memset(DU, 0, inDesc.length*4*sizeof(float)); - - evalCubicBSpline(u, B, evalDeriv ? D : 0); + float Q[16], dQ1[16], dQ2[16]; + Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BSPLINE, bits, s, t, + outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0); float const * inOffset = inQ + inDesc.offset; - for (int i=0; i<4; ++i) { - for (int j=0; j<4; ++j) { + outQ += outDesc.offset; - float const * in = inOffset + vertexIndices[i+j*4]*inDesc.stride; - - for (int k=0; k