From 7954fbab37a775a13fc7d04d95ab461c4de71524 Mon Sep 17 00:00:00 2001
From: manuelk <mkraemer@pixar.com>
Date: Thu, 25 Dec 2014 13:03:53 -0800
Subject: [PATCH] Fix tangents in Osd::EvalLimitController

- don't rotate (s,t) coordinates but rotate the patch instead !

- refactor osd/cpuEvalLimitKernels to share Far::PatchTables cubic spline
  interpolation functions : this replaces tensor product formulation with
  weight matrices, which does not really impact performance here, but would
  have to be replaced when implementing regular gridding functions.

- fix OsdCpuEvalLimitController to stop rotating coordinates and instead pass the rotation bitfields to the kernels

- expose Far::PatchTables spline interpolation API (protected -> public)

- fix glEvalLimit tangent buffers (remove empty padding - see below)

- change policy for tangent buffers : the output buffer descriptor is
  **NO LONGER APPLIED** to tangent output buffers. Tangent primvar data
  buffers no longer apply the offset and stride from the descriptor
  (because it doesn't make sense to share it). If more flexibility is
  required, we will consider adding independent descriptors for the tangent
  buffers. This change will impact existing code that generates tangents
  with the EvalLimit controller.

fixes #370
---
 examples/glEvalLimit/glEvalLimit.cpp      |  24 +-
 opensubdiv/far/patchTables.cpp            |   4 +-
 opensubdiv/far/patchTables.h              |  26 +-
 opensubdiv/osd/cpuEvalLimitController.cpp |  71 ++-
 opensubdiv/osd/cpuEvalLimitController.h   |  12 +-
 opensubdiv/osd/cpuEvalLimitKernel.cpp     | 529 ++++++++--------------
 opensubdiv/osd/cpuEvalLimitKernel.h       |  16 +-
 7 files changed, 277 insertions(+), 405 deletions(-)

diff --git a/examples/glEvalLimit/glEvalLimit.cpp b/examples/glEvalLimit/glEvalLimit.cpp
index d2fa1ac1..6facc01c 100644
--- a/examples/glEvalLimit/glEvalLimit.cpp
+++ b/examples/glEvalLimit/glEvalLimit.cpp
@@ -415,12 +415,12 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
         }
 
         delete g_dQs;
-        g_dQs = Osd::CpuGLVertexBuffer::Create(6,g_nparticles);
-        memset( g_dQs->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
+        g_dQs = Osd::CpuGLVertexBuffer::Create(3,g_nparticles);
+        memset( g_dQs->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float));
 
         delete g_dQt;
-        g_dQt = Osd::CpuGLVertexBuffer::Create(6,g_nparticles);
-        memset( g_dQt->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
+        g_dQt = Osd::CpuGLVertexBuffer::Create(3,g_nparticles);
+        memset( g_dQt->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float));
     }
 
     updateGeom();
@@ -439,8 +439,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
 }
 
 //------------------------------------------------------------------------------
-struct Program
-{
+struct Program {
     GLuint program;
     GLuint uniformModelViewProjectionMatrix;
     GLuint attrPosition;
@@ -449,8 +448,7 @@ struct Program
 
 //------------------------------------------------------------------------------
 static void
-checkGLErrors(std::string const & where = "")
-{
+checkGLErrors(std::string const & where = "") {
     GLuint err;
     while ((err = glGetError()) != GL_NO_ERROR) {
 
@@ -462,8 +460,7 @@ checkGLErrors(std::string const & where = "")
 
 //------------------------------------------------------------------------------
 static GLuint
-compileShader(GLenum shaderType, const char *source)
-{
+compileShader(GLenum shaderType, const char *source) {
     GLuint shader = glCreateShader(shaderType);
     glShaderSource(shader, 1, &source, NULL);
     glCompileShader(shader);
@@ -473,8 +470,8 @@ compileShader(GLenum shaderType, const char *source)
 
 //------------------------------------------------------------------------------
 static bool
-linkDefaultProgram()
-{
+linkDefaultProgram() {
+
 #if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0)
     #define GLSL_VERSION_DEFINE "#version 400\n"
 #else
@@ -537,8 +534,7 @@ linkDefaultProgram()
 
 //------------------------------------------------------------------------------
 static inline void
-setSharpnessColor(float s, float *r, float *g, float *b)
-{
+setSharpnessColor(float s, float *r, float *g, float *b) {
     //  0.0       2.0       4.0
     // green --- yellow --- red
     *r = std::min(1.0f, s * 0.5f);
diff --git a/opensubdiv/far/patchTables.cpp b/opensubdiv/far/patchTables.cpp
index 8a39e894..01c9c26d 100644
--- a/opensubdiv/far/patchTables.cpp
+++ b/opensubdiv/far/patchTables.cpp
@@ -144,10 +144,10 @@ getBoxSplineWeights(float v, float w, float B[12]) {
 }
 
 void
-PatchTables::getBasisWeights(TensorBasis basis, PatchParam::BitField bits,
+PatchTables::GetBasisWeights(TensorBasis basis, PatchParam::BitField bits,
     float s, float t, float point[16], float deriv1[16], float deriv2[16]) {
 
-    int const rots[4][16] =
+    static int const rots[4][16] =
         { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
           { 12, 8, 4, 0, 13, 9, 5, 1, 14, 10, 6, 2, 15, 11, 7, 3 },
           { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 },
diff --git a/opensubdiv/far/patchTables.h b/opensubdiv/far/patchTables.h
index b7a6e20b..cd233732 100644
--- a/opensubdiv/far/patchTables.h
+++ b/opensubdiv/far/patchTables.h
@@ -412,6 +412,16 @@ public:
     template <class T, class U> void Limit(PatchHandle const & handle,
         float s, float t, T const & src, U & dst) const;
 
+    enum TensorBasis {
+        BASIS_BEZIER,    ///< Bi-cubic bezier patch basis
+        BASIS_BSPLINE    ///< Bi-cubic bspline patch basis
+    };
+
+    /// \brief Returns bi-cubic weights matrix for a given (s,t) location
+    /// on the patch
+    static void GetBasisWeights(TensorBasis basis, PatchParam::BitField bits,
+        float s, float t, float point[16], float deriv1[16], float deriv2[16]);
+
 protected:
 
     friend class PatchTablesFactory;
@@ -419,18 +429,6 @@ protected:
     // Factory constructor
     PatchTables(int maxvalence);
 
-    enum TensorBasis {
-        BASIS_BEZIER,
-        BASIS_BSPLINE
-    };
-
-    // Returns bi-cubic interpolation coefficients for a given (s,t) location
-    // on a b-spline patch
-    static void getBasisWeights(TensorBasis basis, PatchParam::BitField bits,
-        float s, float t, float point[16], float deriv1[16], float deriv2[16]);
-
-protected:
-
     void reservePatchArrays(int numPatchArrays);
 
     void pushPatchArray(PatchDescriptor desc,
@@ -752,7 +750,7 @@ PatchTables::Limit(PatchHandle const & handle, float s, float t,
 
     if (ptype>=PatchDescriptor::REGULAR and ptype<=PatchDescriptor::CORNER) {
 
-        getBasisWeights(BASIS_BSPLINE, bits, s, t, Q, Qd1, Qd2);
+        GetBasisWeights(BASIS_BSPLINE, bits, s, t, Q, Qd1, Qd2);
 
         ConstIndexArray cvs = GetPatchVertices(handle);
 
@@ -781,7 +779,7 @@ PatchTables::Limit(PatchHandle const & handle, float s, float t,
 
         assert(_endcapStencilTables);
 
-        getBasisWeights(BASIS_BEZIER, bits, s, t, Q, Qd1, Qd2);
+        GetBasisWeights(BASIS_BEZIER, bits, s, t, Q, Qd1, Qd2);
 
         InterpolateGregoryPatch(_endcapStencilTables, handle.vertIndex,
             s, t, Q, Qd1, Qd2, src, dst);
diff --git a/opensubdiv/osd/cpuEvalLimitController.cpp b/opensubdiv/osd/cpuEvalLimitController.cpp
index 5bd6e348..ae72fade 100644
--- a/opensubdiv/osd/cpuEvalLimitController.cpp
+++ b/opensubdiv/osd/cpuEvalLimitController.cpp
@@ -67,68 +67,62 @@ CpuEvalLimitController::EvalLimitSample( LimitLocation const & coord,
 
     if (vertexData.in) {
 
-        float * out   = outQ ? outQ + outDesc.offset : 0,
-              * outDu = outDQU ? outDQU + outDesc.offset : 0,
-              * outDv = outDQV ? outDQV + outDesc.offset : 0;
-
         Far::PatchTables const & ptables = context->GetPatchTables();
 
-        computeSubPatchCoords(ptables.GetPatchParam(*handle), s, t);
+        Far::PatchParam pparam = ptables.GetPatchParam(*handle);
+        pparam.bitField.Normalize(s, t);
 
         Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle);
 
         Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle);
-        switch( desc.GetType() ) {
-            case Desc::REGULAR  : evalBSpline( t, s, cvs.begin(),
+        switch (desc.GetType()) {
+            case Desc::REGULAR  : evalBSpline( pparam.bitField, s, t, cvs.begin(),
                                                vertexData.inDesc,
                                                vertexData.in,
                                                outDesc,
-                                               out, outDu, outDv );
+                                               outQ, outDQU, outDQV );
                                   break;
-
-            case Desc::BOUNDARY : evalBoundary( t, s, cvs.begin(),
+            case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(),
                                                 vertexData.inDesc,
                                                 vertexData.in,
                                                 outDesc,
-                                                out, outDu, outDv );
+                                                outQ, outDQU, outDQV );
                                   break;
-
-            case Desc::CORNER   : evalCorner( t, s, cvs.begin(),
+            case Desc::CORNER   : evalCorner( pparam.bitField, s, t, cvs.begin(),
                                               vertexData.inDesc,
                                               vertexData.in,
                                               outDesc,
-                                              out, outDu, outDv );
+                                              outQ, outDQU, outDQV );
                                   break;
-            case Desc::GREGORY  : evalGregory( t, s, cvs.begin(),
+            case Desc::GREGORY  : evalGregory( pparam.bitField, t, s, cvs.begin(),
                                                &ptables.GetVertexValenceTable()[0],
                                                ptables.GetPatchQuadOffsets(*handle).begin(),
                                                ptables.GetMaxValence(),
                                                vertexData.inDesc,
                                                vertexData.in,
                                                outDesc,
-                                               out, outDu, outDv );
+                                               outQ, outDQU, outDQV );
                                   break;
-
-            case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( t, s, cvs.begin(),
+            case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(),
                                                                &ptables.GetVertexValenceTable()[0],
                                                                ptables.GetPatchQuadOffsets(*handle).begin(),
                                                                ptables.GetMaxValence(),
                                                                vertexData.inDesc,
                                                                vertexData.in,
                                                                outDesc,
-                                                               out, outDu, outDv );
+                                                               outQ, outDQU, outDQV );
                                           break;
             case Desc::GREGORY_BASIS : {
                                            Far::StencilTables const * stencils =
                                                ptables.GetEndCapStencilTables();
                                            assert(stencils and stencils->GetNumStencils()>0);
-                                           evalGregoryBasis( t, s,
+                                           evalGregoryBasis( pparam.bitField, s, t,
                                                              *stencils,
                                                              ptables.GetEndCapStencilIndex(*handle),
                                                              vertexData.inDesc,
                                                              vertexData.in,
                                                              vertexData.outDesc,
-                                                             out, outDu, outDv );
+                                                             outQ, outDQU, outDQV );
                                        } break;
             default:
                 assert(0);
@@ -157,44 +151,46 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
 
     Far::PatchTables const & ptables = context->GetPatchTables();
 
+    Far::PatchParam pparam = ptables.GetPatchParam(*handle);
+    pparam.bitField.Normalize(s, t);
+
     Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle);
 
     Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle);
 
     if (vertexData.in) {
 
-        int offset = vertexData.outDesc.stride * index;
+        int offset = vertexData.outDesc.stride * index,
+            doffset = vertexData.outDesc.length * index;
 
         if (vertexData.out) {
 
+            // note : don't apply outDesc.offset here, it's done inside patch
+            // evaluation
             float * out   = vertexData.out+offset,
-                  * outDu = vertexData.outDu ? vertexData.outDu+offset : 0,
-                  * outDv = vertexData.outDv ? vertexData.outDv+offset : 0;
+                  * outDu = vertexData.outDu ? vertexData.outDu+doffset : 0,
+                  * outDv = vertexData.outDv ? vertexData.outDv+doffset : 0;
 
-            computeSubPatchCoords(ptables.GetPatchParam(*handle), s, t);
-
-            switch(desc.GetType()) {
-                case Desc::REGULAR  : evalBSpline( t, s, cvs.begin(),
+            switch (desc.GetType()) {
+                case Desc::REGULAR  : evalBSpline( pparam.bitField, s, t, cvs.begin(),
                                                    vertexData.inDesc,
                                                    vertexData.in,
                                                    vertexData.outDesc,
                                                    out, outDu, outDv );
                                       break;
-
-                case Desc::BOUNDARY : evalBoundary( t, s, cvs.begin(),
+                case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(),
                                                     vertexData.inDesc,
                                                     vertexData.in,
                                                     vertexData.outDesc,
                                                     out, outDu, outDv );
                                       break;
-
-                case Desc::CORNER   : evalCorner( t, s, cvs.begin(),
+                case Desc::CORNER   : evalCorner( pparam.bitField, s, t, cvs.begin(),
                                                   vertexData.inDesc,
                                                   vertexData.in,
                                                   vertexData.outDesc,
                                                   out, outDu, outDv );
                                       break;
-                case Desc::GREGORY  : evalGregory( t, s, cvs.begin(),
+                case Desc::GREGORY  : evalGregory( pparam.bitField, t, s, cvs.begin(),
                                                    &ptables.GetVertexValenceTable()[0],
                                                    ptables.GetPatchQuadOffsets(*handle).begin(),
                                                    ptables.GetMaxValence(),
@@ -203,8 +199,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
                                                    vertexData.outDesc,
                                                    out, outDu, outDv );
                                       break;
-
-                case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( t, s, cvs.begin(),
+                case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(),
                                                                    &ptables.GetVertexValenceTable()[0],
                                                                    ptables.GetPatchQuadOffsets(*handle).begin(),
                                                                    ptables.GetMaxValence(),
@@ -217,7 +212,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
                                                Far::StencilTables const * stencils =
                                                    ptables.GetEndCapStencilTables();
                                                assert(stencils and stencils->GetNumStencils()>0);
-                                               evalGregoryBasis( s, t,
+                                               evalGregoryBasis( pparam.bitField, s, t,
                                                                  *stencils,
                                                                  ptables.GetEndCapStencilIndex(*handle),
                                                                  vertexData.inDesc,
@@ -231,6 +226,8 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
         }
     }
 
+    pparam.bitField.Rotate(s, t);
+
     VaryingData const & varyingData = _currentBindState.varyingData;
 
     if (varyingData.in and varyingData.out) {
@@ -282,7 +279,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
             // XXXX manuelk this assumes FVar data is ordered with 4 CVs / patch :
             //              bi-cubic FVar interpolation will require proper topology
             //              accessors in Far::PatchTables and this code will change
-            evalBilinear( t, s, zeroRing,
+            evalBilinear( s, t, zeroRing,
                           facevaryingData.inDesc,
                           &facevaryingData.in[handle->patchIndex*4*facevaryingData.outDesc.stride],
                           facevaryingData.outDesc,
diff --git a/opensubdiv/osd/cpuEvalLimitController.h b/opensubdiv/osd/cpuEvalLimitController.h
index e3d290e4..432fe8ba 100644
--- a/opensubdiv/osd/cpuEvalLimitController.h
+++ b/opensubdiv/osd/cpuEvalLimitController.h
@@ -74,7 +74,9 @@ public:
     ///
     /// @param inQ     input vertex data
     ///
-    /// @param oDesc   data descriptor shared by all output data buffers
+    /// @param oDesc   data descriptor for the outQ data buffer
+    ///                -- derivative buffers do not have a descriptor and
+    ///                cannot be offset or padded with a stride (yet ?)
     ///
     /// @param outQ    output vertex data
     ///
@@ -102,7 +104,7 @@ public:
     ///
     /// @param inQ    input varying data
     ///
-    /// @param oDesc  data descriptor shared by all output data buffers
+    /// @param oDesc  data descriptor for the outQ data buffer
     ///
     /// @param outQ   output varying data
     ///
@@ -127,7 +129,7 @@ public:
     ///
     /// @param inQ    input face-varying data
     ///
-    /// @param oDesc  data descriptor shared by all output data buffers
+    /// @param oDesc  data descriptor for the outQ data buffer
     ///
     /// @param outQ   output face-varying data
     ///
@@ -153,7 +155,9 @@ public:
     ///
     /// @param context  the EvalLimitContext that the controller will evaluate
     ///
-    /// @param outDesc  data descriptor (offset, length, stride)
+    /// @param outDesc  data descriptor for the outQ data buffer
+    ///                 -- derivative buffers do not have a descriptor and
+    ///                 cannot be offset or padded with a stride (yet ?)
     ///
     /// @param outQ    output vertex data
     ///
diff --git a/opensubdiv/osd/cpuEvalLimitKernel.cpp b/opensubdiv/osd/cpuEvalLimitKernel.cpp
index 69dc3448..ed8df6a9 100644
--- a/opensubdiv/osd/cpuEvalLimitKernel.cpp
+++ b/opensubdiv/osd/cpuEvalLimitKernel.cpp
@@ -23,6 +23,7 @@
 //
 
 #include "../osd/cpuEvalLimitKernel.h"
+#include "../far/patchTables.h"
 #include "../far/stencilTables.h"
 
 #include <math.h>
@@ -68,6 +69,30 @@ evalBilinear(float u, float v,
     }
 }
 
+#ifdef TENSOR_PRODUCT_CUBIC_SPLINES
+
+// manuelk code was refactored to use the matrix formulation of cubic splines
+// exposed in Far::PatchTables for consistency. I am keeping these temporarily
+// for reference.
+
+inline void
+evalCubicBezier(float u, float B[4], float BU[3]) {
+    float u2 = u*u,
+          w0 = 1.0f - u,
+          w2 = w0 * w0;
+
+    B[0] = w0*w2;
+    B[1] = 3.0f * u * w2;
+    B[2] = 3.0f * u2 * w0;
+    B[3] = u*u2;
+
+    if (BU) {
+        BU[0] = w2;
+        BU[1] = 2.0f * u * w0;
+        BU[2] = u2;
+    }
+}
+
 inline void
 evalCubicBSpline(float u, float B[4], float BU[4]) {
     float t = u;
@@ -90,101 +115,107 @@ evalCubicBSpline(float u, float B[4], float BU[4]) {
     }
 }
 
+inline void
+univar4x4(float u, float B[4], float D[4]) {
 
+    float t = u;
+    float s = 1.0f - u;
+
+    float A0 = s * s;
+    float A1 = 2 * s * t;
+    float A2 = t * t;
+
+    B[0] = s * A0;
+    B[1] = t * A0 + s * A1;
+    B[2] = t * A1 + s * A2;
+    B[3] = t * A2;
+
+    if (D) {
+        D[0] =    - A0;
+        D[1] = A0 - A1;
+        D[2] = A1 - A2;
+        D[3] = A2;
+    }
+}
+
+#endif
 
 void
-evalBSpline(float u, float v,
+evalBSpline(Far::PatchParam::BitField bits,
+            float s, float t,
             Far::Index const * vertexIndices,
             VertexBufferDescriptor const & inDesc,
             float const * inQ,
             VertexBufferDescriptor const & outDesc,
             float * outQ,
-            float * outDQU,
-            float * outDQV ) {
+            float * outDQ1,
+            float * outDQ2 ) {
 
     // make sure that we have enough space to store results
     assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
 
-    bool evalDeriv = (outDQU or outDQV);
-
-    float B[4], D[4],
-          *BU=(float*)alloca(inDesc.length*4*sizeof(float)),
-          *DU=(float*)alloca(inDesc.length*4*sizeof(float));
-
-    memset(BU, 0, inDesc.length*4*sizeof(float));
-    memset(DU, 0, inDesc.length*4*sizeof(float));
-
-    evalCubicBSpline(u, B, evalDeriv ? D : 0);
+    float Q[16], dQ1[16], dQ2[16];
+    Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BSPLINE, bits, s, t,
+        outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
 
     float const * inOffset = inQ + inDesc.offset;
 
-    for (int i=0; i<4; ++i) {
-        for (int j=0; j<4; ++j) {
+    outQ += outDesc.offset;
 
-            float const * in = inOffset + vertexIndices[i+j*4]*inDesc.stride;
-
-            for (int k=0; k<inDesc.length; ++k) {
-
-                BU[i*inDesc.length+k] += in[k] * B[j];
-
-                if (evalDeriv)
-                    DU[i*inDesc.length+k] += in[k] * D[j];
-            }
-        }
+    memset(outQ, 0, inDesc.length*sizeof(float));
+    if (outDQ1) {
+        memset(outDQ1, 0, inDesc.length*sizeof(float));
+    }
+    if (outDQ2) {
+        memset(outDQ2, 0, inDesc.length*sizeof(float));
     }
 
-    evalCubicBSpline(v, B, evalDeriv ? D : 0);
 
-    float * Q = outQ + outDesc.offset,
-          * dQU = outDQU + outDesc.offset,
-          * dQV = outDQV + outDesc.offset;
+    for (int i=0; i<16; ++i) {
 
-    // clear result
-    memset(Q, 0, inDesc.length*sizeof(float));
-    if (evalDeriv) {
-        memset(dQU, 0, inDesc.length*sizeof(float));
-        memset(dQV, 0, inDesc.length*sizeof(float));
-    }
+        float const * in = inOffset + vertexIndices[i]*inDesc.stride;
 
-    for (int i=0; i<4; ++i) {
         for (int k=0; k<inDesc.length; ++k) {
-            Q[k] += BU[inDesc.length*i+k] * B[i];
-
-            if (evalDeriv) {
-                dQU[k] += DU[inDesc.length*i+k] * B[i];
-                dQV[k] += BU[inDesc.length*i+k] * D[i];
+            outQ[k] += Q[i] * in[k];
+            if (outDQ1) {
+                outDQ1[k] += dQ1[i] * in[k];
+            }
+            if (outDQ2) {
+                outDQ2[k] += dQ2[i] * in[k];
             }
         }
     }
 }
 
-
-
 void
-evalBoundary(float u, float v,
+evalBoundary(Far::PatchParam::BitField bits,
+             float s, float t,
              Far::Index const * vertexIndices,
              VertexBufferDescriptor const & inDesc,
              float const * inQ,
              VertexBufferDescriptor const & outDesc,
              float * outQ,
-             float * outDQU,
-             float * outDQV ) {
+             float * outDQ1,
+             float * outDQ2 ) {
 
+    // make sure that we have enough space to store results
     assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
 
-    bool evalDeriv = (outDQU or outDQV);
-
-    float B[4], D[4],
-          *BU=(float*)alloca(inDesc.length*4*sizeof(float)),
-          *DU=(float*)alloca(inDesc.length*4*sizeof(float));
-
-    memset(BU, 0, inDesc.length*4*sizeof(float));
-    memset(DU, 0, inDesc.length*4*sizeof(float));
-
-    evalCubicBSpline(u, B, evalDeriv ? D : 0);
+    float Q[16], dQ1[16], dQ2[16];
+    Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BSPLINE, bits, s, t,
+        outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
 
     float const * inOffset = inQ + inDesc.offset;
 
+    outQ += outDesc.offset;
+
+    memset(outQ, 0, inDesc.length*sizeof(float));
+    if (outDQ1) {
+        memset(outDQ1, 0, inDesc.length*sizeof(float));
+    }
+    if (outDQ2) {
+        memset(outDQ2, 0, inDesc.length*sizeof(float));
+    }
 
     // mirror the missing vertices (M)
     //
@@ -217,77 +248,52 @@ evalBoundary(float u, float v,
         M[3*inDesc.length+k] = 2.0f*v3[k] - v7[k];  // M4 = 2*v2 - v1
     }
 
-    for (int i=0; i<4; ++i) {
-        for (int j=0; j<4; ++j) {
+    for (int i=0; i<16; ++i) {
 
-            // swap the missing row of verts with our mirrored ones
-            float const * in = j==0 ? &M[i*inDesc.length] :
-                inOffset + vertexIndices[i+(j-1)*4]*inDesc.stride;
+        float const * in = i < 4 ?
+            M + i*inDesc.length : inOffset + vertexIndices[i-4]*inDesc.stride;
 
-            for (int k=0; k<inDesc.length; ++k) {
-
-                BU[i*inDesc.length+k] += in[k] * B[j];
-
-                if (evalDeriv)
-                    DU[i*inDesc.length+k] += in[k] * D[j];
-            }
-        }
-    }
-
-    evalCubicBSpline(v, B, evalDeriv ? D : 0);
-
-    float * Q = outQ + outDesc.offset,
-          * dQU = outDQU + outDesc.offset,
-          * dQV = outDQV + outDesc.offset;
-
-    // clear result
-    memset(Q, 0, inDesc.length*sizeof(float));
-    if (evalDeriv) {
-        memset(dQU, 0, inDesc.length*sizeof(float));
-        memset(dQV, 0, inDesc.length*sizeof(float));
-    }
-
-    for (int i=0; i<4; ++i) {
         for (int k=0; k<inDesc.length; ++k) {
-            Q[k] += BU[inDesc.length*i+k] * B[i];
-
-            if (evalDeriv) {
-                dQU[k] += DU[inDesc.length*i+k] * B[i];
-                dQV[k] += BU[inDesc.length*i+k] * D[i];
+            outQ[k] += Q[i] * in[k];
+            if (outDQ1) {
+                outDQ1[k] += dQ1[i] * in[k];
+            }
+            if (outDQ2) {
+                outDQ2[k] += dQ2[i] * in[k];
             }
         }
     }
 }
 
-
-
 void
-evalCorner(float u, float v,
+evalCorner(Far::PatchParam::BitField bits,
+           float s, float t,
            Far::Index const * vertexIndices,
            VertexBufferDescriptor const & inDesc,
            float const * inQ,
            VertexBufferDescriptor const & outDesc,
            float * outQ,
-           float * outDQU,
-           float * outDQV ) {
+           float * outDQ1,
+           float * outDQ2 ) {
 
+    // make sure that we have enough space to store results
     assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
 
-    int length = inDesc.length;
+    float Q[16], dQ1[16], dQ2[16];
+    Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BSPLINE, bits, s, t,
+        outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
 
-    bool evalDeriv = (outDQU or outDQV);
+    float const * inOffset = inQ + inDesc.offset;
 
-    float B[4], D[4],
-          *BU=(float*)alloca(length*4*sizeof(float)),
-          *DU=(float*)alloca(length*4*sizeof(float));
+    outQ += outDesc.offset;
 
-    memset(BU, 0, length*4*sizeof(float));
-    memset(DU, 0, length*4*sizeof(float));
-
-
-    evalCubicBSpline(u, B, evalDeriv ? D : 0);
-
-    float const *inOffset = inQ + inDesc.offset;
+    memset(outQ, 0, inDesc.length*sizeof(float));
+    if (outDQ1) {
+        memset(outDQ1, 0, inDesc.length*sizeof(float));
+    }
+    if (outDQ2) {
+        memset(outDQ2, 0, inDesc.length*sizeof(float));
+    }
 
     // mirror the missing vertices (M)
     //
@@ -302,7 +308,7 @@ evalCorner(float u, float v,
     //   |.....|.....|     |
     //  v6 -- v7 -- v8 -- M6
 
-    float *M = (float*)alloca(length*7*sizeof(float));
+    float *M = (float*)alloca(inDesc.length*7*sizeof(float));
 
     float const *v0 = inOffset + vertexIndices[0]*inDesc.stride,
                 *v1 = inOffset + vertexIndices[1]*inDesc.stride,
@@ -314,88 +320,47 @@ evalCorner(float u, float v,
                 *v8 = inOffset + vertexIndices[8]*inDesc.stride;
 
     for (int k=0; k<inDesc.length; ++k) {
-        M[0*length+k] = 2.0f*v0[k] - v3[k];  // M0 = 2*v0 - v3
-        M[1*length+k] = 2.0f*v1[k] - v4[k];  // M0 = 2*v1 - v4
-        M[2*length+k] = 2.0f*v2[k] - v5[k];  // M1 = 2*v2 - v5
+        M[0*inDesc.length+k] = 2.0f*v0[k] - v3[k];  // M0 = 2*v0 - v3
+        M[1*inDesc.length+k] = 2.0f*v1[k] - v4[k];  // M1 = 2*v1 - v4
+        M[2*inDesc.length+k] = 2.0f*v2[k] - v5[k];  // M2 = 2*v2 - v5
 
-        M[4*length+k] = 2.0f*v2[k] - v1[k];  // M4 = 2*v2 - v1
-        M[5*length+k] = 2.0f*v5[k] - v4[k];  // M5 = 2*v5 - v4
-        M[6*length+k] = 2.0f*v8[k] - v7[k];  // M6 = 2*v8 - v7
+        M[4*inDesc.length+k] = 2.0f*v2[k] - v1[k];  // M4 = 2*v2 - v1
+        M[5*inDesc.length+k] = 2.0f*v5[k] - v4[k];  // M5 = 2*v5 - v4
+        M[6*inDesc.length+k] = 2.0f*v8[k] - v7[k];  // M6 = 2*v8 - v7
 
         // M3 = 2*M2 - M1
-        M[3*length+k] = 2.0f*M[2*length+k] - M[1*length+k];
+        M[3*inDesc.length+k] = 2.0f*M[2*inDesc.length+k] - M[1*inDesc.length+k];
     }
 
     for (int i=0; i<4; ++i) {
         for (int j=0; j<4; ++j) {
 
-            float const * in = NULL;
-
+            float const * in = 0;
             if (j==0) { // (2)
-                in = &M[i*inDesc.length];
+                in = M + i*inDesc.length;
             } else if (i==3) {
-                in = &M[(j+3)*inDesc.length];
+                in = M + (j+3)*inDesc.length;
             } else {
                 in = inOffset + vertexIndices[i+(j-1)*3]*inDesc.stride;
             }
-
             assert(in);
 
-            for (int k=0; k<length; ++k) {
-
-                BU[i*length+k] += in[k] * B[j];
-
-                if (evalDeriv)
-                    DU[i*length+k] += in[k] * D[j];
+            int idx = j*4+i;
+            for (int k=0; k<inDesc.length; ++k) {
+                outQ[k] += Q[idx] * in[k];
+                if (outDQ1) {
+                    outDQ1[k] += dQ1[idx] * in[k];
+                }
+                if (outDQ2) {
+                    outDQ2[k] += dQ2[idx] * in[k];
+                }
             }
         }
     }
-
-    evalCubicBSpline(v, B, evalDeriv ? D : 0);
-
-    float * Q = outQ + outDesc.offset,
-          * dQU = outDQU + outDesc.offset,
-          * dQV = outDQV + outDesc.offset;
-
-    // clear result
-    memset(Q, 0, length*sizeof(float));
-    if (evalDeriv) {
-        memset(dQU, 0, length*sizeof(float));
-        memset(dQV, 0, length*sizeof(float));
-    }
-
-    for (int i=0; i<4; ++i) {
-        for (int k=0; k<length; ++k) {
-            Q[k] += BU[length*i+k] * B[i];
-
-            if (evalDeriv) {
-                dQU[k] += DU[length*i+k] * B[i];
-                dQV[k] += BU[length*i+k] * D[i];
-            }
-        }
-    }
-}
-
-inline void
-evalCubicBezier(float u, float B[4], float BU[3]) {
-    float u2 = u*u,
-          w0 = 1.0f - u,
-          w2 = w0 * w0;
-
-    B[0] = w0*w2;
-    B[1] = 3.0f * u * w2;
-    B[2] = 3.0f * u2 * w0;
-    B[3] = u*u2;
-
-    if (BU) {
-        BU[0] = w2;
-        BU[1] = 2.0f * u * w0;
-        BU[2] = u2;
-    }
 }
 
 void
-evalGregoryBasis(float u, float v,
+evalGregoryBasis(Far::PatchParam::BitField bits, float u, float v,
                  Far::StencilTables const & basisStencils,
                  int stencilIndex,
                  VertexBufferDescriptor const & inDesc,
@@ -409,54 +374,21 @@ evalGregoryBasis(float u, float v,
 
     int length = inDesc.length;
 
-    bool evalDeriv = (outDQU or outDQV);
-
-    float S[4], T[4], DS[3], DT[3];
-    evalCubicBezier(u, S, evalDeriv ? DS : 0);
-    evalCubicBezier(v, T, evalDeriv ? DT : 0);
-
     float BU[16], DU[16], DV[16];
-    memset(BU, 0, 16*sizeof(float));
-    for (int i=0; i<4; ++i) {
-        for (int j=0; j<4; ++j) {
-            BU[4*i+j] += S[j] * T[i];
-        }
-    }
-
-    if (evalDeriv) {
-        memset(DU, 0, 16*sizeof(float));
-        for (int i=0; i<4; ++i) {
-            float pw = 0.0f;
-            for (int j=0; j<3; ++j) {
-                float w = DS[j] * T[i];
-                DU[4*i+j] += pw - w;
-                pw = w;
-            }
-            DU[4*i+3]+=pw;
-        }
-        memset(DV, 0, 16*sizeof(float));
-        for (int j=0; j<4; ++j) {
-            float pw = 0.0f;
-            for (int i=0; i<3; ++i) {
-                float w = S[j] * DT[i];
-                DV[4*i+j] += pw - w;
-                pw = w;
-            }
-            DV[12+j]+=pw;
-        }
-    }
+    Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BEZIER, bits, u, v,
+        outQ ? BU : 0, outDQU ? DU : 0, outDQV ? DV : 0);
 
     float const *inOffset = inQ + inDesc.offset;
 
-    float * Q = outQ + outDesc.offset,
-          * dQU = outDQU + outDesc.offset,
-          * dQV = outDQV + outDesc.offset;
+    float * Q = outQ + outDesc.offset;
 
     // clear result
     memset(Q, 0, length*sizeof(float));
-    if (evalDeriv) {
-        memset(dQU, 0, length*sizeof(float));
-        memset(dQV, 0, length*sizeof(float));
+    if (outDQU) {
+        memset(outDQU, 0, length*sizeof(float));
+    }
+    if (outDQV) {
+        memset(outDQV, 0, length*sizeof(float));
     }
 
     float uu = 1-u,
@@ -531,12 +463,15 @@ evalGregoryBasis(float u, float v,
                     float const * in = inOffset + srcIndices[j]*inDesc.stride;
                     float w = BU[i] * w0 * srcWeights[j],
                           dw1 = DU[i] * w0 * srcWeights[j],
-                          dw2 = DV[i] * w0 * srcWeights[j];
+                          dw2 = DV[i] * w0 * srcWeights[
+j];
                     for (int k=0; k<length; ++k) {
                         Q[k] += in[k] * w;
-                        if (evalDeriv) {
-                            dQU[k] += in[k] * dw1;
-                            dQV[k] += in[k] * dw2;
+                        if (outDQU) {
+                            outDQU[k] += in[k] * dw1;
+                        }
+                        if (outDQV) {
+                            outDQV[k] += in[k] * dw2;
                         }
                     }
                 }
@@ -551,9 +486,11 @@ evalGregoryBasis(float u, float v,
                           dw2 = DV[i] * w1 * srcWeights[j];
                     for (int k=0; k<length; ++k) {
                         Q[k] += in[k] * w;
-                        if (evalDeriv) {
-                            dQU[k] += in[k] * dw1;
-                            dQV[k] += in[k] * dw2;
+                        if (outDQU) {
+                            outDQU[k] += in[k] * dw1;
+                        }
+                        if (outDQV) {
+                            outDQV[k] += in[k] * dw2;
                         }
                     }
                 }
@@ -570,9 +507,11 @@ evalGregoryBasis(float u, float v,
                       dw2 = DV[i] * srcWeights[j];
                 for (int k=0; k<length; ++k) {
                     Q[k] += in[k] * w;
-                    if (evalDeriv) {
-                        dQU[k] += in[k] * dw1;
-                        dQV[k] += in[k] * dw2;
+                    if (outDQU) {
+                        outDQU[k] += in[k] * dw1;
+                    }
+                    if (outDQV) {
+                        outDQV[k] += in[k] * dw2;
                     }
                 }
             }
@@ -580,7 +519,6 @@ evalGregoryBasis(float u, float v,
     }
 }
 
-
 /*
 static float ef[7] = {
     0.813008f, 0.500000f, 0.363636f, 0.287505f,
@@ -597,29 +535,6 @@ static float ef[27] = {
     0.0569311f, 0.0548745f, 0.0529621f
 };
 
-inline void
-univar4x4(float u, float B[4], float D[4]) {
-
-    float t = u;
-    float s = 1.0f - u;
-
-    float A0 = s * s;
-    float A1 = 2 * s * t;
-    float A2 = t * t;
-
-    B[0] = s * A0;
-    B[1] = t * A0 + s * A1;
-    B[2] = t * A1 + s * A2;
-    B[3] = t * A2;
-
-    if (D) {
-        D[0] =    - A0;
-        D[1] = A0 - A1;
-        D[2] = A1 - A2;
-        D[3] = A2;
-    }
-}
-
 inline float
 csf(Far::Index n, Far::Index j) {
     if (j%2 == 0) {
@@ -631,7 +546,7 @@ csf(Far::Index n, Far::Index j) {
 
 
 void
-evalGregory(float u, float v,
+evalGregory(Far::PatchParam::BitField bits, float u, float v,
             Far::Index const * vertexIndices,
             Far::Index const * vertexValenceBuffer,
             unsigned int const * quadOffsetBuffer,
@@ -640,16 +555,12 @@ evalGregory(float u, float v,
             float const * inQ,
             VertexBufferDescriptor const & outDesc,
             float * outQ,
-            float * outDQU,
-            float * outDQV ) {
-
-    // vertex
+            float * outDQ1,
+            float * outDQ2 ) {
 
     // make sure that we have enough space to store results
     assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
 
-    bool evalDeriv = (outDQU or outDQV);
-
     int valences[4], length=inDesc.length;
 
     float const * inOffset = inQ + inDesc.offset;
@@ -725,8 +636,6 @@ evalGregory(float u, float v,
         }
     }
 
-    // tess control
-
     // Control Vertices based on :
     // "Approximating Subdivision Surfaces with Gregory Patches for Hardware Tessellation"
     // Loop, Schaefer, Ni, Castafio (ACM ToG Siggraph Asia 2009)
@@ -839,49 +748,32 @@ evalGregory(float u, float v,
     memcpy(q+14*length, p[11], length*sizeof(float));
     memcpy(q+15*length, p[10], length*sizeof(float));
 
-    float B[4], D[4],
-          *BU=(float*)alloca(inDesc.length*4*sizeof(float)),
-          *DU=(float*)alloca(inDesc.length*4*sizeof(float));
-    memset(BU, 0, inDesc.length*4*sizeof(float));
-    memset(DU, 0, inDesc.length*4*sizeof(float));
+    float Q[16], dQ1[16], dQ2[16];
+    Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BEZIER, bits, u, v,
+        outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
 
-    univar4x4(u, B, evalDeriv ? D : 0);
+    outQ += outDesc.offset;
 
-    for (int i=0; i<4; ++i) {
-        for (int j=0; j<4; ++j) {
-
-            float const * in = q + (i+j*4)*length;
-
-            for (int k=0; k<inDesc.length; ++k) {
-
-                BU[i*inDesc.length+k] += in[k] * B[j];
-
-                if (evalDeriv)
-                    DU[i*inDesc.length+k] += in[k] * D[j];
-            }
-        }
+    memset(outQ, 0, inDesc.length*sizeof(float));
+    if (outDQ1) {
+        memset(outDQ1, 0, inDesc.length*sizeof(float));
+    }
+    if (outDQ2) {
+        memset(outDQ2, 0, inDesc.length*sizeof(float));
     }
 
-    univar4x4(v, B, evalDeriv ? D : 0);
 
-    float * Q = outQ + outDesc.offset;
-    float * dQU = outDQU + outDesc.offset;
-    float * dQV = outDQV + outDesc.offset;
+    for (int i=0; i<16; ++i) {
 
-    // clear result
-    memset(Q, 0, outDesc.length*sizeof(float));
-    if (evalDeriv) {
-        memset(dQU, 0, outDesc.length*sizeof(float));
-        memset(dQV, 0, outDesc.length*sizeof(float));
-    }
+        float const * in = q + i*length;
 
-    for (int i=0; i<4; ++i) {
         for (int k=0; k<inDesc.length; ++k) {
-            Q[k] += BU[inDesc.length*i+k] * B[i];
-
-            if (evalDeriv) {
-                dQU[k] += DU[inDesc.length*i+k] * B[i];
-                dQV[k] += BU[inDesc.length*i+k] * D[i];
+            outQ[k] += Q[i] * in[k];
+            if (outDQ1) {
+                outDQ1[k] += dQ1[i] * in[k];
+            }
+            if (outDQ2) {
+                outDQ2[k] += dQ2[i] * in[k];
             }
         }
     }
@@ -889,7 +781,7 @@ evalGregory(float u, float v,
 
 
 void
-evalGregoryBoundary(float u, float v,
+evalGregoryBoundary(Far::PatchParam::BitField bits, float u, float v,
                     Far::Index const * vertexIndices,
                     Far::Index const * vertexValenceBuffer,
                     unsigned int const * quadOffsetBuffer,
@@ -898,16 +790,14 @@ evalGregoryBoundary(float u, float v,
                     float const * inQ,
                     VertexBufferDescriptor const & outDesc,
                     float * outQ,
-                    float * outDQU,
-                    float * outDQV ) {
+                    float * outDQ1,
+                    float * outDQ2 ) {
 
     // vertex
 
     // make sure that we have enough space to store results
     assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
 
-    bool evalDeriv = (outDQU or outDQV);
-
     int valences[4], zerothNeighbors[4], length=inDesc.length;
 
     float const * inOffset = inQ + inDesc.offset;
@@ -1245,49 +1135,32 @@ evalGregoryBoundary(float u, float v,
     memcpy(q+14*length, p[11], length*sizeof(float));
     memcpy(q+15*length, p[10], length*sizeof(float));
 
-    float B[4], D[4],
-          *BU=(float*)alloca(inDesc.length*4*sizeof(float)),
-          *DU=(float*)alloca(inDesc.length*4*sizeof(float));
-    memset(BU, 0, inDesc.length*4*sizeof(float));
-    memset(DU, 0, inDesc.length*4*sizeof(float));
+    float Q[16], dQ1[16], dQ2[16];
+    Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BEZIER, bits, u, v,
+        outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
 
-    univar4x4(u, B, evalDeriv ? D : 0);
+    outQ += outDesc.offset;
 
-    for (int i=0; i<4; ++i) {
-        for (int j=0; j<4; ++j) {
-
-            float const * in = q + (i+j*4)*length;
-
-            for (int k=0; k<inDesc.length; ++k) {
-
-                BU[i*inDesc.length+k] += in[k] * B[j];
-
-                if (evalDeriv)
-                    DU[i*inDesc.length+k] += in[k] * D[j];
-            }
-        }
+    memset(outQ, 0, inDesc.length*sizeof(float));
+    if (outDQ1) {
+        memset(outDQ1, 0, inDesc.length*sizeof(float));
+    }
+    if (outDQ2) {
+        memset(outDQ2, 0, inDesc.length*sizeof(float));
     }
 
-    univar4x4(v, B, evalDeriv ? D : 0);
 
-    float * Q = outQ + outDesc.offset;
-    float * dQU = outDQU + outDesc.offset;
-    float * dQV = outDQV + outDesc.offset;
+    for (int i=0; i<16; ++i) {
 
-    // clear result
-    memset(Q, 0, outDesc.length*sizeof(float));
-    if (evalDeriv) {
-        memset(dQU, 0, outDesc.length*sizeof(float));
-        memset(dQV, 0, outDesc.length*sizeof(float));
-    }
+        float const * in = q + i*length;
 
-    for (int i=0; i<4; ++i) {
         for (int k=0; k<inDesc.length; ++k) {
-            Q[k] += BU[inDesc.length*i+k] * B[i];
-
-            if (evalDeriv) {
-                dQU[k] += DU[inDesc.length*i+k] * B[i];
-                dQV[k] += BU[inDesc.length*i+k] * D[i];
+            outQ[k] += Q[i] * in[k];
+            if (outDQ1) {
+                outDQ1[k] += dQ1[i] * in[k];
+            }
+            if (outDQ2) {
+                outDQ2[k] += dQ2[i] * in[k];
             }
         }
     }
diff --git a/opensubdiv/osd/cpuEvalLimitKernel.h b/opensubdiv/osd/cpuEvalLimitKernel.h
index f9d921da..e52cdab4 100644
--- a/opensubdiv/osd/cpuEvalLimitKernel.h
+++ b/opensubdiv/osd/cpuEvalLimitKernel.h
@@ -28,6 +28,7 @@
 #include "../version.h"
 
 #include "../osd/vertexDescriptor.h"
+#include "../far/patchParam.h"
 
 #include "../far/types.h"
 
@@ -49,7 +50,8 @@ evalBilinear(float u, float v,
              float * outQ);
 
 void
-evalBSpline(float u, float v,
+evalBSpline(Far::PatchParam::BitField bits,
+            float u, float v,
             Far::Index const * vertexIndices,
             VertexBufferDescriptor const & inDesc,
             float const * inQ,
@@ -59,7 +61,8 @@ evalBSpline(float u, float v,
             float * outDQV );
 
 void
-evalBoundary(float u, float v,
+evalBoundary(Far::PatchParam::BitField bits,
+             float u, float v,
              Far::Index const * vertexIndices,
              VertexBufferDescriptor const & inDesc,
              float const * inQ,
@@ -69,7 +72,8 @@ evalBoundary(float u, float v,
              float * outDQV );
 
 void
-evalCorner(float u, float v,
+evalCorner(Far::PatchParam::BitField bits,
+           float u, float v,
            Far::Index const * vertexIndices,
            VertexBufferDescriptor const & inDesc,
            float const * inQ,
@@ -79,7 +83,7 @@ evalCorner(float u, float v,
            float * outDQV );
 
 void
-evalGregoryBasis(float u, float v,
+evalGregoryBasis(Far::PatchParam::BitField bits, float u, float v,
                  Far::StencilTables const & basisStencils,
                  int stencilIndex,
                  VertexBufferDescriptor const & inDesc,
@@ -90,7 +94,7 @@ evalGregoryBasis(float u, float v,
                  float * outDQV );
 
 void
-evalGregory(float u, float v,
+evalGregory(Far::PatchParam::BitField bits, float u, float v,
             Far::Index const * vertexIndices,
             Far::Index const * vertexValenceBuffer,
             unsigned int const * quadOffsetBuffer,
@@ -103,7 +107,7 @@ evalGregory(float u, float v,
             float * outDQV );
 
 void
-evalGregoryBoundary(float u, float v,
+evalGregoryBoundary(Far::PatchParam::BitField bits, float u, float v,
                     Far::Index const * vertexIndices,
                     Far::Index const * vertexValenceBuffer,
                     unsigned int const * quadOffsetBuffer,