Merge pull request #513 from takahito-tejima/refactor2

implements Osd::CpuEvaluator::EvalPatches function.
2024-09-19 22:30:05 +00:00 · 2015-05-21 18:55:09 -07:00 · 2015-05-21 18:55:09 -07:00 · 2b33de38f8
commit 2b33de38f8
parent 71055dbdd9 2e26f932e1
3 changed files with 499 additions and 149 deletions
--- a/examples/glEvalLimit/glEvalLimit.cpp
+++ b/examples/glEvalLimit/glEvalLimit.cpp
@ -60,6 +60,7 @@ GLFWmonitor* g_primary=0;
 #include "../common/stopwatch.h"
 #include "../common/simple_math.h"
 #include "../common/glHud.h"
+#include "../common/glUtils.h"

 #include "init_shapes.h"
 #include "particles.h"
@ -71,10 +72,6 @@ GLFWmonitor* g_primary=0;
 #include <sstream>
 #include <stdlib.h>

-#ifdef OPENSUBDIV_HAS_OPENMP
-    #include <omp.h>
-#endif
-
 using namespace OpenSubdiv;

 //------------------------------------------------------------------------------
@ -91,9 +88,11 @@ std::vector<float> g_coarseEdgeSharpness;
 std::vector<float> g_coarseVertexSharpness;

 enum DrawMode { kRANDOM=0,
-                kUV=1,
-                kVARYING=2,
-                kFACEVARYING=3 };
+                kUV,
+                kVARYING,
+                kNORMAL,
+                kSHADE,
+                kFACEVARYING };

 int   g_running = 1,
      g_width = 1024,
@ -146,6 +145,18 @@ GLuint g_cageEdgeVAO = 0,

 GLhud g_hud;

+//------------------------------------------------------------------------------
+struct Program {  // GL program handle plus the uniform/attribute locations that linkDefaultProgram() looks up
+    GLuint program;                  // program object created and linked in linkDefaultProgram()
+    GLuint uniformModelViewMatrix;   // location of the "ModelViewMatrix" uniform
+    GLuint uniformProjectionMatrix;  // location of the "ProjectionMatrix" uniform
+    GLuint uniformDrawMode;          // location of the "DrawMode" uniform (0 for cage drawing, g_drawMode for samples)
+    GLuint attrPosition;             // location of the "position" vertex attribute
+    GLuint attrColor;                // location of the "color" vertex attribute
+    GLuint attrTangentU;             // location of the "tangentU" vertex attribute
+    GLuint attrTangentV;             // location of the "tangentV" vertex attribute
+} g_defaultProgram;  // NOTE(review): glGetUniformLocation/glGetAttribLocation return GLint (-1 when not found); confirm storing them as GLuint is intended
+
 //------------------------------------------------------------------------------
 static void
 createRandomColors(int nverts, int stride, float * colors) {
@ -193,26 +204,28 @@ createCoarseMesh(OpenSubdiv::Far::TopologyRefiner const & refiner) {
 //------------------------------------------------------------------------------
 Far::TopologyRefiner * g_topologyRefiner = 0;

-Osd::CpuVertexBuffer * g_vertexData = 0,
-                   * g_varyingData = 0;
-
 Far::StencilTables const * g_vertexStencils = NULL;
 Far::StencilTables const * g_varyingStencils = NULL;

 Far::PatchTables const * g_patchTables = NULL;
 Far::PatchMap const * g_patchMap = NULL;
-Osd::PatchCoordArray g_patchCoords;
+std::vector<Osd::PatchCoord> g_patchCoords;

-Osd::VertexBufferDescriptor g_idesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 3 ),
-                          g_odesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6 ),
-                          g_vdesc( /*offset*/ 3, /*legnth*/ 3, /*stride*/ 6 ),
-                          g_fvidesc( /*offset*/ 0, /*legnth*/ 2, /*stride*/ 2 ),
-                          g_fvodesc( /*offset*/ 3, /*legnth*/ 2, /*stride*/ 6 );
+Osd::VertexBufferDescriptor g_idesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 3),     // source layout passed with g_vertexData / g_varyingData
+                            g_odesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 6),     // limit positions inside g_outVertexData
+                            g_vdesc(/*offset*/ 3, /*length*/ 3, /*stride*/ 6),     // color / varying slot inside g_outVertexData
+                            g_duDesc(/*offset*/ 0, /*length*/ 3, /*stride*/ 6),    // first derivative output inside g_outDerivatives
+                            g_dvDesc(/*offset*/ 3, /*length*/ 3, /*stride*/ 6),    // second derivative output inside g_outDerivatives
+                            g_fvidesc(/*offset*/ 0, /*length*/ 2, /*stride*/ 2),
+                            g_fvodesc(/*offset*/ 3, /*length*/ 2, /*stride*/ 6);

+// input vertex data (coarse + refined)
+Osd::CpuVertexBuffer * g_vertexData = 0;
+Osd::CpuVertexBuffer * g_varyingData = 0;

-Osd::CpuGLVertexBuffer * g_Q=0,
-                     * g_dQs=0,
-                     * g_dQt=0;
+// output vertex data (limit locations)
+Osd::CpuGLVertexBuffer * g_outVertexData = NULL;
+Osd::CpuGLVertexBuffer * g_outDerivatives = NULL;

 STParticles * g_particles=0;

@ -289,20 +302,44 @@ updateGeom() {
    }

    // Evaluate the positions of the samples on the limit surface
-    g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(g_vertexData, g_idesc,
-                                                     g_Q,          g_odesc,
-                                                     g_patchCoords,
-                                                     g_patchTables, NULL);
-
-    // varying
-    if (g_drawMode == kVARYING) {
-        Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc,
-                                       g_Q,           g_vdesc,
-                                       g_patchCoords,
-                                       g_patchTables, NULL);
+    if (g_drawMode == kNORMAL || g_drawMode == kSHADE) {
+        // evaluate positions and derivatives
+        g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
+            g_vertexData,      g_idesc,
+            g_outVertexData,   g_odesc,
+            g_outDerivatives,  g_duDesc,
+            g_outDerivatives,  g_dvDesc,
+            (int)g_patchCoords.size(),
+            &g_patchCoords[0],
+            g_patchTables, NULL);
+    } else {
+        // evaluate positions
+        g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(
+            g_vertexData,     g_idesc,
+            g_outVertexData,  g_odesc,
+            (int)g_patchCoords.size(),
+            &g_patchCoords[0],
+            g_patchTables, NULL);
    }

-    g_Q->BindVBO();
+    // color
+    if (g_drawMode == kUV) {
+        // store patchCoords as colors
+        float *p = g_outVertexData->BindCpuBuffer() + g_vdesc.offset;
+        for (int i = 0; i < (int)g_patchCoords.size(); ++i) {
+            p[0] = g_patchCoords[i].s;
+            p[1] = g_patchCoords[i].t;
+            p[2] = 0;
+            p += g_vdesc.stride;
+        }
+    } else if (g_drawMode == kVARYING) {
+        // XXX: is this really varying?
+        Osd::CpuEvaluator::EvalPatches(g_varyingData,   g_idesc,
+                                       g_outVertexData, g_vdesc,
+                                       (int)g_patchCoords.size(),
+                                       &g_patchCoords[0],
+                                       g_patchTables, NULL);
+    }

    s.Stop();

@ -422,45 +459,22 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
        }

        // Create output buffers for the limit samples (position & tangents)
-        delete g_Q;
-        g_Q = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
-        memset( g_Q->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
+        delete g_outVertexData;
+        g_outVertexData = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
+        memset(g_outVertexData->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
        if (g_drawMode==kRANDOM) {
-            createRandomColors(g_nparticles, 6, g_Q->BindCpuBuffer()+3);
+            createRandomColors(g_nparticles, 6, g_outVertexData->BindCpuBuffer()+3);
        }

-        delete g_dQs;
-        g_dQs = Osd::CpuGLVertexBuffer::Create(3,g_nparticles);
-        memset( g_dQs->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float));
-
-        delete g_dQt;
-        g_dQt = Osd::CpuGLVertexBuffer::Create(3,g_nparticles);
-        memset( g_dQt->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float));
+        delete g_outDerivatives;
+        g_outDerivatives = Osd::CpuGLVertexBuffer::Create(6, g_nparticles);
+        memset(g_outDerivatives->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
    }

    updateGeom();

-    // Bind g_Q as a GL_POINTS VBO
-    glBindVertexArray(g_samplesVAO);
-
-    glBindBuffer(GL_ARRAY_BUFFER, g_Q->BindVBO());
-
-    glEnableVertexAttribArray(0);
-    glEnableVertexAttribArray(1);
-    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
-    glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
-
-    glBindVertexArray(0);
 }

-//------------------------------------------------------------------------------
-struct Program {
-    GLuint program;
-    GLuint uniformModelViewProjectionMatrix;
-    GLuint attrPosition;
-    GLuint attrColor;
-} g_defaultProgram;
-
 //------------------------------------------------------------------------------
 static void
 checkGLErrors(std::string const & where = "") {
@ -473,16 +487,6 @@ checkGLErrors(std::string const & where = "") {
    }
 }

-//------------------------------------------------------------------------------
-static GLuint
-compileShader(GLenum shaderType, const char *source) {
-    GLuint shader = glCreateShader(shaderType);
-    glShaderSource(shader, 1, &source, NULL);
-    glCompileShader(shader);
-    checkGLErrors("compileShader");
-    return shader;
-}
-
 //------------------------------------------------------------------------------
 static bool
 linkDefaultProgram() {
@ -497,31 +501,47 @@ linkDefaultProgram() {
        GLSL_VERSION_DEFINE
        "in vec3 position;\n"
        "in vec3 color;\n"
+        "in vec3 tangentU;\n"
+        "in vec3 tangentV;\n"
        "out vec4 fragColor;\n"
-        "uniform mat4 ModelViewProjectionMatrix;\n"
+        "out vec3 normal;\n"
+        "uniform mat4 ModelViewMatrix;\n"
+        "uniform mat4 ProjectionMatrix;\n"
        "void main() {\n"
        "  fragColor = vec4(color, 1);\n"
-        "  gl_Position = ModelViewProjectionMatrix * "
+        // XXX: fix the normal transform
+        "  normal = (ModelViewMatrix * vec4(normalize(cross(tangentU, tangentV)), 0)).xyz;\n"
+        "  gl_Position = ProjectionMatrix * ModelViewMatrix * "
        "                  vec4(position, 1);\n"
        "}\n";

    static const char *fsSrc =
        GLSL_VERSION_DEFINE
        "in vec4 fragColor;\n"
+        "in vec3 normal;\n"
+        "uniform int DrawMode;\n"
        "out vec4 color;\n"
        "void main() {\n"
-        "  color = fragColor;\n"
+        "  if (DrawMode == 3) {\n"
+        "    color = vec4(normal*0.5+vec3(0.5), 1);\n"
+        "  } else if (DrawMode == 4) {\n"
+        "    color = vec4(vec3(1)*dot(normal, vec3(0,0,1)), 1);\n"
+        "  } else {\n"
+        "    color = fragColor;\n"
+        "  }\n"
        "}\n";

    GLuint program = glCreateProgram();
-    GLuint vertexShader = compileShader(GL_VERTEX_SHADER, vsSrc);
-    GLuint fragmentShader = compileShader(GL_FRAGMENT_SHADER, fsSrc);
+    GLuint vertexShader = GLUtils::CompileShader(GL_VERTEX_SHADER, vsSrc);
+    GLuint fragmentShader = GLUtils::CompileShader(GL_FRAGMENT_SHADER, fsSrc);

    glAttachShader(program, vertexShader);
    glAttachShader(program, fragmentShader);

    glBindAttribLocation(program, 0, "position");
    glBindAttribLocation(program, 1, "color");
+    glBindAttribLocation(program, 2, "tangentU");
+    glBindAttribLocation(program, 3, "tangentV");
    glBindFragDataLocation(program, 0, "color");

    glLinkProgram(program);
@ -539,10 +559,16 @@ linkDefaultProgram() {
    }

    g_defaultProgram.program = program;
-    g_defaultProgram.uniformModelViewProjectionMatrix =
-        glGetUniformLocation(program, "ModelViewProjectionMatrix");
+    g_defaultProgram.uniformModelViewMatrix =
+        glGetUniformLocation(program, "ModelViewMatrix");
+    g_defaultProgram.uniformProjectionMatrix =
+        glGetUniformLocation(program, "ProjectionMatrix");
+    g_defaultProgram.uniformDrawMode =
+        glGetUniformLocation(program, "DrawMode");
    g_defaultProgram.attrPosition = glGetAttribLocation(program, "position");
    g_defaultProgram.attrColor = glGetAttribLocation(program, "color");
+    g_defaultProgram.attrTangentU = glGetAttribLocation(program, "tangentU");
+    g_defaultProgram.attrTangentV = glGetAttribLocation(program, "tangentV");

    return true;
 }
@ -562,8 +588,11 @@ static void
 drawCageEdges() {

    glUseProgram(g_defaultProgram.program);
-    glUniformMatrix4fv(g_defaultProgram.uniformModelViewProjectionMatrix,
-                       1, GL_FALSE, g_transformData.ModelViewProjectionMatrix);
+    glUniformMatrix4fv(g_defaultProgram.uniformModelViewMatrix,
+                       1, GL_FALSE, g_transformData.ModelViewMatrix);
+    glUniformMatrix4fv(g_defaultProgram.uniformProjectionMatrix,
+                       1, GL_FALSE, g_transformData.ProjectionMatrix);
+    glUniform1i(g_defaultProgram.uniformDrawMode, 0);

    std::vector<float> vbo;
    vbo.reserve(g_coarseEdges.size() * 6);
@ -588,6 +617,8 @@ drawCageEdges() {

    glEnableVertexAttribArray(g_defaultProgram.attrPosition);
    glEnableVertexAttribArray(g_defaultProgram.attrColor);
+    glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
+    glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
    glVertexAttribPointer(g_defaultProgram.attrPosition,
                          3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
    glVertexAttribPointer(g_defaultProgram.attrColor,
@ -604,8 +635,11 @@ static void
 drawCageVertices() {

    glUseProgram(g_defaultProgram.program);
-    glUniformMatrix4fv(g_defaultProgram.uniformModelViewProjectionMatrix,
-                       1, GL_FALSE, g_transformData.ModelViewProjectionMatrix);
+    glUniformMatrix4fv(g_defaultProgram.uniformModelViewMatrix,
+                       1, GL_FALSE, g_transformData.ModelViewMatrix);
+    glUniformMatrix4fv(g_defaultProgram.uniformProjectionMatrix,
+                       1, GL_FALSE, g_transformData.ProjectionMatrix);
+    glUniform1i(g_defaultProgram.uniformDrawMode, 0);

    int numPoints = (int)g_positions.size()/3;
    std::vector<float> vbo;
@ -642,6 +676,8 @@ drawCageVertices() {

    glEnableVertexAttribArray(g_defaultProgram.attrPosition);
    glEnableVertexAttribArray(g_defaultProgram.attrColor);
+    glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
+    glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
    glVertexAttribPointer(g_defaultProgram.attrPosition,
                          3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
    glVertexAttribPointer(g_defaultProgram.attrColor,
@ -658,19 +694,43 @@ drawCageVertices() {
 //------------------------------------------------------------------------------
 static void
 drawSamples() {
-
    glUseProgram(g_defaultProgram.program);

-    glUniformMatrix4fv(g_defaultProgram.uniformModelViewProjectionMatrix,
-                       1, GL_FALSE, g_transformData.ModelViewProjectionMatrix);
-
+    glUniformMatrix4fv(g_defaultProgram.uniformModelViewMatrix,
+                       1, GL_FALSE, g_transformData.ModelViewMatrix);
+    glUniformMatrix4fv(g_defaultProgram.uniformProjectionMatrix,
+                       1, GL_FALSE, g_transformData.ProjectionMatrix);
+    glUniform1i(g_defaultProgram.uniformDrawMode, g_drawMode);

    glBindVertexArray(g_samplesVAO);

+    glEnableVertexAttribArray(g_defaultProgram.attrPosition);
+    glEnableVertexAttribArray(g_defaultProgram.attrColor);
+    glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
+    glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
+
+    glBindBuffer(GL_ARRAY_BUFFER, g_outVertexData->BindVBO());
+    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
+    glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
+
+    glBindBuffer(GL_ARRAY_BUFFER, g_outDerivatives->BindVBO());
+    glVertexAttribPointer(2, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, 0);
+    glVertexAttribPointer(3, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 6, (float*)12);
+
+    glEnableVertexAttribArray(g_defaultProgram.attrPosition);
+    glEnableVertexAttribArray(g_defaultProgram.attrColor);
+    glEnableVertexAttribArray(g_defaultProgram.attrTangentU);
+    glEnableVertexAttribArray(g_defaultProgram.attrTangentV);
+
    glPointSize(2.0f);
    glDrawArrays(GL_POINTS, 0, g_nparticles);
    glPointSize(1.0f);

+    glDisableVertexAttribArray(g_defaultProgram.attrPosition);
+    glDisableVertexAttribArray(g_defaultProgram.attrColor);
+    glDisableVertexAttribArray(g_defaultProgram.attrTangentU);
+    glDisableVertexAttribArray(g_defaultProgram.attrTangentV);
+
    glBindVertexArray(0);

    glUseProgram(0);
@ -728,7 +788,8 @@ display() {
        g_fpsTimer.Start();

        g_hud.DrawString(10, -150, "Particle Speed ([) (]): %.1f", g_particles->GetSpeed());
-        g_hud.DrawString(10, -120, "# Samples  : (%d/%d)", g_nsamplesFound, g_Q->GetNumVertices());
+        g_hud.DrawString(10, -120, "# Samples  : (%d/%d)",
+                         g_nsamplesFound, g_outVertexData->GetNumVertices());
        g_hud.DrawString(10, -100, "Compute    : %.3f ms", g_computeTime);
        g_hud.DrawString(10, -80,  "Eval       : %.3f ms", g_evalTime * 1000.f);
        g_hud.DrawString(10, -60,  "GPU Draw   : %.3f ms", drawGpuTime);
@ -944,6 +1005,8 @@ initHUD() {
    g_hud.AddPullDownButton(shading_pulldown, "Random", kRANDOM, g_drawMode==kRANDOM);
    g_hud.AddPullDownButton(shading_pulldown, "(u,v)", kUV, g_drawMode==kUV);
    g_hud.AddPullDownButton(shading_pulldown, "Varying", kVARYING, g_drawMode==kVARYING);
+    g_hud.AddPullDownButton(shading_pulldown, "Normal", kNORMAL, g_drawMode==kNORMAL);
+    g_hud.AddPullDownButton(shading_pulldown, "Shade", kSHADE, g_drawMode==kSHADE);
    g_hud.AddPullDownButton(shading_pulldown, "FaceVarying", kFACEVARYING, g_drawMode==kFACEVARYING);

    for (int i = 1; i < 11; ++i) {
--- a/opensubdiv/osd/cpuEvaluator.cpp
+++ b/opensubdiv/osd/cpuEvaluator.cpp
@ -59,10 +59,10 @@ CpuEvaluator::EvalStencils(const float *src,
                           VertexBufferDescriptor const &srcDesc,
                           float *dst,
                           VertexBufferDescriptor const &dstDesc,
-                           float *dstDu,
-                           VertexBufferDescriptor const &dstDuDesc,
-                           float *dstDv,
-                           VertexBufferDescriptor const &dstDvDesc,
+                           float *dstDs,
+                           VertexBufferDescriptor const &dstDsDesc,
+                           float *dstDt,
+                           VertexBufferDescriptor const &dstDtDesc,
                           const int * sizes,
                           const int * offsets,
                           const int * indices,
@ -72,13 +72,13 @@ CpuEvaluator::EvalStencils(const float *src,
                           int start, int end) {
    if (end <= start) return true;
    if (srcDesc.length != dstDesc.length) return false;
-    if (srcDesc.length != dstDuDesc.length) return false;
-    if (srcDesc.length != dstDvDesc.length) return false;
+    if (srcDesc.length != dstDsDesc.length) return false;
+    if (srcDesc.length != dstDtDesc.length) return false;

    CpuEvalStencils(src, srcDesc,
                    dst, dstDesc,
-                    dstDu, dstDuDesc,
-                    dstDv, dstDvDesc,
+                    dstDs, dstDsDesc,
+                    dstDt, dstDtDesc,
                    sizes, offsets, indices,
                    weights, duWeights, dvWeights,
                    start, end);
@ -93,19 +93,21 @@ struct BufferAdapter {
    /// Zeroes the _length elements at the current position.
    ///
    /// Does nothing when the adapter wraps a null pointer: EvalPatches
    /// only applies the descriptor offset to a destination pointer when
    /// that pointer is non-null, so a null destination can reach this
    /// adapter and must not be written through.
    void Clear() {
        if (!_p) return;
        for (int i = 0; i < _length; ++i) _p[i] = 0;
    }
-    void AddWithWeight(T const *src, float w, float wu, float wv) {
-        (void)wu;
-        (void)wv;
-        // TODO: derivatives.
-        for (int i = 0; i < _length; ++i) {
-            _p[i] += src[i] * w;
+    void AddWithWeight(T const *src, float w) {
+        if (_p) {
+            // TODO: derivatives.
+            for (int i = 0; i < _length; ++i) {
+                _p[i] += src[i] * w;
+            }
        }
    }
    /// Returns a pointer to the element located `index` strides past the
    /// adapter's current position.
    const T *operator[] (int index) const {
        const int elementOffset = _stride * index;
        return _p + elementOffset;
    }
    BufferAdapter<T> & operator ++() {
-        _p += _stride;
+        if (_p) {
+            _p += _stride;
+        }
        return *this;
    }

@ -115,24 +117,23 @@ struct BufferAdapter {
 };

 /* static */
-int
+bool
 CpuEvaluator::EvalPatches(const float *src,
                          VertexBufferDescriptor const &srcDesc,
                          float *dst,
                          VertexBufferDescriptor const &dstDesc,
-                          PatchCoordArray const &patchCoords,
+                          int numPatchCoords,
+                          PatchCoord const *patchCoords,
                          Far::PatchTables const *patchTable) {
    src += srcDesc.offset;
-    dst += dstDesc.offset;
-    int count = 0;
+    if (dst) dst += dstDesc.offset;

-    // XXX: this implementaion is temporary.
    BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
    BufferAdapter<float>       dstT(dst, dstDesc.length, dstDesc.stride);

    float wP[20], wDs[20], wDt[20];

-    for (size_t i = 0; i < patchCoords.size(); ++i) {
+    for (int i = 0; i < numPatchCoords; ++i) {
        PatchCoord const &coords = patchCoords[i];

        patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);
@ -141,13 +142,58 @@ CpuEvaluator::EvalPatches(const float *src,

        dstT.Clear();
        for (int j = 0; j < cvs.size(); ++j) {
-            dstT.AddWithWeight(srcT[cvs[j]], wP[j], wDs[j], wDt[j]);
+            dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
        }
-
-        ++count;
        ++dstT;
    }
-    return count;
+    return true;
+}
+
+/* static
+ *
+ * Raw-pointer patch evaluation that also produces the s and t derivatives.
+ *
+ * For each of the numPatchCoords entries in patchCoords this asks patchTable
+ * for the basis weights at (handle, s, t), fetches the control vertex indices
+ * of that patch, and accumulates the weighted source elements into the current
+ * dst / dstDs / dstDt element before advancing all three outputs by one
+ * element.
+ *
+ * src, dst, dstDs, dstDt : base pointers; the matching descriptor's offset is
+ *                          added here (for the three outputs only when the
+ *                          pointer is non-null).
+ * srcDesc ... dstDtDesc  : offset / length / stride of each buffer.
+ * numPatchCoords         : number of entries read from patchCoords.
+ * patchCoords            : locations to evaluate (handle, s, t).
+ * patchTable             : supplies EvaluateBasis() and GetPatchVertices().
+ *
+ * Always returns true.
+ *
+ * NOTE(review): the null checks on dst/dstDs/dstDt suggest null outputs are
+ * meant to be allowed; confirm BufferAdapter::Clear() is safe for a null
+ * destination.
+ */
+bool
+CpuEvaluator::EvalPatches(const float *src,
+                          VertexBufferDescriptor const &srcDesc,
+                          float *dst,
+                          VertexBufferDescriptor const &dstDesc,
+                          float *dstDs,
+                          VertexBufferDescriptor const &dstDsDesc,
+                          float *dstDt,
+                          VertexBufferDescriptor const &dstDtDesc,
+                          int numPatchCoords,
+                          PatchCoord const *patchCoords,
+                          Far::PatchTables const *patchTable) {
+    src += srcDesc.offset;  // callers pass base pointers; descriptor offsets are applied here
+    if (dst) dst += dstDesc.offset;
+    if (dstDs) dstDs += dstDsDesc.offset;
+    if (dstDt) dstDt += dstDtDesc.offset;
+
+    BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
+    BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
+    BufferAdapter<float> dstDsT(dstDs, dstDsDesc.length, dstDsDesc.stride);
+    BufferAdapter<float> dstDtT(dstDt, dstDtDesc.length, dstDtDesc.stride);
+
+    float wP[20], wDs[20], wDt[20];  // NOTE(review): assumes a patch never has more than 20 control vertices - confirm
+
+    for (int i = 0; i < numPatchCoords; ++i) {
+        PatchCoord const &coords = patchCoords[i];
+
+        patchTable->EvaluateBasis(coords.handle, coords.s, coords.t, wP, wDs, wDt);  // fills the point and derivative weights
+
+        Far::ConstIndexArray cvs = patchTable->GetPatchVertices(coords.handle);  // control vertex indices into src
+
+        dstT.Clear();
+        dstDsT.Clear();
+        dstDtT.Clear();
+        for (int j = 0; j < cvs.size(); ++j) {
+            dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
+            dstDsT.AddWithWeight(srcT[cvs[j]], wDs[j]);
+            dstDtT.AddWithWeight(srcT[cvs[j]], wDt[j]);
+        }
+        ++dstT;  // one output element per patch coord
+        ++dstDsT;
+        ++dstDtT;
+    }
+    return true;
 }


--- a/opensubdiv/osd/cpuEvaluator.h
+++ b/opensubdiv/osd/cpuEvaluator.h
@ -56,14 +56,17 @@ struct PatchCoord {
    float s, t;              ///< parametric location on patch
 };

-typedef std::vector<PatchCoord> PatchCoordArray;
-
-
 class CpuEvaluator {
 public:
+    /// ----------------------------------------------------------------------
+    ///
+    ///   Stencil evaluations with StencilTable
+    ///
+    /// ----------------------------------------------------------------------
+
    /// \brief Generic static eval stencils function. This function has the same
    ///        signature as the other device kernels so that it can be called
-    ///        transparently from OsdMesh template interface.
+    ///        in the same way from OsdMesh template interface.
    ///
    /// @param srcBuffer      Input primvar buffer.
    ///                       must have BindCpuBuffer() method returning a
@ -108,7 +111,28 @@ public:
                            /*end   = */ stencilTable->GetNumStencils());
    }

-    /// stencil evaluate function.
+    /// \brief Static eval stencils function which takes raw CPU pointers for
+    ///        input and output.
+    ///
+    /// @param src            Input primvar pointer. An offset of srcDesc
+    ///                       will be applied internally (i.e. the pointer
+    ///                       should not include the offset)
+    ///
+    /// @param srcDesc        vertex buffer descriptor for the input buffer
+    ///
+    /// @param dst            Output primvar pointer. An offset of dstDesc
+    ///                       will be applied internally.
+    ///
+    /// @param dstDesc        vertex buffer descriptor for the output buffer
+    ///
+    /// @param stencilTable   stencil table to be applied.
+    ///
+    /// @param instance       not used in the cpu kernel
+    ///                       (declared as a typed pointer to prevent
+    ///                        undesirable template resolution)
+    ///
+    /// @param deviceContext  not used in the cpu kernel
+    ///
    static bool EvalStencils(const float *src,
                             VertexBufferDescriptor const &srcDesc,
                             float *dst,
@ -120,15 +144,52 @@ public:
                             int start,
                             int end);

+    /// \brief Generic static eval stencils function with derivatives.
+    ///        This function has the same signature as the other device
+    ///        kernels so that it can be called in the same way from the
+    ///        OsdMesh template interface.
+    ///
+    /// @param srcBuffer      Input primvar buffer.
+    ///                       must have BindCpuBuffer() method returning a
+    ///                       const float pointer for read
+    ///
+    /// @param srcDesc        vertex buffer descriptor for the input buffer
+    ///
+    /// @param dstBuffer      Output primvar buffer
+    ///                       must have BindCpuBuffer() method returning a
+    ///                       float pointer for write
+    ///
+    /// @param dstDesc        vertex buffer descriptor for the output buffer
+    ///
+    /// @param dstDsBuffer    Output s-derivative buffer
+    ///                       must have BindCpuBuffer() method returning a
+    ///                       float pointer for write
+    ///
+    /// @param dstDsDesc      vertex buffer descriptor for the output buffer
+    ///
+    /// @param dstDtBuffer    Output t-derivative buffer
+    ///                       must have BindCpuBuffer() method returning a
+    ///                       float pointer for write
+    ///
+    /// @param dstDtDesc      vertex buffer descriptor for the output buffer
+    ///
+    /// @param stencilTable   stencil table to be applied.
+    ///
+    /// @param instance       not used in the cpu kernel
+    ///                       (declared as a typed pointer to prevent
+    ///                        undesirable template resolution)
+    ///
+    /// @param deviceContext  not used in the cpu kernel
+    ///
    template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
    static bool EvalStencils(SRC_BUFFER *srcBuffer,
                             VertexBufferDescriptor const &srcDesc,
                             DST_BUFFER *dstBuffer,
                             VertexBufferDescriptor const &dstDesc,
-                             DST_BUFFER *dstDuBuffer,
-                             VertexBufferDescriptor const &dstDuDesc,
-                             DST_BUFFER *dstDvBuffer,
-                             VertexBufferDescriptor const &dstDvDesc,
+                             DST_BUFFER *dstDsBuffer,
+                             VertexBufferDescriptor const &dstDsDesc,
+                             DST_BUFFER *dstDtBuffer,
+                             VertexBufferDescriptor const &dstDtDesc,
                             STENCIL_TABLE const *stencilTable,
                             const CpuEvaluator *evaluator = NULL,
                             void * deviceContext = NULL) {
@ -139,10 +200,10 @@ public:
                            srcDesc,
                            dstBuffer->BindCpuBuffer(),
                            dstDesc,
-                            dstDuBuffer->BindCpuBuffer(),
-                            dstDuDesc,
-                            dstDvBuffer->BindCpuBuffer(),
-                            dstDvDesc,
+                            dstDsBuffer->BindCpuBuffer(),
+                            dstDsDesc,
+                            dstDtBuffer->BindCpuBuffer(),
+                            dstDtDesc,
                            &stencilTable->GetSizes()[0],
                            &stencilTable->GetOffsets()[0],
                            &stencilTable->GetControlIndices()[0],
@ -153,14 +214,46 @@ public:
                            /*end   = */ stencilTable->GetNumStencils());
    }

+    /// \brief Static eval stencils function with derivatives, which takes
+    ///        raw CPU pointers for input and output.
+    ///
+    /// @param src            Input primvar pointer. An offset of srcDesc
+    ///                       will be applied internally (i.e. the pointer
+    ///                       should not include the offset)
+    ///
+    /// @param srcDesc        vertex buffer descriptor for the input buffer
+    ///
+    /// @param dst            Output primvar pointer. An offset of dstDesc
+    ///                       will be applied internally.
+    ///
+    /// @param dstDesc        vertex buffer descriptor for the output buffer
+    ///
+    /// @param dstDs          Output s-derivatives pointer. An offset of
+    ///                       dstDsDesc will be applied internally.
+    ///
+    /// @param dstDsDesc      vertex buffer descriptor for the output buffer
+    ///
+    /// @param dstDt          Output t-derivatives pointer. An offset of
+    ///                       dstDtDesc will be applied internally.
+    ///
+    /// @param dstDtDesc      vertex buffer descriptor for the output buffer
+    ///
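+    /// @param sizes          stencil sizes array (the buffer-based overload
+    ///                       passes &stencilTable->GetSizes()[0])
+    ///
+    /// @param offsets        stencil offsets array (the buffer-based overload
+    ///                       passes &stencilTable->GetOffsets()[0])
+    ///
+    /// @param indices        control vertex indices array (the buffer-based
+    ///                       overload passes
+    ///                       &stencilTable->GetControlIndices()[0])
+    ///
+    /// @param weights        stencil weights applied to the source values;
+    ///                       the weight arrays for the two derivative
+    ///                       outputs follow it in the parameter list
+    ///
+    /// @param start          start of the stencil range to evaluate
+    ///
+    /// @param end            end of the stencil range to evaluate (the
+    ///                       buffer-based overload passes
+    ///                       stencilTable->GetNumStencils()); nothing is
+    ///                       evaluated when end <= start
+    ///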
    static bool EvalStencils(const float *src,
                             VertexBufferDescriptor const &srcDesc,
                             float *dst,
                             VertexBufferDescriptor const &dstDesc,
-                             float *dstDu,
-                             VertexBufferDescriptor const &dstDuDesc,
-                             float *dstDv,
-                             VertexBufferDescriptor const &dstDvDesc,
+                             float *dstDs,
+                             VertexBufferDescriptor const &dstDsDesc,
+                             float *dstDt,
+                             VertexBufferDescriptor const &dstDtDesc,
                             const int * sizes,
                             const int * offsets,
                             const int * indices,
@ -170,11 +263,15 @@ public:
                             int start,
                             int end);

+    /// ----------------------------------------------------------------------
+    ///
+    ///   Limit evaluations with PatchTable
+    ///
+    /// ----------------------------------------------------------------------
+
    /// \brief Generic limit eval function. This function has the same
    ///        signature as the other device kernels so that it can be called
-    ///        transparently.
-    ///
-    ///       XXX: This interface is still work in progress. XXX
+    ///        in the same way.
    ///
    /// @param srcBuffer        Input primvar buffer.
    ///                         must have BindCpuBuffer() method returning a
@ -188,23 +285,26 @@ public:
    ///
    /// @param dstDesc          vertex buffer descriptor for the output buffer
    ///
-    /// @param patchCoord       array of locations to be evaluated.
+    /// @param numPatchCoords   number of patchCoords.
+    ///
+    /// @param patchCoords      array of locations to be evaluated.
    ///
    /// @param patchTable       Far::PatchTable
    ///
-    /// @param instanced        not used in the cpu evaluator
+    /// @param instance         not used in the cpu evaluator
    ///
    /// @param deviceContext    not used in the cpu evaluator
    ///
    template <typename SRC_BUFFER, typename DST_BUFFER>
-    static int EvalPatches(SRC_BUFFER *srcBuffer,
-                           VertexBufferDescriptor const &srcDesc,
-                           DST_BUFFER *dstBuffer,
-                           VertexBufferDescriptor const &dstDesc,
-                           PatchCoordArray const &patchCoords,
-                           Far::PatchTables const *patchTable,
-                           CpuEvaluator const *instance,
-                           void * deviceContext = NULL) {
+    static bool EvalPatches(SRC_BUFFER *srcBuffer,
+                            VertexBufferDescriptor const &srcDesc,
+                            DST_BUFFER *dstBuffer,
+                            VertexBufferDescriptor const &dstDesc,
+                            int numPatchCoords,
+                            PatchCoord const *patchCoords,
+                            Far::PatchTables const *patchTable,
+                            CpuEvaluator const *instance,
+                            void * deviceContext = NULL) {
        (void)instance;   // unused
        (void)deviceContext;   // unused

@ -212,17 +312,158 @@ public:
                           srcDesc,
                           dstBuffer->BindCpuBuffer(),
                           dstDesc,
+                           numPatchCoords,
                           patchCoords,
                           patchTable);
    }

-    /// \brief limit eval function.
-    static int EvalPatches(const float *src,
-                           VertexBufferDescriptor const &srcDesc,
-                           float *dst,
-                           VertexBufferDescriptor const &dstDesc,
-                           PatchCoordArray const &patchCoords,
-                           Far::PatchTables const *patchTable);
+    /// \brief Generic limit eval function with derivatives. This function has
+    ///        the same signature as other device kernels have so that it can
+    ///        be called in the same way.
+    ///
+    /// @param srcBuffer        Input primvar buffer.
+    ///                         must have BindCpuBuffer() method returning a
+    ///                         const float pointer for read
+    ///
+    /// @param srcDesc          vertex buffer descriptor for the input buffer
+    ///
+    /// @param dstBuffer        Output primvar buffer
+    ///                         must have BindCpuBuffer() method returning a
+    ///                         float pointer for write
+    ///
+    /// @param dstDesc          vertex buffer descriptor for the output buffer
+    ///
+    /// @param dstDsBuffer      Output s-derivatives buffer
+    ///                         must have BindCpuBuffer() method returning a
+    ///                         float pointer for write
+    ///
+    /// @param dstDsDesc        vertex buffer descriptor for the dstDsBuffer
+    ///
+    /// @param dstDtBuffer      Output t-derivatives buffer
+    ///                         must have BindCpuBuffer() method returning a
+    ///                         float pointer for write
+    ///
+    /// @param dstDtDesc        vertex buffer descriptor for the dstDtBuffer
+    ///
+    /// @param numPatchCoords   number of patchCoords.
+    ///
+    /// @param patchCoords      array of locations to be evaluated.
+    ///
+    /// @param patchTable       Far::PatchTable
+    ///
+    /// @param instance         not used in the cpu evaluator
+    ///
+    /// @param deviceContext    not used in the cpu evaluator
+    ///
+    template <typename SRC_BUFFER, typename DST_BUFFER>
+    static bool EvalPatches(SRC_BUFFER *srcBuffer,
+                            VertexBufferDescriptor const &srcDesc,
+                            DST_BUFFER *dstBuffer,
+                            VertexBufferDescriptor const &dstDesc,
+                            DST_BUFFER *dstDsBuffer,
+                            VertexBufferDescriptor const &dstDsDesc,
+                            DST_BUFFER *dstDtBuffer,
+                            VertexBufferDescriptor const &dstDtDesc,
+                            int numPatchCoords,
+                            PatchCoord const *patchCoords,
+                            Far::PatchTables const *patchTable,
+                            CpuEvaluator const *instance,
+                            void * deviceContext = NULL) {
+        (void)instance;   // unused
+        (void)deviceContext;   // unused
+
+        return EvalPatches(srcBuffer->BindCpuBuffer(),
+                           srcDesc,
+                           dstBuffer->BindCpuBuffer(),
+                           dstDesc,
+                           dstDsBuffer->BindCpuBuffer(),
+                           dstDsDesc,
+                           dstDtBuffer->BindCpuBuffer(),
+                           dstDtDesc,
+                           numPatchCoords,
+                           patchCoords,
+                           patchTable);
+    }
+
+    /// \brief Static limit eval function. It takes an array of PatchCoord
+    ///        and evaluates limit values on the given PatchTable.
+    ///
+    /// @param src              Input primvar pointer. An offset of srcDesc
+    ///                         will be applied internally (i.e. the pointer
+    ///                         should not include the offset)
+    ///
+    /// @param srcDesc          vertex buffer descriptor for the input buffer
+    ///
+    /// @param dst              Output primvar pointer. An offset of dstDesc
+    ///                         will be applied internally.
+    ///
+    /// @param dstDesc          vertex buffer descriptor for the output buffer
+    ///
+    /// @param numPatchCoords   number of patchCoords.
+    ///
+    /// @param patchCoords      array of locations to be evaluated.
+    ///
+    /// @param patchTable       Far::PatchTable on which primvars are evaluated
+    ///                         for the patchCoords
+    ///
+    /// @param instance         not used in the cpu evaluator
+    ///
+    /// @param deviceContext    not used in the cpu evaluator
+    ///
+    static bool EvalPatches(const float *src,
+                            VertexBufferDescriptor const &srcDesc,
+                            float *dst,
+                            VertexBufferDescriptor const &dstDesc,
+                            int numPatchCoords,
+                            PatchCoord const *patchCoords,
+                            Far::PatchTables const *patchTable);
+
+    /// \brief Static limit eval function. It takes an array of PatchCoord and
+    ///        evaluates limit values and derivatives on the given PatchTable.
+    ///
+    /// @param src              Input primvar pointer. An offset of srcDesc
+    ///                         will be applied internally (i.e. the pointer
+    ///                         should not include the offset)
+    ///
+    /// @param srcDesc          vertex buffer descriptor for the input buffer
+    ///
+    /// @param dst              Output primvar pointer. An offset of dstDesc
+    ///                         will be applied internally.
+    ///
+    /// @param dstDesc          vertex buffer descriptor for the output buffer
+    ///
+    /// @param dstDs            Output s-derivatives pointer. An offset of
+    ///                         dstDsDesc will be applied internally.
+    ///
+    /// @param dstDsDesc        vertex buffer descriptor for the dstDs buffer
+    ///
+    /// @param dstDt            Output t-derivatives pointer. An offset of
+    ///                         dstDtDesc will be applied internally.
+    ///
+    /// @param dstDtDesc        vertex buffer descriptor for the dstDt buffer
+    ///
+    /// @param numPatchCoords   number of patchCoords.
+    ///
+    /// @param patchCoords      array of locations to be evaluated.
+    ///
+    /// @param patchTable       Far::PatchTable on which primvars are evaluated
+    ///                         for the patchCoords
+    ///
+    /// @param instance         not used in the cpu evaluator
+    ///
+    /// @param deviceContext    not used in the cpu evaluator
+    ///
+    static bool EvalPatches(const float *src,
+                            VertexBufferDescriptor const &srcDesc,
+                            float *dst,
+                            VertexBufferDescriptor const &dstDesc,
+                            float *dstDs,
+                            VertexBufferDescriptor const &dstDsDesc,
+                            float *dstDt,
+                            VertexBufferDescriptor const &dstDtDesc,
+                            int numPatchCoords,
+                            PatchCoord const *patchCoords,
+                            Far::PatchTables const *patchTable);

    /// \brief synchronize all asynchronous computation invoked on this device.
    static void Synchronize(void * /*deviceContext = NULL*/) {