Fix tangents in Osd::EvalLimitController

- don't rotate (s,t) coordinates but rotate the patch instead !

- refactor osd/cpuEvalLimitKernels to share Far::PatchTables cubic spline
  interpolation functions : this replaces tensor product formulation with
  weight matrices, which does not really impact performance here, but would
  have to be replaced when implementing regular gridding functions.

- fix OsdCpuEvalLimitController to not rotate coordinates and pass the rotation bitfields

- expose Far::PatchTables spline interpolation API (protected -> public)

- fix glEvalLimit tangent buffers (remove empty padding - see below)

- change policy for tangent buffers : the output buffer descriptor is
  **NO LONGER APPLIED** to tangent output buffers. Tangent primvar data
  buffers are no longer applying the offset and stride from the descriptor
  (because it doesn't make sense to share it). If more flexibility is
  required, we will consider adding independent descriptors for the tangent
  buffers. This change will impact existing code that generates tangents
  with the EvalLimit controller.

fixes #370
This commit is contained in:
manuelk 2014-12-25 13:03:53 -08:00
parent 5944ada0f9
commit 7954fbab37
7 changed files with 277 additions and 405 deletions

View File

@ -415,12 +415,12 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
}
delete g_dQs;
g_dQs = Osd::CpuGLVertexBuffer::Create(6,g_nparticles);
memset( g_dQs->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
g_dQs = Osd::CpuGLVertexBuffer::Create(3,g_nparticles);
memset( g_dQs->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float));
delete g_dQt;
g_dQt = Osd::CpuGLVertexBuffer::Create(6,g_nparticles);
memset( g_dQt->BindCpuBuffer(), 0, g_nparticles*6*sizeof(float));
g_dQt = Osd::CpuGLVertexBuffer::Create(3,g_nparticles);
memset( g_dQt->BindCpuBuffer(), 0, g_nparticles*3*sizeof(float));
}
updateGeom();
@ -439,8 +439,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
}
//------------------------------------------------------------------------------
struct Program
{
struct Program {
GLuint program;
GLuint uniformModelViewProjectionMatrix;
GLuint attrPosition;
@ -449,8 +448,7 @@ struct Program
//------------------------------------------------------------------------------
static void
checkGLErrors(std::string const & where = "")
{
checkGLErrors(std::string const & where = "") {
GLuint err;
while ((err = glGetError()) != GL_NO_ERROR) {
@ -462,8 +460,7 @@ checkGLErrors(std::string const & where = "")
//------------------------------------------------------------------------------
static GLuint
compileShader(GLenum shaderType, const char *source)
{
compileShader(GLenum shaderType, const char *source) {
GLuint shader = glCreateShader(shaderType);
glShaderSource(shader, 1, &source, NULL);
glCompileShader(shader);
@ -473,8 +470,8 @@ compileShader(GLenum shaderType, const char *source)
//------------------------------------------------------------------------------
static bool
linkDefaultProgram()
{
linkDefaultProgram() {
#if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0)
#define GLSL_VERSION_DEFINE "#version 400\n"
#else
@ -537,8 +534,7 @@ linkDefaultProgram()
//------------------------------------------------------------------------------
static inline void
setSharpnessColor(float s, float *r, float *g, float *b)
{
setSharpnessColor(float s, float *r, float *g, float *b) {
// 0.0 2.0 4.0
// green --- yellow --- red
*r = std::min(1.0f, s * 0.5f);

View File

@ -144,10 +144,10 @@ getBoxSplineWeights(float v, float w, float B[12]) {
}
void
PatchTables::getBasisWeights(TensorBasis basis, PatchParam::BitField bits,
PatchTables::GetBasisWeights(TensorBasis basis, PatchParam::BitField bits,
float s, float t, float point[16], float deriv1[16], float deriv2[16]) {
int const rots[4][16] =
static int const rots[4][16] =
{ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 12, 8, 4, 0, 13, 9, 5, 1, 14, 10, 6, 2, 15, 11, 7, 3 },
{ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 },

View File

@ -412,6 +412,16 @@ public:
template <class T, class U> void Limit(PatchHandle const & handle,
float s, float t, T const & src, U & dst) const;
/// \brief Selects which bi-cubic basis GetBasisWeights() evaluates
enum TensorBasis {
BASIS_BEZIER, ///< Bi-cubic bezier patch basis
BASIS_BSPLINE ///< Bi-cubic bspline patch basis
};
/// \brief Returns bi-cubic weights matrix for a given (s,t) location
/// on the patch
static void GetBasisWeights(TensorBasis basis, PatchParam::BitField bits,
float s, float t, float point[16], float deriv1[16], float deriv2[16]);
protected:
friend class PatchTablesFactory;
@ -419,18 +429,6 @@ protected:
// Factory constructor
PatchTables(int maxvalence);
enum TensorBasis {
BASIS_BEZIER,
BASIS_BSPLINE
};
// Returns bi-cubic interpolation coefficients for a given (s,t) location
// on a b-spline patch
static void getBasisWeights(TensorBasis basis, PatchParam::BitField bits,
float s, float t, float point[16], float deriv1[16], float deriv2[16]);
protected:
void reservePatchArrays(int numPatchArrays);
void pushPatchArray(PatchDescriptor desc,
@ -752,7 +750,7 @@ PatchTables::Limit(PatchHandle const & handle, float s, float t,
if (ptype>=PatchDescriptor::REGULAR and ptype<=PatchDescriptor::CORNER) {
getBasisWeights(BASIS_BSPLINE, bits, s, t, Q, Qd1, Qd2);
GetBasisWeights(BASIS_BSPLINE, bits, s, t, Q, Qd1, Qd2);
ConstIndexArray cvs = GetPatchVertices(handle);
@ -781,7 +779,7 @@ PatchTables::Limit(PatchHandle const & handle, float s, float t,
assert(_endcapStencilTables);
getBasisWeights(BASIS_BEZIER, bits, s, t, Q, Qd1, Qd2);
GetBasisWeights(BASIS_BEZIER, bits, s, t, Q, Qd1, Qd2);
InterpolateGregoryPatch(_endcapStencilTables, handle.vertIndex,
s, t, Q, Qd1, Qd2, src, dst);

View File

@ -67,68 +67,62 @@ CpuEvalLimitController::EvalLimitSample( LimitLocation const & coord,
if (vertexData.in) {
float * out = outQ ? outQ + outDesc.offset : 0,
* outDu = outDQU ? outDQU + outDesc.offset : 0,
* outDv = outDQV ? outDQV + outDesc.offset : 0;
Far::PatchTables const & ptables = context->GetPatchTables();
computeSubPatchCoords(ptables.GetPatchParam(*handle), s, t);
Far::PatchParam pparam = ptables.GetPatchParam(*handle);
pparam.bitField.Normalize(s, t);
Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle);
Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle);
switch( desc.GetType() ) {
case Desc::REGULAR : evalBSpline( t, s, cvs.begin(),
switch (desc.GetType()) {
case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(),
vertexData.inDesc,
vertexData.in,
outDesc,
out, outDu, outDv );
outQ, outDQU, outDQV );
break;
case Desc::BOUNDARY : evalBoundary( t, s, cvs.begin(),
case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(),
vertexData.inDesc,
vertexData.in,
outDesc,
out, outDu, outDv );
outQ, outDQU, outDQV );
break;
case Desc::CORNER : evalCorner( t, s, cvs.begin(),
case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(),
vertexData.inDesc,
vertexData.in,
outDesc,
out, outDu, outDv );
outQ, outDQU, outDQV );
break;
case Desc::GREGORY : evalGregory( t, s, cvs.begin(),
case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(),
&ptables.GetVertexValenceTable()[0],
ptables.GetPatchQuadOffsets(*handle).begin(),
ptables.GetMaxValence(),
vertexData.inDesc,
vertexData.in,
outDesc,
out, outDu, outDv );
outQ, outDQU, outDQV );
break;
case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( t, s, cvs.begin(),
case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(),
&ptables.GetVertexValenceTable()[0],
ptables.GetPatchQuadOffsets(*handle).begin(),
ptables.GetMaxValence(),
vertexData.inDesc,
vertexData.in,
outDesc,
out, outDu, outDv );
outQ, outDQU, outDQV );
break;
case Desc::GREGORY_BASIS : {
Far::StencilTables const * stencils =
ptables.GetEndCapStencilTables();
assert(stencils and stencils->GetNumStencils()>0);
evalGregoryBasis( t, s,
evalGregoryBasis( pparam.bitField, s, t,
*stencils,
ptables.GetEndCapStencilIndex(*handle),
vertexData.inDesc,
vertexData.in,
vertexData.outDesc,
out, outDu, outDv );
outQ, outDQU, outDQV );
} break;
default:
assert(0);
@ -157,44 +151,46 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
Far::PatchTables const & ptables = context->GetPatchTables();
Far::PatchParam pparam = ptables.GetPatchParam(*handle);
pparam.bitField.Normalize(s, t);
Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle);
Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle);
if (vertexData.in) {
int offset = vertexData.outDesc.stride * index;
int offset = vertexData.outDesc.stride * index,
doffset = vertexData.outDesc.length * index;
if (vertexData.out) {
// note : don't apply outDesc.offset here, it's done inside patch
// evaluation
float * out = vertexData.out+offset,
* outDu = vertexData.outDu ? vertexData.outDu+offset : 0,
* outDv = vertexData.outDv ? vertexData.outDv+offset : 0;
* outDu = vertexData.outDu ? vertexData.outDu+doffset : 0,
* outDv = vertexData.outDv ? vertexData.outDv+doffset : 0;
computeSubPatchCoords(ptables.GetPatchParam(*handle), s, t);
switch(desc.GetType()) {
case Desc::REGULAR : evalBSpline( t, s, cvs.begin(),
switch (desc.GetType()) {
case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(),
vertexData.inDesc,
vertexData.in,
vertexData.outDesc,
out, outDu, outDv );
break;
case Desc::BOUNDARY : evalBoundary( t, s, cvs.begin(),
case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(),
vertexData.inDesc,
vertexData.in,
vertexData.outDesc,
out, outDu, outDv );
break;
case Desc::CORNER : evalCorner( t, s, cvs.begin(),
case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(),
vertexData.inDesc,
vertexData.in,
vertexData.outDesc,
out, outDu, outDv );
break;
case Desc::GREGORY : evalGregory( t, s, cvs.begin(),
case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(),
&ptables.GetVertexValenceTable()[0],
ptables.GetPatchQuadOffsets(*handle).begin(),
ptables.GetMaxValence(),
@ -203,8 +199,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
vertexData.outDesc,
out, outDu, outDv );
break;
case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( t, s, cvs.begin(),
case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(),
&ptables.GetVertexValenceTable()[0],
ptables.GetPatchQuadOffsets(*handle).begin(),
ptables.GetMaxValence(),
@ -217,7 +212,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
Far::StencilTables const * stencils =
ptables.GetEndCapStencilTables();
assert(stencils and stencils->GetNumStencils()>0);
evalGregoryBasis( s, t,
evalGregoryBasis( pparam.bitField, s, t,
*stencils,
ptables.GetEndCapStencilIndex(*handle),
vertexData.inDesc,
@ -231,6 +226,8 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
}
}
pparam.bitField.Rotate(s, t);
VaryingData const & varyingData = _currentBindState.varyingData;
if (varyingData.in and varyingData.out) {
@ -282,7 +279,7 @@ CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords,
// XXXX manuelk this assumes FVar data is ordered with 4 CVs / patch :
// bi-cubic FVar interpolation will require proper topology
// accessors in Far::PatchTables and this code will change
evalBilinear( t, s, zeroRing,
evalBilinear( s, t, zeroRing,
facevaryingData.inDesc,
&facevaryingData.in[handle->patchIndex*4*facevaryingData.outDesc.stride],
facevaryingData.outDesc,

View File

@ -74,7 +74,9 @@ public:
///
/// @param inQ input vertex data
///
/// @param oDesc data descriptor shared by all output data buffers
/// @param oDesc data descriptor for the outQ data buffer
/// -- derivative buffers do not have a descriptor and
/// cannot be offset or padded with a stride (yet ?)
///
/// @param outQ output vertex data
///
@ -102,7 +104,7 @@ public:
///
/// @param inQ input varying data
///
/// @param oDesc data descriptor shared by all output data buffers
/// @param oDesc data descriptor for the outQ data buffer
///
/// @param outQ output varying data
///
@ -127,7 +129,7 @@ public:
///
/// @param inQ input face-varying data
///
/// @param oDesc data descriptor shared by all output data buffers
/// @param oDesc data descriptor for the outQ data buffer
///
/// @param outQ output face-varying data
///
@ -153,7 +155,9 @@ public:
///
/// @param context the EvalLimitContext that the controller will evaluate
///
/// @param outDesc data descriptor (offset, length, stride)
/// @param outDesc data descriptor for the outQ data buffer
/// -- derivative buffers do not have a descriptor and
/// cannot be offset or padded with a stride (yet ?)
///
/// @param outQ output vertex data
///

View File

@ -23,6 +23,7 @@
//
#include "../osd/cpuEvalLimitKernel.h"
#include "../far/patchTables.h"
#include "../far/stencilTables.h"
#include <math.h>
@ -68,6 +69,30 @@ evalBilinear(float u, float v,
}
}
#ifdef TENSOR_PRODUCT_CUBIC_SPLINES
// manuelk code was refactored to use the matrix formulation of cubic splines
// exposed in Far::PatchTables for consistency. I am keeping these temporarily
// for reference.
// Evaluates the cubic Bernstein weights at parameter 'u'.
//
//   B  : receives (1-u)^3, 3u(1-u)^2, 3u^2(1-u), u^3
//   BU : optional (may be null) ; when provided it receives the quadratic
//        Bernstein weights (1-u)^2, 2u(1-u), u^2
inline void
evalCubicBezier(float u, float B[4], float BU[3]) {

    float const uSq   = u * u;
    float const inv   = 1.0f - u;
    float const invSq = inv * inv;

    B[0] = inv * invSq;
    B[1] = 3.0f * u * invSq;
    B[2] = 3.0f * uSq * inv;
    B[3] = u * uSq;

    // the lower-degree weights are only produced on request
    if (!BU) {
        return;
    }
    BU[0] = invSq;
    BU[1] = 2.0f * u * inv;
    BU[2] = uSq;
}
inline void
evalCubicBSpline(float u, float B[4], float BU[4]) {
float t = u;
@ -90,101 +115,107 @@ evalCubicBSpline(float u, float B[4], float BU[4]) {
}
}
// Evaluates the cubic Bernstein weights at parameter 'u', built up from the
// quadratic Bernstein weights.
//
//   B : receives the four cubic weights
//   D : optional (may be null) ; when provided it receives the differences
//       of consecutive quadratic weights, i.e. the derivatives of the cubic
//       weights divided by 3
inline void
univar4x4(float u, float B[4], float D[4]) {

    float const hi = u,
                lo = 1.0f - u;

    // quadratic Bernstein weights
    float const q0 = lo * lo,
                q1 = 2 * lo * hi,
                q2 = hi * hi;

    // degree elevation : each cubic weight blends two adjacent quadratic ones
    B[0] = lo * q0;
    B[1] = hi * q0 + lo * q1;
    B[2] = hi * q1 + lo * q2;
    B[3] = hi * q2;

    if (!D) {
        return;
    }
    D[0] = - q0;
    D[1] = q0 - q1;
    D[2] = q1 - q2;
    D[3] = q2;
}
#endif
void
evalBSpline(float u, float v,
evalBSpline(Far::PatchParam::BitField bits,
float s, float t,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV ) {
float * outDQ1,
float * outDQ2 ) {
// make sure that we have enough space to store results
assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
bool evalDeriv = (outDQU or outDQV);
float B[4], D[4],
*BU=(float*)alloca(inDesc.length*4*sizeof(float)),
*DU=(float*)alloca(inDesc.length*4*sizeof(float));
memset(BU, 0, inDesc.length*4*sizeof(float));
memset(DU, 0, inDesc.length*4*sizeof(float));
evalCubicBSpline(u, B, evalDeriv ? D : 0);
float Q[16], dQ1[16], dQ2[16];
Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BSPLINE, bits, s, t,
outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
float const * inOffset = inQ + inDesc.offset;
for (int i=0; i<4; ++i) {
for (int j=0; j<4; ++j) {
outQ += outDesc.offset;
float const * in = inOffset + vertexIndices[i+j*4]*inDesc.stride;
for (int k=0; k<inDesc.length; ++k) {
BU[i*inDesc.length+k] += in[k] * B[j];
if (evalDeriv)
DU[i*inDesc.length+k] += in[k] * D[j];
}
}
memset(outQ, 0, inDesc.length*sizeof(float));
if (outDQ1) {
memset(outDQ1, 0, inDesc.length*sizeof(float));
}
if (outDQ2) {
memset(outDQ2, 0, inDesc.length*sizeof(float));
}
evalCubicBSpline(v, B, evalDeriv ? D : 0);
float * Q = outQ + outDesc.offset,
* dQU = outDQU + outDesc.offset,
* dQV = outDQV + outDesc.offset;
for (int i=0; i<16; ++i) {
// clear result
memset(Q, 0, inDesc.length*sizeof(float));
if (evalDeriv) {
memset(dQU, 0, inDesc.length*sizeof(float));
memset(dQV, 0, inDesc.length*sizeof(float));
}
float const * in = inOffset + vertexIndices[i]*inDesc.stride;
for (int i=0; i<4; ++i) {
for (int k=0; k<inDesc.length; ++k) {
Q[k] += BU[inDesc.length*i+k] * B[i];
if (evalDeriv) {
dQU[k] += DU[inDesc.length*i+k] * B[i];
dQV[k] += BU[inDesc.length*i+k] * D[i];
outQ[k] += Q[i] * in[k];
if (outDQ1) {
outDQ1[k] += dQ1[i] * in[k];
}
if (outDQ2) {
outDQ2[k] += dQ2[i] * in[k];
}
}
}
}
void
evalBoundary(float u, float v,
evalBoundary(Far::PatchParam::BitField bits,
float s, float t,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV ) {
float * outDQ1,
float * outDQ2 ) {
// make sure that we have enough space to store results
assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
bool evalDeriv = (outDQU or outDQV);
float B[4], D[4],
*BU=(float*)alloca(inDesc.length*4*sizeof(float)),
*DU=(float*)alloca(inDesc.length*4*sizeof(float));
memset(BU, 0, inDesc.length*4*sizeof(float));
memset(DU, 0, inDesc.length*4*sizeof(float));
evalCubicBSpline(u, B, evalDeriv ? D : 0);
float Q[16], dQ1[16], dQ2[16];
Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BSPLINE, bits, s, t,
outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
float const * inOffset = inQ + inDesc.offset;
outQ += outDesc.offset;
memset(outQ, 0, inDesc.length*sizeof(float));
if (outDQ1) {
memset(outDQ1, 0, inDesc.length*sizeof(float));
}
if (outDQ2) {
memset(outDQ2, 0, inDesc.length*sizeof(float));
}
// mirror the missing vertices (M)
//
@ -217,77 +248,52 @@ evalBoundary(float u, float v,
M[3*inDesc.length+k] = 2.0f*v3[k] - v7[k]; // M4 = 2*v2 - v1
}
for (int i=0; i<4; ++i) {
for (int j=0; j<4; ++j) {
for (int i=0; i<16; ++i) {
// swap the missing row of verts with our mirrored ones
float const * in = j==0 ? &M[i*inDesc.length] :
inOffset + vertexIndices[i+(j-1)*4]*inDesc.stride;
float const * in = i < 4 ?
M + i*inDesc.length : inOffset + vertexIndices[i-4]*inDesc.stride;
for (int k=0; k<inDesc.length; ++k) {
BU[i*inDesc.length+k] += in[k] * B[j];
if (evalDeriv)
DU[i*inDesc.length+k] += in[k] * D[j];
}
}
}
evalCubicBSpline(v, B, evalDeriv ? D : 0);
float * Q = outQ + outDesc.offset,
* dQU = outDQU + outDesc.offset,
* dQV = outDQV + outDesc.offset;
// clear result
memset(Q, 0, inDesc.length*sizeof(float));
if (evalDeriv) {
memset(dQU, 0, inDesc.length*sizeof(float));
memset(dQV, 0, inDesc.length*sizeof(float));
}
for (int i=0; i<4; ++i) {
for (int k=0; k<inDesc.length; ++k) {
Q[k] += BU[inDesc.length*i+k] * B[i];
if (evalDeriv) {
dQU[k] += DU[inDesc.length*i+k] * B[i];
dQV[k] += BU[inDesc.length*i+k] * D[i];
outQ[k] += Q[i] * in[k];
if (outDQ1) {
outDQ1[k] += dQ1[i] * in[k];
}
if (outDQ2) {
outDQ2[k] += dQ2[i] * in[k];
}
}
}
}
void
evalCorner(float u, float v,
evalCorner(Far::PatchParam::BitField bits,
float s, float t,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV ) {
float * outDQ1,
float * outDQ2 ) {
// make sure that we have enough space to store results
assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
int length = inDesc.length;
float Q[16], dQ1[16], dQ2[16];
Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BSPLINE, bits, s, t,
outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
bool evalDeriv = (outDQU or outDQV);
float const * inOffset = inQ + inDesc.offset;
float B[4], D[4],
*BU=(float*)alloca(length*4*sizeof(float)),
*DU=(float*)alloca(length*4*sizeof(float));
outQ += outDesc.offset;
memset(BU, 0, length*4*sizeof(float));
memset(DU, 0, length*4*sizeof(float));
evalCubicBSpline(u, B, evalDeriv ? D : 0);
float const *inOffset = inQ + inDesc.offset;
memset(outQ, 0, inDesc.length*sizeof(float));
if (outDQ1) {
memset(outDQ1, 0, inDesc.length*sizeof(float));
}
if (outDQ2) {
memset(outDQ2, 0, inDesc.length*sizeof(float));
}
// mirror the missing vertices (M)
//
@ -302,7 +308,7 @@ evalCorner(float u, float v,
// |.....|.....| |
// v6 -- v7 -- v8 -- M6
float *M = (float*)alloca(length*7*sizeof(float));
float *M = (float*)alloca(inDesc.length*7*sizeof(float));
float const *v0 = inOffset + vertexIndices[0]*inDesc.stride,
*v1 = inOffset + vertexIndices[1]*inDesc.stride,
@ -314,88 +320,47 @@ evalCorner(float u, float v,
*v8 = inOffset + vertexIndices[8]*inDesc.stride;
for (int k=0; k<inDesc.length; ++k) {
M[0*length+k] = 2.0f*v0[k] - v3[k]; // M0 = 2*v0 - v3
M[1*length+k] = 2.0f*v1[k] - v4[k]; // M0 = 2*v1 - v4
M[2*length+k] = 2.0f*v2[k] - v5[k]; // M1 = 2*v2 - v5
M[0*inDesc.length+k] = 2.0f*v0[k] - v3[k]; // M0 = 2*v0 - v3
M[1*inDesc.length+k] = 2.0f*v1[k] - v4[k]; // M0 = 2*v1 - v4
M[2*inDesc.length+k] = 2.0f*v2[k] - v5[k]; // M1 = 2*v2 - v5
M[4*length+k] = 2.0f*v2[k] - v1[k]; // M4 = 2*v2 - v1
M[5*length+k] = 2.0f*v5[k] - v4[k]; // M5 = 2*v5 - v4
M[6*length+k] = 2.0f*v8[k] - v7[k]; // M6 = 2*v8 - v7
M[4*inDesc.length+k] = 2.0f*v2[k] - v1[k]; // M4 = 2*v2 - v1
M[5*inDesc.length+k] = 2.0f*v5[k] - v4[k]; // M5 = 2*v5 - v4
M[6*inDesc.length+k] = 2.0f*v8[k] - v7[k]; // M6 = 2*v8 - v7
// M3 = 2*M2 - M1
M[3*length+k] = 2.0f*M[2*length+k] - M[1*length+k];
M[3*inDesc.length+k] = 2.0f*M[2*inDesc.length+k] - M[1*inDesc.length+k];
}
for (int i=0; i<4; ++i) {
for (int j=0; j<4; ++j) {
float const * in = NULL;
float const * in = 0;
if (j==0) { // (2)
in = &M[i*inDesc.length];
in = M + i*inDesc.length;
} else if (i==3) {
in = &M[(j+3)*inDesc.length];
in = M + (j+3)*inDesc.length;
} else {
in = inOffset + vertexIndices[i+(j-1)*3]*inDesc.stride;
}
assert(in);
for (int k=0; k<length; ++k) {
BU[i*length+k] += in[k] * B[j];
if (evalDeriv)
DU[i*length+k] += in[k] * D[j];
int idx = j*4+i;
for (int k=0; k<inDesc.length; ++k) {
outQ[k] += Q[idx] * in[k];
if (outDQ1) {
outDQ1[k] += dQ1[idx] * in[k];
}
if (outDQ2) {
outDQ2[k] += dQ2[idx] * in[k];
}
}
}
}
evalCubicBSpline(v, B, evalDeriv ? D : 0);
float * Q = outQ + outDesc.offset,
* dQU = outDQU + outDesc.offset,
* dQV = outDQV + outDesc.offset;
// clear result
memset(Q, 0, length*sizeof(float));
if (evalDeriv) {
memset(dQU, 0, length*sizeof(float));
memset(dQV, 0, length*sizeof(float));
}
for (int i=0; i<4; ++i) {
for (int k=0; k<length; ++k) {
Q[k] += BU[length*i+k] * B[i];
if (evalDeriv) {
dQU[k] += DU[length*i+k] * B[i];
dQV[k] += BU[length*i+k] * D[i];
}
}
}
}
inline void
evalCubicBezier(float u, float B[4], float BU[3]) {
float u2 = u*u,
w0 = 1.0f - u,
w2 = w0 * w0;
B[0] = w0*w2;
B[1] = 3.0f * u * w2;
B[2] = 3.0f * u2 * w0;
B[3] = u*u2;
if (BU) {
BU[0] = w2;
BU[1] = 2.0f * u * w0;
BU[2] = u2;
}
}
void
evalGregoryBasis(float u, float v,
evalGregoryBasis(Far::PatchParam::BitField bits, float u, float v,
Far::StencilTables const & basisStencils,
int stencilIndex,
VertexBufferDescriptor const & inDesc,
@ -409,54 +374,21 @@ evalGregoryBasis(float u, float v,
int length = inDesc.length;
bool evalDeriv = (outDQU or outDQV);
float S[4], T[4], DS[3], DT[3];
evalCubicBezier(u, S, evalDeriv ? DS : 0);
evalCubicBezier(v, T, evalDeriv ? DT : 0);
float BU[16], DU[16], DV[16];
memset(BU, 0, 16*sizeof(float));
for (int i=0; i<4; ++i) {
for (int j=0; j<4; ++j) {
BU[4*i+j] += S[j] * T[i];
}
}
if (evalDeriv) {
memset(DU, 0, 16*sizeof(float));
for (int i=0; i<4; ++i) {
float pw = 0.0f;
for (int j=0; j<3; ++j) {
float w = DS[j] * T[i];
DU[4*i+j] += pw - w;
pw = w;
}
DU[4*i+3]+=pw;
}
memset(DV, 0, 16*sizeof(float));
for (int j=0; j<4; ++j) {
float pw = 0.0f;
for (int i=0; i<3; ++i) {
float w = S[j] * DT[i];
DV[4*i+j] += pw - w;
pw = w;
}
DV[12+j]+=pw;
}
}
Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BEZIER, bits, u, v,
outQ ? BU : 0, outDQU ? DU : 0, outDQV ? DV : 0);
float const *inOffset = inQ + inDesc.offset;
float * Q = outQ + outDesc.offset,
* dQU = outDQU + outDesc.offset,
* dQV = outDQV + outDesc.offset;
float * Q = outQ + outDesc.offset;
// clear result
memset(Q, 0, length*sizeof(float));
if (evalDeriv) {
memset(dQU, 0, length*sizeof(float));
memset(dQV, 0, length*sizeof(float));
if (outDQU) {
memset(outDQU, 0, length*sizeof(float));
}
if (outDQV) {
memset(outDQV, 0, length*sizeof(float));
}
float uu = 1-u,
@ -531,12 +463,15 @@ evalGregoryBasis(float u, float v,
float const * in = inOffset + srcIndices[j]*inDesc.stride;
float w = BU[i] * w0 * srcWeights[j],
dw1 = DU[i] * w0 * srcWeights[j],
dw2 = DV[i] * w0 * srcWeights[j];
dw2 = DV[i] * w0 * srcWeights[
j];
for (int k=0; k<length; ++k) {
Q[k] += in[k] * w;
if (evalDeriv) {
dQU[k] += in[k] * dw1;
dQV[k] += in[k] * dw2;
if (outDQU) {
outDQU[k] += in[k] * dw1;
}
if (outDQV) {
outDQV[k] += in[k] * dw2;
}
}
}
@ -551,9 +486,11 @@ evalGregoryBasis(float u, float v,
dw2 = DV[i] * w1 * srcWeights[j];
for (int k=0; k<length; ++k) {
Q[k] += in[k] * w;
if (evalDeriv) {
dQU[k] += in[k] * dw1;
dQV[k] += in[k] * dw2;
if (outDQU) {
outDQU[k] += in[k] * dw1;
}
if (outDQV) {
outDQV[k] += in[k] * dw2;
}
}
}
@ -570,9 +507,11 @@ evalGregoryBasis(float u, float v,
dw2 = DV[i] * srcWeights[j];
for (int k=0; k<length; ++k) {
Q[k] += in[k] * w;
if (evalDeriv) {
dQU[k] += in[k] * dw1;
dQV[k] += in[k] * dw2;
if (outDQU) {
outDQU[k] += in[k] * dw1;
}
if (outDQV) {
outDQV[k] += in[k] * dw2;
}
}
}
@ -580,7 +519,6 @@ evalGregoryBasis(float u, float v,
}
}
/*
static float ef[7] = {
0.813008f, 0.500000f, 0.363636f, 0.287505f,
@ -597,29 +535,6 @@ static float ef[27] = {
0.0569311f, 0.0548745f, 0.0529621f
};
inline void
univar4x4(float u, float B[4], float D[4]) {
float t = u;
float s = 1.0f - u;
float A0 = s * s;
float A1 = 2 * s * t;
float A2 = t * t;
B[0] = s * A0;
B[1] = t * A0 + s * A1;
B[2] = t * A1 + s * A2;
B[3] = t * A2;
if (D) {
D[0] = - A0;
D[1] = A0 - A1;
D[2] = A1 - A2;
D[3] = A2;
}
}
inline float
csf(Far::Index n, Far::Index j) {
if (j%2 == 0) {
@ -631,7 +546,7 @@ csf(Far::Index n, Far::Index j) {
void
evalGregory(float u, float v,
evalGregory(Far::PatchParam::BitField bits, float u, float v,
Far::Index const * vertexIndices,
Far::Index const * vertexValenceBuffer,
unsigned int const * quadOffsetBuffer,
@ -640,16 +555,12 @@ evalGregory(float u, float v,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV ) {
// vertex
float * outDQ1,
float * outDQ2 ) {
// make sure that we have enough space to store results
assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
bool evalDeriv = (outDQU or outDQV);
int valences[4], length=inDesc.length;
float const * inOffset = inQ + inDesc.offset;
@ -725,8 +636,6 @@ evalGregory(float u, float v,
}
}
// tess control
// Control Vertices based on :
// "Approximating Subdivision Surfaces with Gregory Patches for Hardware Tessellation"
// Loop, Schaefer, Ni, Castaño (ACM ToG Siggraph Asia 2009)
@ -839,49 +748,32 @@ evalGregory(float u, float v,
memcpy(q+14*length, p[11], length*sizeof(float));
memcpy(q+15*length, p[10], length*sizeof(float));
float B[4], D[4],
*BU=(float*)alloca(inDesc.length*4*sizeof(float)),
*DU=(float*)alloca(inDesc.length*4*sizeof(float));
memset(BU, 0, inDesc.length*4*sizeof(float));
memset(DU, 0, inDesc.length*4*sizeof(float));
float Q[16], dQ1[16], dQ2[16];
Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BEZIER, bits, u, v,
outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
univar4x4(u, B, evalDeriv ? D : 0);
outQ += outDesc.offset;
for (int i=0; i<4; ++i) {
for (int j=0; j<4; ++j) {
float const * in = q + (i+j*4)*length;
for (int k=0; k<inDesc.length; ++k) {
BU[i*inDesc.length+k] += in[k] * B[j];
if (evalDeriv)
DU[i*inDesc.length+k] += in[k] * D[j];
}
}
memset(outQ, 0, inDesc.length*sizeof(float));
if (outDQ1) {
memset(outDQ1, 0, inDesc.length*sizeof(float));
}
if (outDQ2) {
memset(outDQ2, 0, inDesc.length*sizeof(float));
}
univar4x4(v, B, evalDeriv ? D : 0);
float * Q = outQ + outDesc.offset;
float * dQU = outDQU + outDesc.offset;
float * dQV = outDQV + outDesc.offset;
for (int i=0; i<16; ++i) {
// clear result
memset(Q, 0, outDesc.length*sizeof(float));
if (evalDeriv) {
memset(dQU, 0, outDesc.length*sizeof(float));
memset(dQV, 0, outDesc.length*sizeof(float));
}
float const * in = q + i*length;
for (int i=0; i<4; ++i) {
for (int k=0; k<inDesc.length; ++k) {
Q[k] += BU[inDesc.length*i+k] * B[i];
if (evalDeriv) {
dQU[k] += DU[inDesc.length*i+k] * B[i];
dQV[k] += BU[inDesc.length*i+k] * D[i];
outQ[k] += Q[i] * in[k];
if (outDQ1) {
outDQ1[k] += dQ1[i] * in[k];
}
if (outDQ2) {
outDQ2[k] += dQ2[i] * in[k];
}
}
}
@ -889,7 +781,7 @@ evalGregory(float u, float v,
void
evalGregoryBoundary(float u, float v,
evalGregoryBoundary(Far::PatchParam::BitField bits, float u, float v,
Far::Index const * vertexIndices,
Far::Index const * vertexValenceBuffer,
unsigned int const * quadOffsetBuffer,
@ -898,16 +790,14 @@ evalGregoryBoundary(float u, float v,
float const * inQ,
VertexBufferDescriptor const & outDesc,
float * outQ,
float * outDQU,
float * outDQV ) {
float * outDQ1,
float * outDQ2 ) {
// vertex
// make sure that we have enough space to store results
assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) );
bool evalDeriv = (outDQU or outDQV);
int valences[4], zerothNeighbors[4], length=inDesc.length;
float const * inOffset = inQ + inDesc.offset;
@ -1245,49 +1135,32 @@ evalGregoryBoundary(float u, float v,
memcpy(q+14*length, p[11], length*sizeof(float));
memcpy(q+15*length, p[10], length*sizeof(float));
float B[4], D[4],
*BU=(float*)alloca(inDesc.length*4*sizeof(float)),
*DU=(float*)alloca(inDesc.length*4*sizeof(float));
memset(BU, 0, inDesc.length*4*sizeof(float));
memset(DU, 0, inDesc.length*4*sizeof(float));
float Q[16], dQ1[16], dQ2[16];
Far::PatchTables::GetBasisWeights(Far::PatchTables::BASIS_BEZIER, bits, u, v,
outQ ? Q : 0, outDQ1 ? dQ1 : 0, outDQ2 ? dQ2 : 0);
univar4x4(u, B, evalDeriv ? D : 0);
outQ += outDesc.offset;
for (int i=0; i<4; ++i) {
for (int j=0; j<4; ++j) {
float const * in = q + (i+j*4)*length;
for (int k=0; k<inDesc.length; ++k) {
BU[i*inDesc.length+k] += in[k] * B[j];
if (evalDeriv)
DU[i*inDesc.length+k] += in[k] * D[j];
}
}
memset(outQ, 0, inDesc.length*sizeof(float));
if (outDQ1) {
memset(outDQ1, 0, inDesc.length*sizeof(float));
}
if (outDQ2) {
memset(outDQ2, 0, inDesc.length*sizeof(float));
}
univar4x4(v, B, evalDeriv ? D : 0);
float * Q = outQ + outDesc.offset;
float * dQU = outDQU + outDesc.offset;
float * dQV = outDQV + outDesc.offset;
for (int i=0; i<16; ++i) {
// clear result
memset(Q, 0, outDesc.length*sizeof(float));
if (evalDeriv) {
memset(dQU, 0, outDesc.length*sizeof(float));
memset(dQV, 0, outDesc.length*sizeof(float));
}
float const * in = q + i*length;
for (int i=0; i<4; ++i) {
for (int k=0; k<inDesc.length; ++k) {
Q[k] += BU[inDesc.length*i+k] * B[i];
if (evalDeriv) {
dQU[k] += DU[inDesc.length*i+k] * B[i];
dQV[k] += BU[inDesc.length*i+k] * D[i];
outQ[k] += Q[i] * in[k];
if (outDQ1) {
outDQ1[k] += dQ1[i] * in[k];
}
if (outDQ2) {
outDQ2[k] += dQ2[i] * in[k];
}
}
}

View File

@ -28,6 +28,7 @@
#include "../version.h"
#include "../osd/vertexDescriptor.h"
#include "../far/patchParam.h"
#include "../far/types.h"
@ -49,7 +50,8 @@ evalBilinear(float u, float v,
float * outQ);
void
evalBSpline(float u, float v,
evalBSpline(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
@ -59,7 +61,8 @@ evalBSpline(float u, float v,
float * outDQV );
void
evalBoundary(float u, float v,
evalBoundary(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
@ -69,7 +72,8 @@ evalBoundary(float u, float v,
float * outDQV );
void
evalCorner(float u, float v,
evalCorner(Far::PatchParam::BitField bits,
float u, float v,
Far::Index const * vertexIndices,
VertexBufferDescriptor const & inDesc,
float const * inQ,
@ -79,7 +83,7 @@ evalCorner(float u, float v,
float * outDQV );
void
evalGregoryBasis(float u, float v,
evalGregoryBasis(Far::PatchParam::BitField bits, float u, float v,
Far::StencilTables const & basisStencils,
int stencilIndex,
VertexBufferDescriptor const & inDesc,
@ -90,7 +94,7 @@ evalGregoryBasis(float u, float v,
float * outDQV );
void
evalGregory(float u, float v,
evalGregory(Far::PatchParam::BitField bits, float u, float v,
Far::Index const * vertexIndices,
Far::Index const * vertexValenceBuffer,
unsigned int const * quadOffsetBuffer,
@ -103,7 +107,7 @@ evalGregory(float u, float v,
float * outDQV );
void
evalGregoryBoundary(float u, float v,
evalGregoryBoundary(Far::PatchParam::BitField bits, float u, float v,
Far::Index const * vertexIndices,
Far::Index const * vertexValenceBuffer,
unsigned int const * quadOffsetBuffer,