Added a common patchBasis implementation for Osd

This is used to compute patch basis weights for
the Osd::*Evaluator classes that are unable to
use the C++ implementation from far/patchBasis.h,
e.g. the GLSL, HLSL, OpenCL, and CUDA kernels.

Instead of duplicating this code for each different
kernel language, we share a single implementation
which is minimally adapted to accommodate specific
language restrictions and syntax.

This implementation can also be used by client
shader code executed while drawing, e.g. to
compute patch basis weights for evaluating varying
and face-varying patches.
This commit is contained in:
David G. Yu 2016-09-29 09:53:40 -07:00
parent 5e83b995d1
commit 552331f6d9
6 changed files with 566 additions and 0 deletions

View File

@ -52,6 +52,10 @@ set(PUBLIC_HEADER_FILES
types.h
)
list(APPEND KERNEL_FILES
patchBasisCommon.h
)
set(DOXY_HEADER_FILES ${PUBLIC_HEADER_FILES})
#-------------------------------------------------------------------------------

View File

@ -34,6 +34,9 @@ namespace Osd {
static const char *commonShaderSource =
#include "glslPatchCommon.gen.h"
;
static const char *patchBasisShaderSource =
#include "patchBasisCommon.gen.h"
;
static const char *bsplineShaderSource =
#include "glslPatchBSpline.gen.h"
;
@ -50,6 +53,12 @@ GLSLPatchShaderSource::GetCommonShaderSource() {
return std::string(commonShaderSource);
}
/*static*/
std::string
GLSLPatchShaderSource::GetPatchBasisShaderSource() {
return std::string(patchBasisShaderSource);
}
/*static*/
std::string
GLSLPatchShaderSource::GetVertexShaderSource(Far::PatchDescriptor::Type type) {

View File

@ -38,6 +38,8 @@ class GLSLPatchShaderSource {
public:
static std::string GetCommonShaderSource();
static std::string GetPatchBasisShaderSource();
static std::string GetVertexShaderSource(
Far::PatchDescriptor::Type type);

View File

@ -36,6 +36,9 @@ namespace Osd {
static const char *commonShaderSource =
#include "hlslPatchCommon.gen.h"
;
static const char *patchBasisShaderSource =
#include "patchBasisCommon.gen.h"
;
static const char *bsplineShaderSource =
#include "hlslPatchBSpline.gen.h"
;
@ -52,6 +55,12 @@ HLSLPatchShaderSource::GetCommonShaderSource() {
return std::string(commonShaderSource);
}
/*static*/
std::string
HLSLPatchShaderSource::GetPatchBasisShaderSource() {
return std::string(patchBasisShaderSource);
}
/*static*/
std::string
HLSLPatchShaderSource::GetVertexShaderSource(Far::PatchDescriptor::Type type) {

View File

@ -38,6 +38,8 @@ class HLSLPatchShaderSource {
public:
static std::string GetCommonShaderSource();
static std::string GetPatchBasisShaderSource();
static std::string GetVertexShaderSource(Far::PatchDescriptor::Type type);
static std::string GetHullShaderSource(Far::PatchDescriptor::Type type);

View File

@ -0,0 +1,540 @@
//
// Copyright 2016 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_PATCH_BASIS_COMMON_H
#define OPENSUBDIV3_OSD_PATCH_BASIS_COMMON_H
#if defined(OSD_PATCH_BASIS_GLSL)
#define OSD_FUNCTION_STORAGE_CLASS
#define OSD_DATA_STORAGE_CLASS
#define OSD_OPTIONAL(a) true
#define OSD_OPTIONAL_INIT(a,b) b
#define OSD_OUT out
#define OSD_INOUT inout
#define OSD_ARRAY_8(elementType,a0,a1,a2,a3,a4,a5,a6,a7) \
elementType[](a0,a1,a2,a3,a4,a5,a6,a7)
#define OSD_ARRAY_12(elementType,a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11) \
elementType[](a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)
#elif defined(OSD_PATCH_BASIS_HLSL)
#define OSD_FUNCTION_STORAGE_CLASS
#define OSD_DATA_STORAGE_CLASS
#define OSD_OPTIONAL(a) true
#define OSD_OPTIONAL_INIT(a,b) b
#define OSD_OUT out
#define OSD_INOUT inout
#define OSD_ARRAY_8(elementType,a0,a1,a2,a3,a4,a5,a6,a7) \
{a0,a1,a2,a3,a4,a5,a6,a7}
#define OSD_ARRAY_12(elementType,a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11) \
{a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11}
#elif defined(OSD_PATCH_BASIS_CUDA)
#define OSD_FUNCTION_STORAGE_CLASS __device__
#define OSD_DATA_STORAGE_CLASS static
#define OSD_OPTIONAL(a) true
#define OSD_OPTIONAL_INIT(a,b) b
#define OSD_OUT
#define OSD_INOUT
#define OSD_ARRAY_8(elementType,a0,a1,a2,a3,a4,a5,a6,a7) \
{a0,a1,a2,a3,a4,a5,a6,a7}
#define OSD_ARRAY_12(elementType,a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11) \
{a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11}
#elif defined(OSD_PATCH_BASIS_OPENCL)
#define OSD_FUNCTION_STORAGE_CLASS static
#define OSD_DATA_STORAGE_CLASS
#define OSD_OPTIONAL(a) true
#define OSD_OPTIONAL_INIT(a,b) b
#define OSD_OUT
#define OSD_INOUT
#define OSD_ARRAY_8(elementType,a0,a1,a2,a3,a4,a5,a6,a7) \
{a0,a1,a2,a3,a4,a5,a6,a7}
#define OSD_ARRAY_12(elementType,a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11) \
{a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11}
#else
#define OSD_FUNCTION_STORAGE_CLASS static inline
#define OSD_DATA_STORAGE_CLASS static
#define OSD_OPTIONAL(a) (a)
#define OSD_OPTIONAL_INIT(a,b) (a ? b : 0)
#define OSD_OUT
#define OSD_INOUT
#define OSD_ARRAY_8(elementType,a0,a1,a2,a3,a4,a5,a6,a7) \
{a0,a1,a2,a3,a4,a5,a6,a7}
#define OSD_ARRAY_12(elementType,a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11) \
{a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11}
#endif
OSD_FUNCTION_STORAGE_CLASS
void
OsdGetBezierWeights(
float t, OSD_OUT float wP[4], OSD_OUT float wDP[4], OSD_OUT float wDP2[4]) {
// The four uniform cubic Bezier basis functions (in terms of t and its
// complement tC) evaluated at t:
float t2 = t*t;
float tC = 1.0f - t;
float tC2 = tC * tC;
wP[0] = tC2 * tC;
wP[1] = tC2 * t * 3.0f;
wP[2] = t2 * tC * 3.0f;
wP[3] = t2 * t;
// Derivatives of the above four basis functions at t:
if (OSD_OPTIONAL(wDP)) {
wDP[0] = -3.0f * tC2;
wDP[1] = 9.0f * t2 - 12.0f * t + 3.0f;
wDP[2] = -9.0f * t2 + 6.0f * t;
wDP[3] = 3.0f * t2;
}
// Second derivatives of the basis functions at t:
if (OSD_OPTIONAL(wDP2)) {
wDP2[0] = 6.0f * tC;
wDP2[1] = 18.0f * t - 12.0f;
wDP2[2] = -18.0f * t + 6.0f;
wDP2[3] = 6.0f * t;
}
}
OSD_FUNCTION_STORAGE_CLASS
void
OsdGetBSplineWeights(
float t, OSD_OUT float wP[4], OSD_OUT float wDP[4], OSD_OUT float wDP2[4]) {
// The four uniform cubic B-Spline basis functions evaluated at t:
const float one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
wP[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
wP[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
wP[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
wP[3] = one6th * ( t3);
// Derivatives of the above four basis functions at t:
if (OSD_OPTIONAL(wDP)) {
wDP[0] = -0.5f*t2 + t - 0.5f;
wDP[1] = 1.5f*t2 - 2.0f*t;
wDP[2] = -1.5f*t2 + t + 0.5f;
wDP[3] = 0.5f*t2;
}
// Second derivatives of the basis functions at t:
if (OSD_OPTIONAL(wDP2)) {
wDP2[0] = - t + 1.0f;
wDP2[1] = 3.0f * t - 2.0f;
wDP2[2] = -3.0f * t + 1.0f;
wDP2[3] = t;
}
}
OSD_FUNCTION_STORAGE_CLASS
void
OsdGetBoxSplineWeights(float v, float w, OSD_OUT float wP[12]) {
float u = 1.0f - v - w;
//
// The 12 basis functions of the quartic box spline (unscaled by their common
// factor of 1/12 until later, and formatted to make it easy to spot any
// typing errors):
//
// 15 terms for the 3 points above the triangle corners
// 9 terms for the 3 points on faces opposite the triangle edges
// 2 terms for the 6 points on faces opposite the triangle corners
//
// Powers of each variable for notational convenience:
float u2 = u*u;
float u3 = u*u2;
float u4 = u*u3;
float v2 = v*v;
float v3 = v*v2;
float v4 = v*v3;
float w2 = w*w;
float w3 = w*w2;
float w4 = w*w3;
// And now the basis functions:
wP[ 0] = u4 + 2.0f*u3*v;
wP[ 1] = u4 + 2.0f*u3*w;
wP[ 8] = w4 + 2.0f*w3*u;
wP[11] = w4 + 2.0f*w3*v;
wP[ 9] = v4 + 2.0f*v3*w;
wP[ 5] = v4 + 2.0f*v3*u;
wP[ 2] = u4 + 2.0f*u3*w + 6.0f*u3*v + 6.0f*u2*v*w + 12.0f*u2*v2 +
v4 + 2.0f*v3*w + 6.0f*v3*u + 6.0f*v2*u*w;
wP[ 4] = w4 + 2.0f*w3*v + 6.0f*w3*u + 6.0f*w2*u*v + 12.0f*w2*u2 +
u4 + 2.0f*u3*v + 6.0f*u3*w + 6.0f*u2*v*w;
wP[10] = v4 + 2.0f*v3*u + 6.0f*v3*w + 6.0f*v2*w*u + 12.0f*v2*w2 +
w4 + 2.0f*w3*u + 6.0f*w3*v + 6.0f*w3*u*v;
wP[ 3] = v4 + 6*v3*w + 8*v3*u + 36*v2*w*u + 24*v2*u2 + 24*v*u3 +
w4 + 6*w3*v + 8*w3*u + 36*w2*v*u + 24*w2*u2 + 24*w*u3 + 6*u4 + 60*u2*v*w + 12*v2*w2;
wP[ 6] = w4 + 6*w3*u + 8*w3*v + 36*w2*u*v + 24*w2*v2 + 24*w*v3 +
u4 + 6*u3*w + 8*u3*v + 36*u2*v*w + 24*u2*v2 + 24*u*v3 + 6*v4 + 60*v2*w*u + 12*w2*u2;
wP[ 7] = u4 + 6*u3*v + 8*u3*w + 36*u2*v*w + 24*u2*w2 + 24*u*w3 +
v4 + 6*v3*u + 8*v3*w + 36*v2*u*w + 24*v2*w2 + 24*v*w3 + 6*w4 + 60*w2*u*v + 12*u2*v2;
for (int i = 0; i < 12; ++i) {
wP[i] *= 1.0f / 12.0f;
}
}
OSD_FUNCTION_STORAGE_CLASS
void
OsdGetBilinearPatchWeights(
float s, float t, float dScale,
OSD_OUT float wP[4], OSD_OUT float wDs[4], OSD_OUT float wDt[4],
OSD_OUT float wDss[4], OSD_OUT float wDst[4], OSD_OUT float wDtt[4]) {
float sC = 1.0f - s,
tC = 1.0f - t;
if (OSD_OPTIONAL(wP)) {
wP[0] = sC * tC;
wP[1] = s * tC;
wP[2] = s * t;
wP[3] = sC * t;
}
if (OSD_OPTIONAL(derivS && derivT)) {
wDs[0] = -tC * dScale;
wDs[1] = tC * dScale;
wDs[2] = t * dScale;
wDs[3] = -t * dScale;
wDt[0] = -sC * dScale;
wDt[1] = -s * dScale;
wDt[2] = s * dScale;
wDt[3] = sC * dScale;
if (OSD_OPTIONAL(derivSS && derivST && derivTT)) {
float d2Scale = dScale * dScale;
for(int i=0;i<4;i++) {
wDss[i] = 0;
wDtt[i] = 0;
}
wDst[0] = d2Scale;
wDst[1] = -d2Scale;
wDst[2] = -d2Scale;
wDst[3] = d2Scale;
}
}
}
OSD_FUNCTION_STORAGE_CLASS
void OsdAdjustBoundaryWeights(
int boundary,
OSD_INOUT float sWeights[4], OSD_INOUT float tWeights[4]) {
if ((boundary & 1) != 0) {
tWeights[2] -= tWeights[0];
tWeights[1] += 2*tWeights[0];
tWeights[0] = 0;
}
if ((boundary & 2) != 0) {
sWeights[1] -= sWeights[3];
sWeights[2] += 2*sWeights[3];
sWeights[3] = 0;
}
if ((boundary & 4) != 0) {
tWeights[1] -= tWeights[3];
tWeights[2] += 2*tWeights[3];
tWeights[3] = 0;
}
if ((boundary & 8) != 0) {
sWeights[2] -= sWeights[0];
sWeights[1] += 2*sWeights[0];
sWeights[0] = 0;
}
}
OSD_FUNCTION_STORAGE_CLASS
void OsdComputeTensorProductPatchWeights(float dScale, int boundary,
float sWeights[4], float tWeights[4],
float dsWeights[4], float dtWeights[4],
float dssWeights[4], float dttWeights[4],
OSD_OUT float wP[16], OSD_OUT float wDs[16], OSD_OUT float wDt[16],
OSD_OUT float wDss[16], OSD_OUT float wDst[16], OSD_OUT float wDtt[16]) {
if (OSD_OPTIONAL(wP)) {
// Compute the tensor product weight of the (s,t) basis function
// corresponding to each control vertex:
OsdAdjustBoundaryWeights(boundary, sWeights, tWeights);
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
wP[4*i+j] = sWeights[j] * tWeights[i];
}
}
}
if (OSD_OPTIONAL(derivS && derivT)) {
// Compute the tensor product weight of the differentiated (s,t) basis
// function corresponding to each control vertex (scaled accordingly):
OsdAdjustBoundaryWeights(boundary, dsWeights, dtWeights);
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
wDs[4*i+j] = dsWeights[j] * tWeights[i] * dScale;
wDt[4*i+j] = sWeights[j] * dtWeights[i] * dScale;
}
}
if (OSD_OPTIONAL(derivSS && derivST && derivTT)) {
// Compute the tensor product weight of appropriate differentiated
// (s,t) basis functions for each control vertex (scaled accordingly):
float d2Scale = dScale * dScale;
OsdAdjustBoundaryWeights(boundary, dssWeights, dttWeights);
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 4; ++j) {
wDss[4*i+j] = dssWeights[j] * tWeights[i] * d2Scale;
wDst[4*i+j] = dsWeights[j] * dtWeights[i] * d2Scale;
wDtt[4*i+j] = sWeights[j] * dttWeights[i] * d2Scale;
}
}
}
}
}
OSD_FUNCTION_STORAGE_CLASS
void OsdGetBezierPatchWeights(
float s, float t, float dScale,
OSD_OUT float wP[16], OSD_OUT float wDS[16], OSD_OUT float wDT[16],
OSD_OUT float wDSS[16], OSD_OUT float wDST[16], OSD_OUT float wDTT[16]) {
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4], dssWeights[4], dttWeights[4];
OsdGetBezierWeights(s, OSD_OPTIONAL_INIT(wP, sWeights), OSD_OPTIONAL_INIT(wDS, dsWeights), OSD_OPTIONAL_INIT(wDSS, dssWeights));
OsdGetBezierWeights(t, OSD_OPTIONAL_INIT(wP, tWeights), OSD_OPTIONAL_INIT(wDT, dtWeights), OSD_OPTIONAL_INIT(wDTT, dttWeights));
OsdComputeTensorProductPatchWeights(dScale, /*boundary=*/0, sWeights, tWeights, dsWeights, dtWeights, dssWeights, dttWeights, wP, wDS, wDT, wDSS, wDST, wDTT);
}
OSD_FUNCTION_STORAGE_CLASS
void OsdGetBSplinePatchWeights(
float s, float t, float dScale, int boundary,
OSD_OUT float wP[16], OSD_OUT float wDs[16], OSD_OUT float wDt[16],
OSD_OUT float wDss[16], OSD_OUT float wDst[16], OSD_OUT float wDtt[16]) {
float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4], dssWeights[4], dttWeights[4];
OsdGetBSplineWeights(s, sWeights, OSD_OPTIONAL_INIT(wDS, dsWeights), OSD_OPTIONAL_INIT(wDSS, dssWeights));
OsdGetBSplineWeights(t, tWeights, OSD_OPTIONAL_INIT(wDT, dtWeights), OSD_OPTIONAL_INIT(wDTT, dttWeights));
OsdComputeTensorProductPatchWeights(dScale, boundary, sWeights, tWeights, dsWeights, dtWeights, dssWeights, dttWeights, wP, wDs, wDt, wDss, wDst, wDtt);
}
OSD_FUNCTION_STORAGE_CLASS
void OsdGetGregoryPatchWeights(
float s, float t, float dScale,
OSD_OUT float wP[20], OSD_OUT float wDs[20], OSD_OUT float wDt[20],
OSD_OUT float wDss[20], OSD_OUT float wDst[20], OSD_OUT float wDtt[20]) {
//
// P3 e3- e2+ P2
// 15------17-------11--------10
// | | | |
// | | | |
// | | f3- | f2+ |
// | 19 13 |
// e3+ 16-----18 14-----12 e2-
// | f3+ f2- |
// | |
// | |
// | f0- f1+ |
// e0- 2------4 8------6 e1+
// | 3 9 |
// | | f0+ | f1- |
// | | | |
// | | | |
// O--------1--------7--------5
// P0 e0+ e1- P1
//
// Indices of boundary and interior points and their corresponding Bezier points
// (this can be reduced with more direct indexing and unrolling of loops):
//
OSD_DATA_STORAGE_CLASS const int boundaryGregory[12] = OSD_ARRAY_12(int, 0, 1, 7, 5, 2, 6, 16, 12, 15, 17, 11, 10 );
OSD_DATA_STORAGE_CLASS const int boundaryBezSCol[12] = OSD_ARRAY_12(int, 0, 1, 2, 3, 0, 3, 0, 3, 0, 1, 2, 3 );
OSD_DATA_STORAGE_CLASS const int boundaryBezTRow[12] = OSD_ARRAY_12(int, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 );
OSD_DATA_STORAGE_CLASS const int interiorGregory[8] = OSD_ARRAY_8(int, 3, 4, 8, 9, 13, 14, 18, 19 );
OSD_DATA_STORAGE_CLASS const int interiorBezSCol[8] = OSD_ARRAY_8(int, 1, 1, 2, 2, 2, 2, 1, 1 );
OSD_DATA_STORAGE_CLASS const int interiorBezTRow[8] = OSD_ARRAY_8(int, 1, 1, 1, 1, 2, 2, 2, 2 );
//
// Bezier basis functions are denoted with B while the rational multipliers for the
// interior points will be denoted G -- so we have B(s), B(t) and G(s,t):
//
// Directional Bezier basis functions B at s and t:
float Bs[4], Bds[4], Bdss[4];
float Bt[4], Bdt[4], Bdtt[4];
OsdGetBezierWeights(s, Bs, OSD_OPTIONAL_INIT(wDs, Bds), OSD_OPTIONAL_INIT(wDss, Bdss));
OsdGetBezierWeights(t, Bt, OSD_OPTIONAL_INIT(wDt, Bdt), OSD_OPTIONAL_INIT(wDtt, Bdtt));
// Rational multipliers G at s and t:
float sC = 1.0f - s;
float tC = 1.0f - t;
// Use <= here to avoid compiler warnings -- the sums should always be non-negative:
float df0 = s + t; df0 = (df0 <= 0.0f) ? 1.0f : (1.0f / df0);
float df1 = sC + t; df1 = (df1 <= 0.0f) ? 1.0f : (1.0f / df1);
float df2 = sC + tC; df2 = (df2 <= 0.0f) ? 1.0f : (1.0f / df2);
float df3 = s + tC; df3 = (df3 <= 0.0f) ? 1.0f : (1.0f / df3);
float G[8] = OSD_ARRAY_8(float, s*df0, t*df0, t*df1, sC*df1, sC*df2, tC*df2, tC*df3, s*df3 );
// Combined weights for boundary and interior points:
for (int i = 0; i < 12; ++i) {
wP[boundaryGregory[i]] = Bs[boundaryBezSCol[i]] * Bt[boundaryBezTRow[i]];
}
for (int i = 0; i < 8; ++i) {
wP[interiorGregory[i]] = Bs[interiorBezSCol[i]] * Bt[interiorBezTRow[i]] * G[i];
}
//
// For derivatives, the basis functions for the interior points are rational and ideally
// require appropriate differentiation, i.e. product rule for the combination of B and G
// and the quotient rule for the rational G itself. As initially proposed by Loop et al
// though, the approximation using the 16 Bezier points arising from the G(s,t) has
// proved adequate (and is what the GPU shaders use) so we continue to use that here.
//
// An implementation of the true derivatives is provided for future reference -- it is
// unclear if the approximations will hold up under surface analysis involving higher
// order differentiation.
//
if (OSD_OPTIONAL(wDs && wDt)) {
bool find_second_partials = OSD_OPTIONAL(wDs && wDst && wDtt);
// Remember to include derivative scaling in all assignments below:
float d2Scale = dScale * dScale;
// Combined weights for boundary points -- simple (scaled) tensor products:
for (int i = 0; i < 12; ++i) {
int iDst = boundaryGregory[i];
int tRow = boundaryBezTRow[i];
int sCol = boundaryBezSCol[i];
wDs[iDst] = Bds[sCol] * Bt[tRow] * dScale;
wDt[iDst] = Bdt[tRow] * Bs[sCol] * dScale;
if (find_second_partials) {
wDss[iDst] = Bdss[sCol] * Bt[tRow] * d2Scale;
wDst[iDst] = Bds[sCol] * Bdt[tRow] * d2Scale;
wDtt[iDst] = Bs[sCol] * Bdtt[tRow] * d2Scale;
}
}
//#define _USE_BEZIER_PSEUDO_DERIVATIVES
// dclyde's note: skipping half of the product rule like this does seem to change the result a lot in my tests.
// This is not a runtime bottleneck for cloth sims anyway so I'm just using the accurate version.
#ifdef _USE_BEZIER_PSEUDO_DERIVATIVES
// Approximation to the true Gregory derivatives by differentiating the Bezier patch
// unique to the given (s,t), i.e. having F = (g^+ * f^+) + (g^- * f^-) as its four
// interior points:
//
// Combined weights for interior points -- (scaled) tensor products with G+ or G-:
for (int i = 0; i < 8; ++i) {
int iDst = interiorGregory[i];
int tRow = interiorBezTRow[i];
int sCol = interiorBezSCol[i];
wDs[iDst] = Bds[sCol] * Bt[tRow] * G[i] * dScale;
wDt[iDst] = Bdt[tRow] * Bs[sCol] * G[i] * dScale;
if (find_second_partials) {
wDss[iDst] = Bdss[sCol] * Bt[tRow] * G[i] * d2Scale;
wDst[iDst] = Bds[sCol] * Bdt[tRow] * G[i] * d2Scale;
wDtt[iDst] = Bs[sCol] * Bdtt[tRow] * G[i] * d2Scale;
}
}
#else
// True Gregory derivatives using appropriate differentiation of composite functions:
//
// Note that for G(s,t) = N(s,t) / D(s,t), all N' and D' are trivial constants (which
// simplifies things for higher order derivatives). And while each pair of functions
// G (i.e. the G+ and G- corresponding to points f+ and f-) must sum to 1 to ensure
// Bezier equivalence (when f+ = f-), the pairs of G' must similarly sum to 0. So we
// can potentially compute only one of the pair and negate the result for the other
// (and with 4 or 8 computations involving these constants, this is all very SIMD
// friendly...) but for now we treat all 8 independently for simplicity.
//
//float N[8] = OSD_ARRAY_8(float, s, t, t, sC, sC, tC, tC, s );
float D[8] = OSD_ARRAY_8(float, df0, df0, df1, df1, df2, df2, df3, df3 );
OSD_DATA_STORAGE_CLASS const float Nds[8] = OSD_ARRAY_8(float, 1.0f, 0.0f, 0.0f, -1.0f, -1.0f, 0.0f, 0.0f, 1.0f );
OSD_DATA_STORAGE_CLASS const float Ndt[8] = OSD_ARRAY_8(float, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, -1.0f, -1.0f, 0.0f );
OSD_DATA_STORAGE_CLASS const float Dds[8] = OSD_ARRAY_8(float, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f );
OSD_DATA_STORAGE_CLASS const float Ddt[8] = OSD_ARRAY_8(float, 1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f );
// Combined weights for interior points -- (scaled) combinations of B, B', G and G':
for (int i = 0; i < 8; ++i) {
int iDst = interiorGregory[i];
int tRow = interiorBezTRow[i];
int sCol = interiorBezSCol[i];
// Quotient rule for G' (re-expressed in terms of G to simplify (and D = 1/D)):
float Gds = (Nds[i] - Dds[i] * G[i]) * D[i];
float Gdt = (Ndt[i] - Ddt[i] * G[i]) * D[i];
// Product rule combining B and B' with G and G' (and scaled):
wDs[iDst] = (Bds[sCol] * G[i] + Bs[sCol] * Gds) * Bt[tRow] * dScale;
wDt[iDst] = (Bdt[tRow] * G[i] + Bt[tRow] * Gdt) * Bs[sCol] * dScale;
if (find_second_partials) {
float Dsqr_inv = D[i]*D[i];
float Gdss = 2.0f * Dds[i] * Dsqr_inv * (G[i] * Dds[i] - Nds[i]);
float Gdst = Dsqr_inv * (2.0f * G[i] * Dds[i] * Ddt[i] - Nds[i] * Ddt[i] - Ndt[i] * Dds[i]);
float Gdtt = 2.0f * Ddt[i] * Dsqr_inv * (G[i] * Ddt[i] - Ndt[i]);
wDss[iDst] = (Bdss[sCol] * G[i] + 2.0f * Bds[sCol] * Gds + Bs[sCol] * Gdss) * Bt[tRow] * d2Scale;
wDst[iDst] = (Bt[tRow] * (Bs[sCol] * Gdst + Bds[sCol] * Gdt) + Bdt[tRow] * (Bds[sCol] * G[i] + Bs[sCol] * Gds)) * d2Scale;
wDtt[iDst] = (Bdtt[tRow] * G[i] + 2.0f * Bdt[tRow] * Gdt + Bt[tRow] * Gdtt) * Bs[sCol] * d2Scale;
}
}
#endif
}
}
#endif /* OPENSUBDIV3_OSD_PATCH_BASIS_COMMON_H */