Added missing Osd Evaluator methods for deriv eval

Now that Far::LimitStencilTable and Far::PatchTable
support evaluation of 1st and 2nd derivatives the
Osd Evaluator API for evaluating stencils and patches
has been updated to match.
This commit is contained in:
David G Yu 2017-01-26 14:36:30 -08:00
parent 432b6b235f
commit fe38ad8cda
26 changed files with 8482 additions and 242 deletions

View File

@ -49,6 +49,10 @@ static const char *patchBasisSource =
template <class T> cl_mem
createCLBuffer(std::vector<T> const & src, cl_context clContext) {
if (src.empty()) {
return NULL;
}
cl_int errNum = 0;
cl_mem devicePtr = clCreateBuffer(clContext,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
@ -76,9 +80,11 @@ CLStencilTable::CLStencilTable(Far::StencilTable const *stencilTable,
clContext);
_weights = createCLBuffer(stencilTable->GetWeights(), clContext);
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
} else {
_sizes = _offsets = _indices = _weights = NULL;
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
}
}
@ -96,9 +102,16 @@ CLStencilTable::CLStencilTable(Far::LimitStencilTable const *limitStencilTable,
limitStencilTable->GetDuWeights(), clContext);
_dvWeights = createCLBuffer(
limitStencilTable->GetDvWeights(), clContext);
_duuWeights = createCLBuffer(
limitStencilTable->GetDuuWeights(), clContext);
_duvWeights = createCLBuffer(
limitStencilTable->GetDuvWeights(), clContext);
_dvvWeights = createCLBuffer(
limitStencilTable->GetDvvWeights(), clContext);
} else {
_sizes = _offsets = _indices = _weights = NULL;
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
}
}
@ -109,6 +122,9 @@ CLStencilTable::~CLStencilTable() {
if (_weights) clReleaseMemObject(_weights);
if (_duWeights) clReleaseMemObject(_duWeights);
if (_dvWeights) clReleaseMemObject(_dvWeights);
if (_duuWeights) clReleaseMemObject(_duuWeights);
if (_duvWeights) clReleaseMemObject(_duvWeights);
if (_dvvWeights) clReleaseMemObject(_dvvWeights);
}
// ---------------------------------------------------------------------------
@ -130,7 +146,10 @@ bool
CLEvaluator::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const & /*duDesc*/,
BufferDescriptor const & /*dvDesc*/) {
BufferDescriptor const & /*dvDesc*/,
BufferDescriptor const & /*duuDesc*/,
BufferDescriptor const & /*duvDesc*/,
BufferDescriptor const & /*dvvDesc*/) {
if (srcDesc.length > dstDesc.length) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"srcDesc length must be less than or equal to "
@ -263,6 +282,7 @@ CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
size_t globalWorkSize = (size_t)(end - start);
BufferDescriptor empty;
clSetKernelArg(_stencilDerivKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_stencilDerivKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_stencilDerivKernel, 2, sizeof(cl_mem), &dst);
@ -273,14 +293,26 @@ CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
clSetKernelArg(_stencilDerivKernel, 7, sizeof(cl_mem), &dv);
clSetKernelArg(_stencilDerivKernel, 8, sizeof(int), &dvDesc.offset);
clSetKernelArg(_stencilDerivKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_stencilDerivKernel, 10, sizeof(cl_mem), &sizes);
clSetKernelArg(_stencilDerivKernel, 11, sizeof(cl_mem), &offsets);
clSetKernelArg(_stencilDerivKernel, 12, sizeof(cl_mem), &indices);
clSetKernelArg(_stencilDerivKernel, 13, sizeof(cl_mem), &weights);
clSetKernelArg(_stencilDerivKernel, 14, sizeof(cl_mem), &duWeights);
clSetKernelArg(_stencilDerivKernel, 15, sizeof(cl_mem), &dvWeights);
clSetKernelArg(_stencilDerivKernel, 16, sizeof(int), &start);
clSetKernelArg(_stencilDerivKernel, 17, sizeof(int), &end);
clSetKernelArg(_stencilDerivKernel, 10, sizeof(cl_mem), NULL);
clSetKernelArg(_stencilDerivKernel, 11, sizeof(int), &empty.offset);
clSetKernelArg(_stencilDerivKernel, 12, sizeof(int), &empty.stride);
clSetKernelArg(_stencilDerivKernel, 13, sizeof(cl_mem), NULL);
clSetKernelArg(_stencilDerivKernel, 14, sizeof(int), &empty.offset);
clSetKernelArg(_stencilDerivKernel, 15, sizeof(int), &empty.stride);
clSetKernelArg(_stencilDerivKernel, 16, sizeof(cl_mem), NULL);
clSetKernelArg(_stencilDerivKernel, 17, sizeof(int), &empty.offset);
clSetKernelArg(_stencilDerivKernel, 18, sizeof(int), &empty.stride);
clSetKernelArg(_stencilDerivKernel, 19, sizeof(cl_mem), &sizes);
clSetKernelArg(_stencilDerivKernel, 20, sizeof(cl_mem), &offsets);
clSetKernelArg(_stencilDerivKernel, 21, sizeof(cl_mem), &indices);
clSetKernelArg(_stencilDerivKernel, 22, sizeof(cl_mem), &weights);
clSetKernelArg(_stencilDerivKernel, 23, sizeof(cl_mem), &duWeights);
clSetKernelArg(_stencilDerivKernel, 24, sizeof(cl_mem), &dvWeights);
clSetKernelArg(_stencilDerivKernel, 25, sizeof(cl_mem), NULL);
clSetKernelArg(_stencilDerivKernel, 26, sizeof(cl_mem), NULL);
clSetKernelArg(_stencilDerivKernel, 27, sizeof(cl_mem), NULL);
clSetKernelArg(_stencilDerivKernel, 28, sizeof(int), &start);
clSetKernelArg(_stencilDerivKernel, 29, sizeof(int), &end);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _stencilDerivKernel, 1, NULL,
@ -292,9 +324,80 @@ CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
return false;
}
if (endEvent == NULL)
{
clFinish(_clCommandQueue);
if (endEvent == NULL) {
clFinish(_clCommandQueue);
}
return true;
}
bool
CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
cl_mem dst, BufferDescriptor const &dstDesc,
cl_mem du, BufferDescriptor const &duDesc,
cl_mem dv, BufferDescriptor const &dvDesc,
cl_mem duu, BufferDescriptor const &duuDesc,
cl_mem duv, BufferDescriptor const &duvDesc,
cl_mem dvv, BufferDescriptor const &dvvDesc,
cl_mem sizes,
cl_mem offsets,
cl_mem indices,
cl_mem weights,
cl_mem duWeights,
cl_mem dvWeights,
cl_mem duuWeights,
cl_mem duvWeights,
cl_mem dvvWeights,
int start, int end,
unsigned int numStartEvents,
const cl_event* startEvents,
cl_event* endEvent) const {
if (end <= start) return true;
size_t globalWorkSize = (size_t)(end - start);
clSetKernelArg(_stencilDerivKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_stencilDerivKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_stencilDerivKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_stencilDerivKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_stencilDerivKernel, 4, sizeof(cl_mem), &du);
clSetKernelArg(_stencilDerivKernel, 5, sizeof(int), &duDesc.offset);
clSetKernelArg(_stencilDerivKernel, 6, sizeof(int), &duDesc.stride);
clSetKernelArg(_stencilDerivKernel, 7, sizeof(cl_mem), &dv);
clSetKernelArg(_stencilDerivKernel, 8, sizeof(int), &dvDesc.offset);
clSetKernelArg(_stencilDerivKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_stencilDerivKernel, 10, sizeof(cl_mem), &duu);
clSetKernelArg(_stencilDerivKernel, 11, sizeof(int), &duuDesc.offset);
clSetKernelArg(_stencilDerivKernel, 12, sizeof(int), &duuDesc.stride);
clSetKernelArg(_stencilDerivKernel, 13, sizeof(cl_mem), &duv);
clSetKernelArg(_stencilDerivKernel, 14, sizeof(int), &duvDesc.offset);
clSetKernelArg(_stencilDerivKernel, 15, sizeof(int), &duvDesc.stride);
clSetKernelArg(_stencilDerivKernel, 16, sizeof(cl_mem), &dvv);
clSetKernelArg(_stencilDerivKernel, 17, sizeof(int), &dvvDesc.offset);
clSetKernelArg(_stencilDerivKernel, 18, sizeof(int), &dvvDesc.stride);
clSetKernelArg(_stencilDerivKernel, 19, sizeof(cl_mem), &sizes);
clSetKernelArg(_stencilDerivKernel, 20, sizeof(cl_mem), &offsets);
clSetKernelArg(_stencilDerivKernel, 21, sizeof(cl_mem), &indices);
clSetKernelArg(_stencilDerivKernel, 22, sizeof(cl_mem), &weights);
clSetKernelArg(_stencilDerivKernel, 23, sizeof(cl_mem), &duWeights);
clSetKernelArg(_stencilDerivKernel, 24, sizeof(cl_mem), &dvWeights);
clSetKernelArg(_stencilDerivKernel, 25, sizeof(cl_mem), &duuWeights);
clSetKernelArg(_stencilDerivKernel, 26, sizeof(cl_mem), &duvWeights);
clSetKernelArg(_stencilDerivKernel, 27, sizeof(cl_mem), &dvvWeights);
clSetKernelArg(_stencilDerivKernel, 28, sizeof(int), &start);
clSetKernelArg(_stencilDerivKernel, 29, sizeof(int), &end);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _stencilDerivKernel, 1, NULL,
&globalWorkSize, NULL, numStartEvents, startEvents, endEvent);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyStencilKernel (%d) ", errNum);
return false;
}
if (endEvent == NULL) {
clFinish(_clCommandQueue);
}
return true;
}
@ -315,6 +418,66 @@ CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
size_t globalWorkSize = (size_t)(numPatchCoords);
BufferDescriptor empty;
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), NULL);
clSetKernelArg(_patchKernel, 11, sizeof(int), &empty.offset);
clSetKernelArg(_patchKernel, 12, sizeof(int), &empty.stride);
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), NULL);
clSetKernelArg(_patchKernel, 14, sizeof(int), &empty.offset);
clSetKernelArg(_patchKernel, 15, sizeof(int), &empty.stride);
clSetKernelArg(_patchKernel, 16, sizeof(cl_mem), NULL);
clSetKernelArg(_patchKernel, 17, sizeof(int), &empty.offset);
clSetKernelArg(_patchKernel, 18, sizeof(int), &empty.stride);
clSetKernelArg(_patchKernel, 19, sizeof(cl_mem), &patchCoordsBuffer);
clSetKernelArg(_patchKernel, 20, sizeof(cl_mem), &patchArrayBuffer);
clSetKernelArg(_patchKernel, 21, sizeof(cl_mem), &patchIndexBuffer);
clSetKernelArg(_patchKernel, 22, sizeof(cl_mem), &patchParamBuffer);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _patchKernel, 1, NULL,
&globalWorkSize, NULL, numStartEvents, startEvents, endEvent);
if (errNum != CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyPatchKernel (%d) ", errNum);
return false;
}
if (endEvent == NULL) {
clFinish(_clCommandQueue);
}
return true;
}
bool
CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
cl_mem dst, BufferDescriptor const &dstDesc,
cl_mem du, BufferDescriptor const &duDesc,
cl_mem dv, BufferDescriptor const &dvDesc,
cl_mem duu, BufferDescriptor const &duuDesc,
cl_mem duv, BufferDescriptor const &duvDesc,
cl_mem dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
cl_mem patchCoordsBuffer,
cl_mem patchArrayBuffer,
cl_mem patchIndexBuffer,
cl_mem patchParamBuffer,
unsigned int numStartEvents,
const cl_event* startEvents,
cl_event* endEvent) const {
size_t globalWorkSize = (size_t)(numPatchCoords);
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
@ -325,10 +488,19 @@ CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &patchCoordsBuffer);
clSetKernelArg(_patchKernel, 11, sizeof(cl_mem), &patchArrayBuffer);
clSetKernelArg(_patchKernel, 12, sizeof(cl_mem), &patchIndexBuffer);
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &patchParamBuffer);
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &duu);
clSetKernelArg(_patchKernel, 11, sizeof(int), &duuDesc.offset);
clSetKernelArg(_patchKernel, 12, sizeof(int), &duuDesc.stride);
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &duv);
clSetKernelArg(_patchKernel, 14, sizeof(int), &duvDesc.offset);
clSetKernelArg(_patchKernel, 15, sizeof(int), &duvDesc.stride);
clSetKernelArg(_patchKernel, 16, sizeof(cl_mem), &dvv);
clSetKernelArg(_patchKernel, 17, sizeof(int), &dvvDesc.offset);
clSetKernelArg(_patchKernel, 18, sizeof(int), &dvvDesc.stride);
clSetKernelArg(_patchKernel, 19, sizeof(cl_mem), &patchCoordsBuffer);
clSetKernelArg(_patchKernel, 20, sizeof(cl_mem), &patchArrayBuffer);
clSetKernelArg(_patchKernel, 21, sizeof(cl_mem), &patchIndexBuffer);
clSetKernelArg(_patchKernel, 22, sizeof(cl_mem), &patchParamBuffer);
cl_int errNum = clEnqueueNDRangeKernel(
_clCommandQueue, _patchKernel, 1, NULL,
@ -340,15 +512,13 @@ CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
return false;
}
if (endEvent == NULL)
{
clFinish(_clCommandQueue);
if (endEvent == NULL) {
clFinish(_clCommandQueue);
}
return true;
}
/* static */
void
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {

File diff suppressed because it is too large Load Diff

View File

@ -99,12 +99,18 @@ __kernel void computeStencilsDerivatives(
__global float * dst, int dstOffset,
__global float * du, int duOffset, int duStride,
__global float * dv, int dvOffset, int dvStride,
__global float * duu, int duuOffset, int duuStride,
__global float * duv, int duvOffset, int duvStride,
__global float * dvv, int dvvOffset, int dvvStride,
__global int * sizes,
__global int * offsets,
__global int * indices,
__global float * weights,
__global float * duWeights,
__global float * dvWeights,
__global float * duuWeights,
__global float * duvWeights,
__global float * dvvWeights,
int batchStart, int batchEnd) {
int current = get_global_id(0) + batchStart;
@ -113,10 +119,13 @@ __kernel void computeStencilsDerivatives(
return;
}
struct Vertex v, vdu, vdv;
struct Vertex v, vdu, vdv, vduu, vduv, vdvv;
clear(&v);
clear(&vdu);
clear(&vdv);
clear(&vduu);
clear(&vduv);
clear(&vdvv);
int size = sizes[current],
offset = offsets[current];
@ -125,6 +134,9 @@ __kernel void computeStencilsDerivatives(
if (dst) dst += dstOffset;
if (du) du += duOffset;
if (dv) dv += dvOffset;
if (duu) duu += duuOffset;
if (duv) duv += duvOffset;
if (dvv) dvv += dvvOffset;
for (int i=0; i<size; ++i) {
int ofs = offset + i;
@ -132,11 +144,17 @@ __kernel void computeStencilsDerivatives(
if (weights) addWithWeight( &v, src, vid, weights[ofs]);
if (duWeights) addWithWeight(&vdu, src, vid, duWeights[ofs]);
if (dvWeights) addWithWeight(&vdv, src, vid, dvWeights[ofs]);
if (duuWeights) addWithWeight(&vduu, src, vid, duuWeights[ofs]);
if (duvWeights) addWithWeight(&vduv, src, vid, duvWeights[ofs]);
if (dvvWeights) addWithWeight(&vdvv, src, vid, dvvWeights[ofs]);
}
if (dst) writeVertex (dst, current, &v);
if (du) writeVertexStride(du, current, &vdu, duStride);
if (dv) writeVertexStride(dv, current, &vdv, dvStride);
if (duu) writeVertexStride(duu, current, &vduu, duuStride);
if (duv) writeVertexStride(duv, current, &vduv, duvStride);
if (dvv) writeVertexStride(dvv, current, &vdvv, dvvStride);
}
// ---------------------------------------------------------------------------
@ -205,6 +223,9 @@ __kernel void computePatches(__global float *src, int srcOffset,
__global float *dst, int dstOffset,
__global float *du, int duOffset, int duStride,
__global float *dv, int dvOffset, int dvStride,
__global float *duu, int duuOffset, int duuStride,
__global float *duv, int duvOffset, int duvStride,
__global float *dvv, int dvvOffset, int dvvStride,
__global struct PatchCoord *patchCoords,
__global struct PatchArray *patchArrayBuffer,
__global int *patchIndexBuffer,
@ -213,8 +234,11 @@ __kernel void computePatches(__global float *src, int srcOffset,
if (src) src += srcOffset;
if (dst) dst += dstOffset;
if (du) du += duOffset;
if (dv) dv += dvOffset;
if (du) du += duOffset;
if (dv) dv += dvOffset;
if (duu) duu += duuOffset;
if (duv) duv += duvOffset;
if (dvv) dvv += dvvOffset;
struct PatchCoord coord = patchCoords[current];
struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
@ -274,5 +298,31 @@ __kernel void computePatches(__global float *src, int srcOffset,
}
writeVertexStride(dv, current, &vdv, dvStride);
}
if (duu) {
struct Vertex vduu;
clear(&vduu);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&vduu, src, index, wDss[i]);
}
writeVertexStride(duu, current, &vduu, duuStride);
}
if (duv) {
struct Vertex vduv;
clear(&vduv);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&vduv, src, index, wDst[i]);
}
writeVertexStride(duv, current, &vduv, duvStride);
}
if (dvv) {
struct Vertex vdvv;
clear(&vdvv);
for (int i = 0; i < numControlVertices; ++i) {
int index = patchIndexBuffer[indexBase + i];
addWithWeight(&vdvv, src, index, wDtt[i]);
}
writeVertexStride(dvv, current, &vdvv, dvvStride);
}
}

View File

@ -82,6 +82,48 @@ CpuEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
return true;
}
/* static */
bool
CpuEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
if (srcDesc.length != duuDesc.length) return false;
if (srcDesc.length != duvDesc.length) return false;
if (srcDesc.length != dvvDesc.length) return false;
CpuEvalStencils(src, srcDesc,
dst, dstDesc,
du, duDesc,
dv, dvDesc,
duu, duuDesc,
duv, duvDesc,
dvv, dvvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
duuWeights, duvWeights, dvvWeights,
start, end);
return true;
}
template <typename T>
struct BufferAdapter {
BufferAdapter(T *p, int length, int stride) :
@ -264,6 +306,120 @@ CpuEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
return true;
}
/* static */
bool
CpuEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
if (src) {
src += srcDesc.offset;
} else {
return false;
}
if (dst) {
if (srcDesc.length != dstDesc.length) return false;
dst += dstDesc.offset;
}
if (du) {
du += duDesc.offset;
if (srcDesc.length != duDesc.length) return false;
}
if (dv) {
dv += dvDesc.offset;
if (srcDesc.length != dvDesc.length) return false;
}
if (duu) {
duu += duuDesc.offset;
if (srcDesc.length != duuDesc.length) return false;
}
if (duv) {
duv += duvDesc.offset;
if (srcDesc.length != duvDesc.length) return false;
}
if (dvv) {
dvv += dvvDesc.offset;
if (srcDesc.length != dvvDesc.length) return false;
}
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
BufferAdapter<float> duT(du, duDesc.length, duDesc.stride);
BufferAdapter<float> dvT(dv, dvDesc.length, dvDesc.stride);
BufferAdapter<float> duuT(duu, duuDesc.length, duuDesc.stride);
BufferAdapter<float> duvT(duv, duvDesc.length, duvDesc.stride);
BufferAdapter<float> dvvT(dvv, dvvDesc.length, dvvDesc.stride);
float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
for (int i = 0; i < numPatchCoords; ++i) {
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
Far::PatchParam const & param =
patchParamBuffer[coord.handle.patchIndex];
int patchType = param.IsRegular()
? Far::PatchDescriptor::REGULAR
: array.GetPatchType();
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(param,
coord.s, coord.t, wP, wDu, wDv,
wDuu, wDuv, wDvv);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(param,
coord.s, coord.t, wP, wDu, wDv,
wDuu, wDuv, wDvv);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(param,
coord.s, coord.t, wP, wDu, wDv,
wDuu, wDuv, wDvv);
numControlVertices = 4;
} else {
assert(0);
}
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
int indexBase = array.GetIndexBase() + indexStride *
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
const int *cvs = &patchIndexBuffer[indexBase];
dstT.Clear();
duT.Clear();
dvT.Clear();
duuT.Clear();
duvT.Clear();
dvvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
duT.AddWithWeight (srcT[cvs[j]], wDu[j]);
dvT.AddWithWeight (srcT[cvs[j]], wDv[j]);
duuT.AddWithWeight (srcT[cvs[j]], wDuu[j]);
duvT.AddWithWeight (srcT[cvs[j]], wDuv[j]);
dvvT.AddWithWeight (srcT[cvs[j]], wDvv[j]);
}
++dstT;
++duT;
++dvT;
++duuT;
++duvT;
++dvvT;
}
return true;
}
} // end namespace Osd

View File

@ -244,6 +244,177 @@ public:
const float * dvWeights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the cpu kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
STENCIL_TABLE const *stencilTable,
const CpuEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
&stencilTable->GetDuuWeights()[0],
&stencilTable->GetDuvWeights()[0],
&stencilTable->GetDvvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
///
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
///
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
@ -373,6 +544,102 @@ public:
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
@ -457,6 +724,72 @@ public:
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
@ -508,6 +841,164 @@ public:
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
@ -562,6 +1053,170 @@ public:
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
CpuEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// ----------------------------------------------------------------------
///
/// Other methods

View File

@ -169,6 +169,76 @@ CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
}
}
void
CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float * dst, BufferDescriptor const &dstDesc,
float * dstDu, BufferDescriptor const &dstDuDesc,
float * dstDv, BufferDescriptor const &dstDvDesc,
float * dstDuu, BufferDescriptor const &dstDuuDesc,
float * dstDuv, BufferDescriptor const &dstDuvDesc,
float * dstDvv, BufferDescriptor const &dstDvvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
float const * duuWeights,
float const * duvWeights,
float const * dvvWeights,
int start, int end) {
if (start > 0) {
sizes += start;
indices += offsets[start];
weights += offsets[start];
duWeights += offsets[start];
dvWeights += offsets[start];
duuWeights += offsets[start];
duvWeights += offsets[start];
dvvWeights += offsets[start];
}
src += srcDesc.offset;
dst += dstDesc.offset;
dstDu += dstDuDesc.offset;
dstDv += dstDvDesc.offset;
dstDuu += dstDuuDesc.offset;
dstDuv += dstDuvDesc.offset;
dstDvv += dstDvvDesc.offset;
int nOutLength = dstDesc.length + dstDuDesc.length + dstDvDesc.length
+ dstDuuDesc.length + dstDuvDesc.length + dstDvvDesc.length;
float * result = (float*)alloca(nOutLength * sizeof(float));
float * resultDu = result + dstDesc.length;
float * resultDv = resultDu + dstDuDesc.length;
float * resultDuu = resultDv + dstDuuDesc.length;
float * resultDuv = resultDuu + dstDuvDesc.length;
float * resultDvv = resultDuv + dstDvvDesc.length;
int nStencils = end - start;
for (int i = 0; i < nStencils; ++i, ++sizes) {
// clear
memset(result, 0, nOutLength * sizeof(float));
for (int j=0; j<*sizes; ++j) {
addWithWeight(result, src, *indices, *weights++, srcDesc);
addWithWeight(resultDu, src, *indices, *duWeights++, srcDesc);
addWithWeight(resultDv, src, *indices, *dvWeights++, srcDesc);
addWithWeight(resultDuu, src, *indices, *duuWeights++, srcDesc);
addWithWeight(resultDuv, src, *indices, *duvWeights++, srcDesc);
addWithWeight(resultDvv, src, *indices, *dvvWeights++, srcDesc);
++indices;
}
copy(dst, i, result, dstDesc);
copy(dstDu, i, resultDu, dstDuDesc);
copy(dstDv, i, resultDv, dstDvDesc);
copy(dstDuu, i, resultDuu, dstDuuDesc);
copy(dstDuv, i, resultDuu, dstDuvDesc);
copy(dstDvv, i, resultDvv, dstDvvDesc);
}
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -57,6 +57,25 @@ CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float const * dvWeights,
int start, int end);
void
CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float * dst, BufferDescriptor const &dstDesc,
float * dstDu, BufferDescriptor const &dstDuDesc,
float * dstDv, BufferDescriptor const &dstDvDesc,
float * dstDuu, BufferDescriptor const &dstDuuDesc,
float * dstDuv, BufferDescriptor const &dstDuvDesc,
float * dstDvv, BufferDescriptor const &dstDvvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
float const * duuWeights,
float const * duvWeights,
float const * dvvWeights,
int start, int end);
//
// SIMD ICC optimization of the stencil kernel
//

View File

@ -53,9 +53,12 @@ extern "C" {
const void *patchParams);
void CudaEvalPatchesWithDerivatives(
const float *src, float *dst, float *du, float *dv,
int length,
int srcStride, int dstStride, int dvStride, int duStride,
const float *src, float *dst,
float *du, float *dv,
float *duu, float *duv, float *dvv,
int length, int srcStride, int dstStride,
int duStride, int dvStride,
int duuStride, int duvStride, int dvvStride,
int numPatchCoords,
const void *patchCoords,
const void *patchArrays,
@ -71,6 +74,10 @@ namespace Osd {
template <class T> void *
createCudaBuffer(std::vector<T> const & src) {
if (src.empty()) {
return NULL;
}
void * devicePtr = 0;
size_t size = src.size()*sizeof(T);
@ -98,9 +105,11 @@ CudaStencilTable::CudaStencilTable(Far::StencilTable const *stencilTable) {
_indices = createCudaBuffer(stencilTable->GetControlIndices());
_weights = createCudaBuffer(stencilTable->GetWeights());
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
} else {
_sizes = _offsets = _indices = _weights = NULL;
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
}
}
@ -113,9 +122,13 @@ CudaStencilTable::CudaStencilTable(Far::LimitStencilTable const *limitStencilTab
_weights = createCudaBuffer(limitStencilTable->GetWeights());
_duWeights = createCudaBuffer(limitStencilTable->GetDuWeights());
_dvWeights = createCudaBuffer(limitStencilTable->GetDvWeights());
_duuWeights = createCudaBuffer(limitStencilTable->GetDuuWeights());
_duvWeights = createCudaBuffer(limitStencilTable->GetDuvWeights());
_dvvWeights = createCudaBuffer(limitStencilTable->GetDvvWeights());
} else {
_sizes = _offsets = _indices = _weights = NULL;
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
}
}
@ -126,6 +139,9 @@ CudaStencilTable::~CudaStencilTable() {
if (_weights) cudaFree(_weights);
if (_duWeights) cudaFree(_duWeights);
if (_dvWeights) cudaFree(_dvWeights);
if (_duuWeights) cudaFree(_duuWeights);
if (_duvWeights) cudaFree(_duvWeights);
if (_dvvWeights) cudaFree(_dvvWeights);
}
// ---------------------------------------------------------------------------
@ -197,6 +213,84 @@ CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
return true;
}
/* static */
bool
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start,
int end) {
// PERFORMANCE: need to combine 3 launches together
if (dst) {
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
sizes, offsets, indices, weights,
start, end);
}
if (du) {
CudaEvalStencils(src + srcDesc.offset,
du + duDesc.offset,
srcDesc.length,
srcDesc.stride,
duDesc.stride,
sizes, offsets, indices, duWeights,
start, end);
}
if (dv) {
CudaEvalStencils(src + srcDesc.offset,
dv + dvDesc.offset,
srcDesc.length,
srcDesc.stride,
dvDesc.stride,
sizes, offsets, indices, dvWeights,
start, end);
}
if (duu) {
CudaEvalStencils(src + srcDesc.offset,
duu + duuDesc.offset,
srcDesc.length,
srcDesc.stride,
duuDesc.stride,
sizes, offsets, indices, duuWeights,
start, end);
}
if (duv) {
CudaEvalStencils(src + srcDesc.offset,
duv + duvDesc.offset,
srcDesc.length,
srcDesc.stride,
duvDesc.stride,
sizes, offsets, indices, duvWeights,
start, end);
}
if (dvv) {
CudaEvalStencils(src + srcDesc.offset,
dvv + dvvDesc.offset,
srcDesc.length,
srcDesc.stride,
dvvDesc.stride,
sizes, offsets, indices, dvvWeights,
start, end);
}
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(const float *src,
@ -237,9 +331,42 @@ CudaEvaluator::EvalPatches(
if (dv) dv += dvDesc.offset;
CudaEvalPatchesWithDerivatives(
src, dst, du, dv,
srcDesc.length, srcDesc.stride,
dstDesc.stride, duDesc.stride, dvDesc.stride,
src, dst, du, dv, NULL, NULL, NULL,
srcDesc.length, srcDesc.stride, dstDesc.stride,
duDesc.stride, dvDesc.stride, 0, 0, 0,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
if (duu) duu += duuDesc.offset;
if (duv) duv += duvDesc.offset;
if (dvv) dvv += dvvDesc.offset;
CudaEvalPatchesWithDerivatives(
src, dst, du, dv, duu, duv, dvv,
srcDesc.length, srcDesc.stride, dstDesc.stride,
duDesc.stride, dvDesc.stride,
duuDesc.stride, duvDesc.stride, dvvDesc.stride,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}

View File

@ -73,6 +73,9 @@ public:
void *GetWeightsBuffer() const { return _weights; }
void *GetDuWeightsBuffer() const { return _duWeights; }
void *GetDvWeightsBuffer() const { return _dvWeights; }
void *GetDuuWeightsBuffer() const { return _duuWeights; }
void *GetDuvWeightsBuffer() const { return _duvWeights; }
void *GetDvvWeightsBuffer() const { return _dvvWeights; }
int GetNumStencils() const { return _numStencils; }
private:
@ -81,7 +84,10 @@ private:
* _indices,
* _weights,
* _duWeights,
* _dvWeights;
* _dvWeights,
* _duuWeights,
* _duvWeights,
* _dvvWeights;
int _numStencils;
};
@ -288,6 +294,177 @@ public:
const float * dvWeights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param stencilTable stencil table to be applied.
///
/// @param instance not used in the cuda kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the cuda kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
STENCIL_TABLE const *stencilTable,
const CudaEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
duuBuffer->BindCudaBuffer(), duuDesc,
duvBuffer->BindCudaBuffer(), duvDesc,
dvvBuffer->BindCudaBuffer(), dvvDesc,
(int const *)stencilTable->GetSizesBuffer(),
(int const *)stencilTable->GetOffsetsBuffer(),
(int const *)stencilTable->GetIndicesBuffer(),
(float const *)stencilTable->GetWeightsBuffer(),
(float const *)stencilTable->GetDuWeightsBuffer(),
(float const *)stencilTable->GetDvWeightsBuffer(),
(float const *)stencilTable->GetDuuWeightsBuffer(),
(float const *)stencilTable->GetDuvWeightsBuffer(),
(float const *)stencilTable->GetDvvWeightsBuffer(),
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw cuda pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
///
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
///
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
@ -396,8 +573,8 @@ public:
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
@ -410,6 +587,95 @@ public:
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
duuBuffer->BindCudaBuffer(), duuDesc,
duvBuffer->BindCudaBuffer(), duvDesc,
dvvBuffer->BindCudaBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
(const int *)patchTable->GetPatchIndexBuffer(),
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
@ -489,10 +755,76 @@ public:
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndices,
const PatchParam *patchParams);
PatchParam const *patchParams);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndices an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParams an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndices,
PatchParam const *patchParams);
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
@ -533,8 +865,8 @@ public:
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
@ -545,6 +877,164 @@ public:
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetVaryingPatchArrayBuffer(),
(const int *)patchTable->GetVaryingPatchIndexBuffer(),
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
duuBuffer->BindCudaBuffer(), duuDesc,
duvBuffer->BindCudaBuffer(), duvDesc,
dvvBuffer->BindCudaBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetVaryingPatchArrayBuffer(),
(const int *)patchTable->GetVaryingPatchIndexBuffer(),
(const PatchParam *)patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
@ -587,11 +1077,175 @@ public:
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetFVarPatchArrayBuffer(fvarChannel),
(const int *)patchTable->GetFVarPatchIndexBuffer(fvarChannel),
(const PatchParam *)patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetFVarPatchArrayBuffer(fvarChannel),
(const int *)patchTable->GetFVarPatchIndexBuffer(fvarChannel),
(const PatchParam *)patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCudaBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCudaBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCudaBuffer() method returning an
/// array of PatchCoord struct in cuda memory.
///
/// @param patchTable CudaPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the cuda evaluator
///
/// @param deviceContext not used in the cuda evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
CudaEvaluator const *instance,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
dstBuffer->BindCudaBuffer(), dstDesc,
duBuffer->BindCudaBuffer(), duDesc,
dvBuffer->BindCudaBuffer(), dvDesc,
duuBuffer->BindCudaBuffer(), duuDesc,
duvBuffer->BindCudaBuffer(), duvDesc,
dvvBuffer->BindCudaBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord *)patchCoords->BindCudaBuffer(),
(const PatchArray *)patchTable->GetFVarPatchArrayBuffer(fvarChannel),

View File

@ -305,8 +305,12 @@ int getNumControlVertices(int patchType) {
}
__global__ void
computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
computePatches(const float *src, float *dst,
float *dstDu, float *dstDv,
float *dstDuu, float *dstDuv, float *dstDvv,
int length, int srcStride, int dstStride,
int dstDuStride, int dstDvStride,
int dstDuuStride, int dstDuvStride, int dstDvvStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
@ -376,6 +380,30 @@ computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
addWithWeight(d, srcVert, wDt[j], length);
}
}
if (dstDuu) {
float *d = dstDuu + i * dstDuuStride;
clear(d, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(d, srcVert, wDss[j], length);
}
}
if (dstDuv) {
float *d = dstDuv + i * dstDuvStride;
clear(d, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(d, srcVert, wDst[j], length);
}
}
if (dstDvv) {
float *d = dstDvv + i * dstDvvStride;
clear(d, length);
for (int j = 0; j < numControlVertices; ++j) {
const float * srcVert = src + cvs[j] * srcStride;
addWithWeight(d, srcVert, wDtt[j], length);
}
}
}
}
@ -447,14 +475,19 @@ void CudaEvalPatches(
// PERFORMANCE: not optimized at all
computePatches <<<512, 32>>>(
src, dst, NULL, NULL, length, srcStride, dstStride, 0, 0,
src, dst, NULL, NULL, NULL, NULL, NULL,
length, srcStride, dstStride, 0, 0, 0, 0, 0,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
}
void CudaEvalPatchesWithDerivatives(
const float *src, float *dst, float *dstDu, float *dstDv,
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
const float *src, float *dst,
float *dstDu, float *dstDv,
float *dstDuu, float *dstDuv, float *dstDvv,
int length, int srcStride, int dstStride,
int dstDuStride, int dstDvStride,
int dstDuuStride, int dstDuvStride, int dstDvvStride,
int numPatchCoords, const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
@ -463,7 +496,9 @@ void CudaEvalPatchesWithDerivatives(
// PERFORMANCE: not optimized at all
computePatches <<<512, 32>>>(
src, dst, dstDu, dstDv, length, srcStride, dstStride, dstDuStride, dstDvStride,
src, dst, dstDu, dstDv, dstDuu, dstDuv, dstDvv,
length, srcStride, dstStride,
dstDuStride, dstDvStride, dstDuuStride, dstDuvStride, dstDvvStride,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
}

View File

@ -172,6 +172,22 @@ D3D11ComputeEvaluator::Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
ID3D11DeviceContext *deviceContext) {
return Create(srcDesc, dstDesc, duDesc, dvDesc,
BufferDescriptor(),
BufferDescriptor(),
BufferDescriptor(),
deviceContext);
}
D3D11ComputeEvaluator *
D3D11ComputeEvaluator::Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
ID3D11DeviceContext *deviceContext) {
(void)deviceContext; // not used
// TODO: implements derivatives

View File

@ -102,6 +102,15 @@ public:
BufferDescriptor const &dvDesc,
ID3D11DeviceContext *deviceContext);
static D3D11ComputeEvaluator * Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
ID3D11DeviceContext *deviceContext);
/// Constructor.
D3D11ComputeEvaluator();

View File

@ -44,6 +44,10 @@ static const char *shaderSource =
template <class T> GLuint
createSSBO(std::vector<T> const & src) {
if (src.empty()) {
return 0;
}
GLuint devicePtr = 0;
glGenBuffers(1, &devicePtr);
@ -75,9 +79,11 @@ GLStencilTableSSBO::GLStencilTableSSBO(
_indices = createSSBO(stencilTable->GetControlIndices());
_weights = createSSBO(stencilTable->GetWeights());
_duWeights = _dvWeights = 0;
_duuWeights = _duvWeights = _dvvWeights = 0;
} else {
_sizes = _offsets = _indices = _weights = 0;
_duWeights = _dvWeights = 0;
_duuWeights = _duvWeights = _dvvWeights = 0;
}
}
@ -91,9 +97,13 @@ GLStencilTableSSBO::GLStencilTableSSBO(
_weights = createSSBO(limitStencilTable->GetWeights());
_duWeights = createSSBO(limitStencilTable->GetDuWeights());
_dvWeights = createSSBO(limitStencilTable->GetDvWeights());
_duuWeights = createSSBO(limitStencilTable->GetDuuWeights());
_duvWeights = createSSBO(limitStencilTable->GetDuvWeights());
_dvvWeights = createSSBO(limitStencilTable->GetDvvWeights());
} else {
_sizes = _offsets = _indices = _weights = 0;
_duWeights = _dvWeights = 0;
_duuWeights = _duvWeights = _dvvWeights = 0;
}
}
@ -104,6 +114,9 @@ GLStencilTableSSBO::~GLStencilTableSSBO() {
if (_weights) glDeleteBuffers(1, &_weights);
if (_duWeights) glDeleteBuffers(1, &_duWeights);
if (_dvWeights) glDeleteBuffers(1, &_dvWeights);
if (_duuWeights) glDeleteBuffers(1, &_duuWeights);
if (_duvWeights) glDeleteBuffers(1, &_duvWeights);
if (_dvvWeights) glDeleteBuffers(1, &_dvvWeights);
}
// ---------------------------------------------------------------------------
@ -120,8 +133,11 @@ GLComputeEvaluator::~GLComputeEvaluator() {
static GLuint
compileKernel(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const & /* duDesc */,
BufferDescriptor const & /* dvDesc */,
BufferDescriptor const & duDesc,
BufferDescriptor const & dvDesc,
BufferDescriptor const & duuDesc,
BufferDescriptor const & duvDesc,
BufferDescriptor const & dvvDesc,
const char *kernelDefine,
int workGroupSize) {
GLuint program = glCreateProgram();
@ -139,6 +155,16 @@ compileKernel(BufferDescriptor const &srcDesc,
<< "#define WORK_GROUP_SIZE " << workGroupSize << "\n"
<< kernelDefine << "\n"
<< patchBasisShaderSourceDefine << "\n";
bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
if (deriv1) {
defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n";
}
if (deriv2) {
defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES\n";
}
std::string defineStr = defines.str();
const char *shaderSources[4] = {"#version 430\n", 0, 0, 0};
@ -175,16 +201,23 @@ bool
GLComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc) {
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc) {
// create a stencil kernel
if (!_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
if (!_stencilKernel.Compile(srcDesc, dstDesc,
duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
_workGroupSize)) {
return false;
}
// create a patch kernel
if (!_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
if (!_patchKernel.Compile(srcDesc, dstDesc,
duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
_workGroupSize)) {
return false;
}
@ -214,6 +247,40 @@ GLComputeEvaluator::EvalStencils(
GLuint dvWeightsBuffer,
int start, int end) const {
return EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
0, BufferDescriptor(),
0, BufferDescriptor(),
0, BufferDescriptor(),
sizesBuffer, offsetsBuffer, indicesBuffer,
weightsBuffer,
duWeightsBuffer, dvWeightsBuffer,
0, 0, 0,
start, end);
}
bool
GLComputeEvaluator::EvalStencils(
GLuint srcBuffer, BufferDescriptor const &srcDesc,
GLuint dstBuffer, BufferDescriptor const &dstDesc,
GLuint duBuffer, BufferDescriptor const &duDesc,
GLuint dvBuffer, BufferDescriptor const &dvDesc,
GLuint duuBuffer, BufferDescriptor const &duuDesc,
GLuint duvBuffer, BufferDescriptor const &duvDesc,
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
GLuint sizesBuffer,
GLuint offsetsBuffer,
GLuint indicesBuffer,
GLuint weightsBuffer,
GLuint duWeightsBuffer,
GLuint dvWeightsBuffer,
GLuint duuWeightsBuffer,
GLuint duvWeightsBuffer,
GLuint dvvWeightsBuffer,
int start, int end) const {
if (!_stencilKernel.program) return false;
int count = end - start;
if (count <= 0) {
@ -224,6 +291,9 @@ GLComputeEvaluator::EvalStencils(
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, sizesBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, offsetsBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, indicesBuffer);
@ -232,6 +302,12 @@ GLComputeEvaluator::EvalStencils(
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 8, duWeightsBuffer);
if (dvWeightsBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, dvWeightsBuffer);
if (duuWeightsBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 13, duuWeightsBuffer);
if (duvWeightsBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 14, duvWeightsBuffer);
if (dvvWeightsBuffer)
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 15, dvvWeightsBuffer);
glUseProgram(_stencilKernel.program);
@ -247,13 +323,25 @@ GLComputeEvaluator::EvalStencils(
glUniform3i(_stencilKernel.uniformDvDesc,
dvDesc.offset, dvDesc.length, dvDesc.stride);
}
if (_stencilKernel.uniformDuuDesc > 0) {
glUniform3i(_stencilKernel.uniformDuuDesc,
duuDesc.offset, duuDesc.length, duuDesc.stride);
}
if (_stencilKernel.uniformDuvDesc > 0) {
glUniform3i(_stencilKernel.uniformDuvDesc,
duvDesc.offset, duvDesc.length, duvDesc.stride);
}
if (_stencilKernel.uniformDvvDesc > 0) {
glUniform3i(_stencilKernel.uniformDvvDesc,
dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
}
glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
glUseProgram(0);
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
for (int i = 0; i < 10; ++i) {
for (int i = 0; i < 16; ++i) {
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0);
}
@ -272,12 +360,44 @@ GLComputeEvaluator::EvalPatches(
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const {
return EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
0, BufferDescriptor(),
0, BufferDescriptor(),
0, BufferDescriptor(),
numPatchCoords,
patchCoordsBuffer,
patchArrays,
patchIndexBuffer,
patchParamsBuffer);
}
bool
GLComputeEvaluator::EvalPatches(
GLuint srcBuffer, BufferDescriptor const &srcDesc,
GLuint dstBuffer, BufferDescriptor const &dstDesc,
GLuint duBuffer, BufferDescriptor const &duDesc,
GLuint dvBuffer, BufferDescriptor const &dvDesc,
GLuint duuBuffer, BufferDescriptor const &duuDesc,
GLuint duvBuffer, BufferDescriptor const &duvDesc,
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexBuffer,
GLuint patchParamsBuffer) const {
if (!_patchKernel.program) return false;
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, patchCoordsBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchIndexBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchParamsBuffer);
@ -288,8 +408,27 @@ GLComputeEvaluator::EvalPatches(
glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
(const GLint*)&patchArrays[0]);
glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
if (_patchKernel.uniformDuDesc > 0) {
glUniform3i(_patchKernel.uniformDuDesc,
duDesc.offset, duDesc.length, duDesc.stride);
}
if (_patchKernel.uniformDvDesc > 0) {
glUniform3i(_patchKernel.uniformDvDesc,
dvDesc.offset, dvDesc.length, dvDesc.stride);
}
if (_patchKernel.uniformDuuDesc > 0) {
glUniform3i(_patchKernel.uniformDuuDesc,
duuDesc.offset, duuDesc.length, duuDesc.stride);
}
if (_patchKernel.uniformDuvDesc > 0) {
glUniform3i(_patchKernel.uniformDuvDesc,
duvDesc.offset, duvDesc.length, duvDesc.stride);
}
if (_patchKernel.uniformDvvDesc > 0) {
glUniform3i(_patchKernel.uniformDvvDesc,
dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
}
glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1);
@ -303,6 +442,10 @@ GLComputeEvaluator::EvalPatches(
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, 0);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, 0);
return true;
}
// ---------------------------------------------------------------------------
@ -320,20 +463,21 @@ GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
int workGroupSize) {
// create stencil kernel
if (program) {
glDeleteProgram(program);
}
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
const char *kernelDef = derivatives
? "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n"
"#define OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES\n"
: "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n";
const char * kernelDefine =
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n";
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef,
workGroupSize);
program = compileKernel(srcDesc, dstDesc,
duDesc, dvDesc, duuDesc, duvDesc, dvvDesc,
kernelDefine, workGroupSize);
if (program == 0) return false;
// cache uniform locations (TODO: use uniform block)
@ -343,6 +487,9 @@ GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
uniformDstOffset = glGetUniformLocation(program, "dstOffset");
uniformDuDesc = glGetUniformLocation(program, "duDesc");
uniformDvDesc = glGetUniformLocation(program, "dvDesc");
uniformDuuDesc = glGetUniformLocation(program, "duuDesc");
uniformDuvDesc = glGetUniformLocation(program, "duvDesc");
uniformDvvDesc = glGetUniformLocation(program, "dvvDesc");
return true;
}
@ -362,20 +509,21 @@ GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
int workGroupSize) {
// create stencil kernel
if (program) {
glDeleteProgram(program);
}
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
const char *kernelDef = derivatives
? "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n"
"#define OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES\n"
: "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n";
const char * kernelDefine =
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n";
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef,
workGroupSize);
program = compileKernel(srcDesc, dstDesc,
duDesc, dvDesc, duuDesc, duvDesc, dvvDesc,
kernelDefine, workGroupSize);
if (program == 0) return false;
// cache uniform locations
@ -384,6 +532,9 @@ GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
uniformPatchArray = glGetUniformLocation(program, "patchArray");
uniformDuDesc = glGetUniformLocation(program, "duDesc");
uniformDvDesc = glGetUniformLocation(program, "dvDesc");
uniformDuuDesc = glGetUniformLocation(program, "duuDesc");
uniformDuvDesc = glGetUniformLocation(program, "duvDesc");
uniformDvvDesc = glGetUniformLocation(program, "dvvDesc");
return true;
}

File diff suppressed because it is too large Load Diff

View File

@ -48,6 +48,10 @@ static const char *shaderSource =
template <class T> GLuint
createGLTextureBuffer(std::vector<T> const & src, GLenum type) {
if (src.empty()) {
return 0;
}
GLint size = static_cast<int>(src.size()*sizeof(T));
void const * ptr = &src.at(0);
@ -95,9 +99,11 @@ GLStencilTableTBO::GLStencilTableTBO(
stencilTable->GetControlIndices(), GL_R32I);
_weights = createGLTextureBuffer(stencilTable->GetWeights(), GL_R32F);
_duWeights = _dvWeights = 0;
_duuWeights = _duvWeights = _dvvWeights = 0;
} else {
_sizes = _offsets = _indices = _weights = 0;
_duWeights = _dvWeights = 0;
_duuWeights = _duvWeights = _dvvWeights = 0;
}
}
@ -118,9 +124,16 @@ GLStencilTableTBO::GLStencilTableTBO(
limitStencilTable->GetDuWeights(), GL_R32F);
_dvWeights = createGLTextureBuffer(
limitStencilTable->GetDvWeights(), GL_R32F);
_duuWeights = createGLTextureBuffer(
limitStencilTable->GetDuuWeights(), GL_R32F);
_duvWeights = createGLTextureBuffer(
limitStencilTable->GetDuvWeights(), GL_R32F);
_dvvWeights = createGLTextureBuffer(
limitStencilTable->GetDvvWeights(), GL_R32F);
} else {
_sizes = _offsets = _indices = _weights = 0;
_duWeights = _dvWeights = 0;
_duuWeights = _duvWeights = _dvvWeights = 0;
}
}
@ -131,11 +144,16 @@ GLStencilTableTBO::~GLStencilTableTBO() {
if (_weights) glDeleteTextures(1, &_weights);
if (_duWeights) glDeleteTextures(1, &_duWeights);
if (_dvWeights) glDeleteTextures(1, &_dvWeights);
if (_duuWeights) glDeleteTextures(1, &_duuWeights);
if (_duvWeights) glDeleteTextures(1, &_duvWeights);
if (_dvvWeights) glDeleteTextures(1, &_dvvWeights);
}
// ---------------------------------------------------------------------------
GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) {
GLXFBEvaluator::GLXFBEvaluator(bool sharedDerivativeBuffers)
: _srcBufferTexture(0),
_sharedDerivativeBuffers(sharedDerivativeBuffers) {
}
GLXFBEvaluator::~GLXFBEvaluator() {
@ -149,7 +167,11 @@ compileKernel(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
const char *kernelDefine) {
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
const char *kernelDefine,
bool sharedDerivativeBuffers) {
GLuint program = glCreateProgram();
@ -165,8 +187,25 @@ compileKernel(BufferDescriptor const &srcDesc,
<< "#define VERTEX_SHADER\n"
<< kernelDefine << "\n"
<< patchBasisShaderSourceDefine << "\n";
std::string defineStr = defines.str();
bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
if (deriv1) {
defines << "#define OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES\n";
if (sharedDerivativeBuffers) {
defines <<
"#define OPENSUBDIV_GLSL_XFB_SHARED_1ST_DERIVATIVE_BUFFERS\n";
}
}
if (deriv2) {
defines << "#define OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES\n";
if (sharedDerivativeBuffers) {
defines <<
"#define OPENSUBDIV_GLSL_XFB_SHARED_2ND_DERIVATIVE_BUFFERS\n";
}
}
std::string defineStr = defines.str();
const char *shaderSources[4] = {"#version 410\n", NULL, NULL, NULL};
@ -204,41 +243,148 @@ compileKernel(BufferDescriptor const &srcDesc,
outputs.push_back("gl_SkipComponents1");
}
}
if (duDesc.length) {
//
// For derivatives, we use another buffer bindings so gl_NextBuffer
// is inserted here to switch the destination of transform feedback.
//
// Note that the destination buffers may or may not be shared between
// vertex and each derivatives. gl_NextBuffer seems still works well
// in either case.
//
//
// For derivatives, we use another buffer bindings so gl_NextBuffer
// is inserted here to switch the destination of transform feedback.
//
// Note that the destination buffers may or may not be shared between
// vertex and each derivatives. gl_NextBuffer seems still works well
// in either case.
//
// If we know that the buffers for derivatives are shared, then we
// can use fewer buffer bindings. This can be important, since most GL
// implementations will support only up to 4 transform feedback bindings.
//
if (deriv1 && sharedDerivativeBuffers) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (duDesc.offset % duDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
int primvar1Offset = (duDesc.offset % duDesc.stride);
int primvar2Offset = (dvDesc.offset % dvDesc.stride);
for (int i = 0; i < primvar1Offset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < duDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDuBuffer[%d]", i);
snprintf(attrName, sizeof(attrName), "outDeriv1Buffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + duDesc.length; i < duDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
if (dvDesc.length) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (dvDesc.offset % dvDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
for (int i = primvar1Offset + duDesc.length; i < primvar2Offset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < dvDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDvBuffer[%d]", i);
snprintf(attrName, sizeof(attrName), "outDeriv1Buffer[%d]", i+duDesc.length);
outputs.push_back(attrName);
}
for (int i = primvarOffset + dvDesc.length; i < dvDesc.stride; ++i) {
for (int i = primvar2Offset + dvDesc.length; i < dvDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
} else {
if (duDesc.length) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (duDesc.offset % duDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < duDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDuBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + duDesc.length; i < duDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
if (dvDesc.length) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (dvDesc.offset % dvDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < dvDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDvBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + dvDesc.length; i < dvDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
}
if (deriv2 && sharedDerivativeBuffers) {
outputs.push_back("gl_NextBuffer");
int primvar1Offset = (duuDesc.offset % duuDesc.stride);
int primvar2Offset = (duvDesc.offset % duvDesc.stride);
int primvar3Offset = (dvvDesc.offset % dvvDesc.stride);
for (int i = 0; i < primvar1Offset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < duuDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDeriv2Buffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvar1Offset + duuDesc.length; i < primvar2Offset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < duvDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDeriv2Buffer[%d]", i+duuDesc.length);
outputs.push_back(attrName);
}
for (int i = primvar2Offset + duvDesc.length; i < primvar3Offset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < dvvDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDeriv2Buffer[%d]", i+duuDesc.length+duvDesc.length);
outputs.push_back(attrName);
}
for (int i = primvar3Offset + dvvDesc.length; i < dvvDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
} else {
if (duuDesc.length) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (duuDesc.offset % duuDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < duuDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDuuBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + duuDesc.length; i < duuDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
if (duvDesc.length) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (duvDesc.offset % duvDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < duvDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDuvBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + duvDesc.length; i < duvDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
if (dvvDesc.length) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (dvvDesc.offset % dvvDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < dvvDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDvvBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + dvvDesc.length; i < dvvDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
}
// convert to char* array
std::vector<const char *> pOutputs;
@ -274,13 +420,20 @@ bool
GLXFBEvaluator::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc) {
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc) {
// create a stencil kernel
_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc);
_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
_sharedDerivativeBuffers);
// create a patch kernel
_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc);
_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
_sharedDerivativeBuffers);
// create a texture for input buffer
if (!_srcBufferTexture) {
@ -314,12 +467,46 @@ GLXFBEvaluator::EvalStencils(
GLuint dstBuffer, BufferDescriptor const &dstDesc,
GLuint duBuffer, BufferDescriptor const &duDesc,
GLuint dvBuffer, BufferDescriptor const &dvDesc,
GLuint sizesBuffer,
GLuint offsetsBuffer,
GLuint indicesBuffer,
GLuint weightsBuffer,
GLuint duWeightsBuffer,
GLuint dvWeightsBuffer,
int start, int end) const {
return EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
0, BufferDescriptor(),
0, BufferDescriptor(),
0, BufferDescriptor(),
sizesBuffer, offsetsBuffer, indicesBuffer,
weightsBuffer,
duWeightsBuffer, dvWeightsBuffer,
0, 0, 0,
start, end);
}
bool
GLXFBEvaluator::EvalStencils(
GLuint srcBuffer, BufferDescriptor const &srcDesc,
GLuint dstBuffer, BufferDescriptor const &dstDesc,
GLuint duBuffer, BufferDescriptor const &duDesc,
GLuint dvBuffer, BufferDescriptor const &dvDesc,
GLuint duuBuffer, BufferDescriptor const &duuDesc,
GLuint duvBuffer, BufferDescriptor const &duvDesc,
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
GLuint sizesTexture,
GLuint offsetsTexture,
GLuint indicesTexture,
GLuint weightsTexture,
GLuint duWeightsTexture,
GLuint dvWeightsTexture,
GLuint duuWeightsTexture,
GLuint duvWeightsTexture,
GLuint dvvWeightsTexture,
int start, int end) const {
if (!_stencilKernel.program) return false;
@ -353,6 +540,14 @@ GLXFBEvaluator::EvalStencils(
bindTexture(_stencilKernel.uniformDuWeightsTexture, duWeightsTexture, 5);
if (_stencilKernel.uniformDvWeightsTexture >= 0 && dvWeightsTexture)
bindTexture(_stencilKernel.uniformDvWeightsTexture, dvWeightsTexture, 6);
if (_stencilKernel.uniformDuWeightsTexture >= 0 && duWeightsTexture)
bindTexture(_stencilKernel.uniformDuWeightsTexture, duWeightsTexture, 5);
if (_stencilKernel.uniformDuuWeightsTexture >= 0 && duuWeightsTexture)
bindTexture(_stencilKernel.uniformDuuWeightsTexture, duuWeightsTexture, 5);
if (_stencilKernel.uniformDuvWeightsTexture >= 0 && duvWeightsTexture)
bindTexture(_stencilKernel.uniformDuvWeightsTexture, duvWeightsTexture, 6);
if (_stencilKernel.uniformDvvWeightsTexture >= 0 && dvvWeightsTexture)
bindTexture(_stencilKernel.uniformDvvWeightsTexture, dvvWeightsTexture, 6);
// set batch range
glUniform1i(_stencilKernel.uniformStart, start);
@ -392,6 +587,12 @@ GLXFBEvaluator::EvalStencils(
(duDesc.offset - (duDesc.offset % duDesc.stride)) : 0;
int dvBufferBindOffset = dvDesc.stride ?
(dvDesc.offset - (dvDesc.offset % dvDesc.stride)) : 0;
int duuBufferBindOffset = duuDesc.stride ?
(duuDesc.offset - (duuDesc.offset % duuDesc.stride)) : 0;
int duvBufferBindOffset = duvDesc.stride ?
(duvDesc.offset - (duvDesc.offset % duvDesc.stride)) : 0;
int dvvBufferBindOffset = dvvDesc.stride ?
(dvvDesc.offset - (dvvDesc.offset % dvvDesc.stride)) : 0;
// bind destination buffer
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
@ -399,18 +600,53 @@ GLXFBEvaluator::EvalStencils(
dstBufferBindOffset * sizeof(float),
count * dstDesc.stride * sizeof(float));
if (duDesc.length > 0) {
if ((duDesc.length > 0) && _sharedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
count * duDesc.stride * sizeof(float));
} else {
if (duDesc.length > 0) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
count * duDesc.stride * sizeof(float));
}
if (dvDesc.length > 0) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
2, dvBuffer,
dvBufferBindOffset * sizeof(float),
count * dvDesc.stride * sizeof(float));
}
}
if (dvDesc.length > 0) {
if ((duuDesc.length > 0) && _sharedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
2, dvBuffer,
dvBufferBindOffset * sizeof(float),
count * dvDesc.stride * sizeof(float));
2, duuBuffer,
duuBufferBindOffset * sizeof(float),
count * duuDesc.stride * sizeof(float));
} else {
if (duuDesc.length > 0) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
3, duuBuffer,
duuBufferBindOffset * sizeof(float),
count * duuDesc.stride * sizeof(float));
}
if (duvDesc.length > 0) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
4, duvBuffer,
duvBufferBindOffset * sizeof(float),
count * duvDesc.stride * sizeof(float));
}
if (dvvDesc.length > 0) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
5, dvvBuffer,
dvvBufferBindOffset * sizeof(float),
count * dvvDesc.stride * sizeof(float));
}
}
glBeginTransformFeedback(GL_POINTS);
@ -419,7 +655,7 @@ GLXFBEvaluator::EvalStencils(
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
for (int i = 0; i < 5; ++i) {
for (int i = 0; i < 6; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
@ -448,7 +684,36 @@ GLXFBEvaluator::EvalPatches(
GLuint patchIndexTexture,
GLuint patchParamTexture) const {
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
return EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
0, BufferDescriptor(),
0, BufferDescriptor(),
0, BufferDescriptor(),
numPatchCoords,
patchCoordsBuffer, patchArrays,
patchIndexTexture,
patchParamTexture);
}
bool
GLXFBEvaluator::EvalPatches(
GLuint srcBuffer, BufferDescriptor const &srcDesc,
GLuint dstBuffer, BufferDescriptor const &dstDesc,
GLuint duBuffer, BufferDescriptor const &duDesc,
GLuint dvBuffer, BufferDescriptor const &dvDesc,
GLuint duuBuffer, BufferDescriptor const &duuDesc,
GLuint duvBuffer, BufferDescriptor const &duvDesc,
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexTexture,
GLuint patchParamTexture) const {
bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
if (!_patchKernel.program) return false;
@ -493,6 +758,15 @@ GLXFBEvaluator::EvalPatches(
int dvBufferBindOffset = dvDesc.stride
? (dvDesc.offset - (dvDesc.offset % dvDesc.stride))
: 0;
int duuBufferBindOffset = duuDesc.stride
? (duuDesc.offset - (duuDesc.offset % duuDesc.stride))
: 0;
int duvBufferBindOffset = duvDesc.stride
? (duvDesc.offset - (duvDesc.offset % duvDesc.stride))
: 0;
int dvvBufferBindOffset = dvvDesc.stride
? (dvvDesc.offset - (dvvDesc.offset % dvvDesc.stride))
: 0;
// bind destination buffer
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
@ -500,7 +774,12 @@ GLXFBEvaluator::EvalPatches(
dstBufferBindOffset * sizeof(float),
numPatchCoords * dstDesc.stride * sizeof(float));
if (derivatives) {
if (deriv1 && _sharedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
numPatchCoords * duDesc.stride * sizeof(float));
} else if (deriv1) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
@ -510,7 +789,27 @@ GLXFBEvaluator::EvalPatches(
2, dvBuffer,
dvBufferBindOffset * sizeof(float),
numPatchCoords * dvDesc.stride * sizeof(float));
}
if (deriv2 && _sharedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
2, duuBuffer,
duuBufferBindOffset * sizeof(float),
numPatchCoords * duuDesc.stride * sizeof(float));
} else if (deriv2) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
3, duuBuffer,
duuBufferBindOffset * sizeof(float),
numPatchCoords * duuDesc.stride * sizeof(float));
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
4, duvBuffer,
duvBufferBindOffset * sizeof(float),
numPatchCoords * duvDesc.stride * sizeof(float));
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
5, dvvBuffer,
dvvBufferBindOffset * sizeof(float),
numPatchCoords * dvvDesc.stride * sizeof(float));
}
glBeginTransformFeedback(GL_POINTS);
@ -520,7 +819,7 @@ GLXFBEvaluator::EvalPatches(
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
// unbind textures
for (int i = 0; i < 3; ++i) {
for (int i = 0; i < 6; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
@ -536,7 +835,6 @@ GLXFBEvaluator::EvalPatches(
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return true;
}
@ -554,32 +852,38 @@ bool
GLXFBEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc) {
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
bool sharedDerivativeBuffers) {
// create stencil kernel
if (program) {
glDeleteProgram(program);
}
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
const char *kernelDef = derivatives
? "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n"
"#define OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES\n"
: "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n";
const char * kernelDefines =
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n";
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef);
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
kernelDefines, sharedDerivativeBuffers);
if (program == 0) return false;
// cache uniform locations (TODO: use uniform block)
uniformSrcBufferTexture = glGetUniformLocation(program, "vertexBuffer");
uniformSrcOffset = glGetUniformLocation(program, "srcOffset");
uniformSizesTexture = glGetUniformLocation(program, "sizes");
uniformOffsetsTexture = glGetUniformLocation(program, "offsets");
uniformIndicesTexture = glGetUniformLocation(program, "indices");
uniformWeightsTexture = glGetUniformLocation(program, "weights");
uniformDuWeightsTexture = glGetUniformLocation(program, "duWeights");
uniformDvWeightsTexture = glGetUniformLocation(program, "dvWeights");
uniformStart = glGetUniformLocation(program, "batchStart");
uniformEnd = glGetUniformLocation(program, "batchEnd");
uniformSrcBufferTexture = glGetUniformLocation(program, "vertexBuffer");
uniformSrcOffset = glGetUniformLocation(program, "srcOffset");
uniformSizesTexture = glGetUniformLocation(program, "sizes");
uniformOffsetsTexture = glGetUniformLocation(program, "offsets");
uniformIndicesTexture = glGetUniformLocation(program, "indices");
uniformWeightsTexture = glGetUniformLocation(program, "weights");
uniformDuWeightsTexture = glGetUniformLocation(program, "duWeights");
uniformDvWeightsTexture = glGetUniformLocation(program, "dvWeights");
uniformDuuWeightsTexture = glGetUniformLocation(program, "duuWeights");
uniformDuvWeightsTexture = glGetUniformLocation(program, "duvWeights");
uniformDvvWeightsTexture = glGetUniformLocation(program, "dvvWeights");
uniformStart = glGetUniformLocation(program, "batchStart");
uniformEnd = glGetUniformLocation(program, "batchEnd");
return true;
}
@ -598,19 +902,22 @@ bool
GLXFBEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc) {
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
bool sharedDerivativeBuffers) {
// create stencil kernel
if (program) {
glDeleteProgram(program);
}
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
const char *kernelDef = derivatives
? "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n"
"#define OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES\n"
: "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n";
const char * kernelDefines =
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n";
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef);
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
kernelDefines, sharedDerivativeBuffers);
if (program == 0) return false;
// cache uniform locations
@ -623,7 +930,6 @@ GLXFBEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

File diff suppressed because it is too large Load Diff

View File

@ -37,13 +37,22 @@ layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; };
// derivative buffers (if needed)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
uniform ivec3 duDesc;
uniform ivec3 dvDesc;
layout(binding=2) buffer du_buffer { float duBuffer[]; };
layout(binding=3) buffer dv_buffer { float dvBuffer[]; };
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
uniform ivec3 duuDesc;
uniform ivec3 duvDesc;
uniform ivec3 dvvDesc;
layout(binding=10) buffer duu_buffer { float duuBuffer[]; };
layout(binding=11) buffer duv_buffer { float duvBuffer[]; };
layout(binding=12) buffer dvv_buffer { float dvvBuffer[]; };
#endif
// stencil buffers
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
@ -55,11 +64,17 @@ layout(binding=5) buffer stencilOffsets { int _offsets[]; };
layout(binding=6) buffer stencilIndices { int _indices[]; };
layout(binding=7) buffer stencilWeights { float _weights[]; };
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
layout(binding=8) buffer stencilDuWeights { float _duWeights[]; };
layout(binding=9) buffer stencilDvWeights { float _dvWeights[]; };
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
layout(binding=13) buffer stencilDuuWeights { float _duuWeights[]; };
layout(binding=14) buffer stencilDuvWeights { float _duvWeights[]; };
layout(binding=15) buffer stencilDvvWeights { float _dvvWeights[]; };
#endif
#endif
// patch buffers
@ -119,7 +134,7 @@ void addWithWeight(inout Vertex v, const Vertex src, float weight) {
}
}
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
void writeDu(int index, Vertex du) {
int duIndex = duDesc.x + index * duDesc.z;
for (int i = 0; i < LENGTH; ++i) {
@ -135,6 +150,29 @@ void writeDv(int index, Vertex dv) {
}
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
void writeDuu(int index, Vertex duu) {
int duuIndex = duuDesc.x + index * duuDesc.z;
for (int i = 0; i < LENGTH; ++i) {
duuBuffer[duuIndex + i] = duu.vertexData[i];
}
}
void writeDuv(int index, Vertex duv) {
int duvIndex = duvDesc.x + index * duvDesc.z;
for (int i = 0; i < LENGTH; ++i) {
duvBuffer[duvIndex + i] = duv.vertexData[i];
}
}
void writeDvv(int index, Vertex dvv) {
int dvvIndex = dvvDesc.x + index * dvvDesc.z;
for (int i = 0; i < LENGTH; ++i) {
dvvBuffer[dvvIndex + i] = dvv.vertexData[i];
}
}
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
@ -160,7 +198,7 @@ void main() {
writeVertex(current, dst);
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
Vertex du, dv;
clear(du);
clear(dv);
@ -178,6 +216,29 @@ void main() {
writeDv(current, dv);
}
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
Vertex duu, duv, dvv;
clear(duu);
clear(duv);
clear(dvv);
for (int i=0; i<size; ++i) {
// expects the compiler optimizes readVertex out here.
Vertex src = readVertex(_indices[offset+i]);
addWithWeight(duu, src, _duuWeights[offset+i]);
addWithWeight(duv, src, _duvWeights[offset+i]);
addWithWeight(dvv, src, _dvvWeights[offset+i]);
}
if (duuDesc.y > 0) { // length
writeDuu(current, duu);
}
if (duvDesc.y > 0) {
writeDuv(current, duv);
}
if (dvvDesc.y > 0) {
writeDvv(current, dvv);
}
#endif
}
#endif
@ -260,6 +321,9 @@ void main() {
wP[i] = wP4[i];
wDs[i] = wDs4[i];
wDt[i] = wDt4[i];
wDss[i] = wDss4[i];
wDst[i] = wDst4[i];
wDtt[i] = wDtt4[i];
}
} else if (patchType == 6) {
float wP16[16], wDs16[16], wDt16[16], wDss16[16], wDst16[16], wDtt16[16];
@ -269,16 +333,22 @@ void main() {
wP[i] = wP16[i];
wDs[i] = wDs16[i];
wDt[i] = wDt16[i];
wDss[i] = wDss16[i];
wDst[i] = wDst16[i];
wDtt[i] = wDtt16[i];
}
} else if (patchType == 9) {
OsdGetGregoryPatchWeights(uv.s, uv.t, dScale, wP, wDs, wDt, wDss, wDst, wDtt);
numControlVertices = 20;
}
Vertex dst, du, dv;
Vertex dst, du, dv, duu, duv, dvv;
clear(dst);
clear(du);
clear(dv);
clear(duu);
clear(duv);
clear(dvv);
int indexStride = getNumControlVertices(array.x);
int indexBase = array.z + indexStride * (patchIndex - array.w);
@ -288,10 +358,13 @@ void main() {
addWithWeight(dst, readVertex(index), wP[cv]);
addWithWeight(du, readVertex(index), wDs[cv]);
addWithWeight(dv, readVertex(index), wDt[cv]);
addWithWeight(duu, readVertex(index), wDss[cv]);
addWithWeight(duv, readVertex(index), wDst[cv]);
addWithWeight(dvv, readVertex(index), wDtt[cv]);
}
writeVertex(current, dst);
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
if (duDesc.y > 0) { // length
writeDu(current, du);
}
@ -299,6 +372,17 @@ void main() {
writeDv(current, dv);
}
#endif
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
if (duuDesc.y > 0) { // length
writeDuu(current, duu);
}
if (duvDesc.y > 0) { // length
writeDuv(current, duv);
}
if (dvvDesc.y > 0) {
writeDvv(current, dvv);
}
#endif
}
#endif

View File

@ -63,7 +63,22 @@ void writeVertex(Vertex v) {
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES) && \
defined(OPENSUBDIV_GLSL_XFB_SHARED_1ST_DERIVATIVE_BUFFERS)
out float outDeriv1Buffer[2*LENGTH];
void writeDu(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDeriv1Buffer[i] = v.vertexData[i];
}
}
void writeDv(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDeriv1Buffer[i+LENGTH] = v.vertexData[i];
}
}
#elif defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
out float outDuBuffer[LENGTH];
out float outDvBuffer[LENGTH];
@ -80,6 +95,51 @@ void writeDv(Vertex v) {
}
#endif
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES) && \
defined(OPENSUBDIV_GLSL_XFB_SHARED_2ND_DERIVATIVE_BUFFERS)
out float outDeriv2Buffer[3*LENGTH];
void writeDuu(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDeriv2Buffer[i] = v.vertexData[i];
}
}
void writeDuv(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDeriv2Buffer[i+LENGTH] = v.vertexData[i];
}
}
void writeDvv(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDeriv2Buffer[i+2*LENGTH] = v.vertexData[i];
}
}
#elif defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
out float outDuuBuffer[LENGTH];
out float outDuvBuffer[LENGTH];
out float outDvvBuffer[LENGTH];
void writeDuu(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDuuBuffer[i] = v.vertexData[i];
}
}
void writeDuv(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDuvBuffer[i] = v.vertexData[i];
}
}
void writeDvv(Vertex v) {
for(int i = 0; i < LENGTH; i++) {
outDvvBuffer[i] = v.vertexData[i];
}
}
#endif
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS)
@ -89,11 +149,17 @@ uniform isamplerBuffer offsets;
uniform isamplerBuffer indices;
uniform samplerBuffer weights;
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
uniform samplerBuffer duWeights;
uniform samplerBuffer dvWeights;
#endif
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
uniform samplerBuffer duuWeights;
uniform samplerBuffer duvWeights;
uniform samplerBuffer dvvWeights;
#endif
uniform int batchStart = 0;
uniform int batchEnd = 0;
@ -104,10 +170,13 @@ void main() {
return;
}
Vertex dst, du, dv;
Vertex dst, du, dv, duu, duv, dvv;
clear(dst);
clear(du);
clear(dv);
clear(duu);
clear(duv);
clear(dvv);
int offset = texelFetch(offsets, current).x;
uint size = texelFetch(sizes, current).x;
@ -117,19 +186,32 @@ void main() {
float weight = texelFetch(weights, offset+stencil).x;
addWithWeight(dst, readVertex( index ), weight);
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
float duWeight = texelFetch(duWeights, offset+stencil).x;
float dvWeight = texelFetch(dvWeights, offset+stencil).x;
addWithWeight(du, readVertex(index), duWeight);
addWithWeight(dv, readVertex(index), dvWeight);
#endif
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
float duuWeight = texelFetch(duuWeights, offset+stencil).x;
float duvWeight = texelFetch(duvWeights, offset+stencil).x;
float dvvWeight = texelFetch(dvvWeights, offset+stencil).x;
addWithWeight(duu, readVertex(index), duuWeight);
addWithWeight(duv, readVertex(index), duvWeight);
addWithWeight(dvv, readVertex(index), dvvWeight);
#endif
}
writeVertex(dst);
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
writeDu(du);
writeDv(dv);
#endif
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
writeDuu(duu);
writeDuv(duv);
writeDvv(dvv);
#endif
}
#endif
@ -213,31 +295,43 @@ void main() {
int numControlVertices = 0;
if (patchType == 3) {
float wP4[4], wDs4[4], wDt4[4], wDss4[4], wDst4[4], wDtt4[4];
OsdGetBilinearPatchWeights(coord.s, coord.t, dScale, wP4, wDs4, wDt4, wDss4, wDst4, wDtt4);
OsdGetBilinearPatchWeights(coord.s, coord.t, dScale, wP4,
wDs4, wDt4, wDss4, wDst4, wDtt4);
numControlVertices = 4;
for (int i=0; i<numControlVertices; ++i) {
wP[i] = wP4[i];
wDs[i] = wDs4[i];
wDt[i] = wDt4[i];
wDss[i] = wDss4[i];
wDst[i] = wDst4[i];
wDtt[i] = wDtt4[i];
}
} else if (patchType == 6) {
float wP16[16], wDs16[16], wDt16[16], wDss16[16], wDst16[16], wDtt16[16];
OsdGetBSplinePatchWeights(coord.s, coord.t, dScale, boundary, wP16, wDs16, wDt16, wDss16, wDst16, wDtt16);
OsdGetBSplinePatchWeights(coord.s, coord.t, dScale, boundary, wP16,
wDs16, wDt16, wDss16, wDst16, wDtt16);
numControlVertices = 16;
for (int i=0; i<numControlVertices; ++i) {
wP[i] = wP16[i];
wDs[i] = wDs16[i];
wDt[i] = wDt16[i];
wDss[i] = wDss16[i];
wDst[i] = wDst16[i];
wDtt[i] = wDtt16[i];
}
} else if (patchType == 9) {
OsdGetGregoryPatchWeights(coord.s, coord.t, dScale, wP, wDs, wDt, wDss, wDst, wDtt);
OsdGetGregoryPatchWeights(coord.s, coord.t, dScale, wP,
wDs, wDt, wDss, wDst, wDtt);
numControlVertices = 20;
}
Vertex dst, du, dv;
Vertex dst, du, dv, duu, duv, dvv;
clear(dst);
clear(du);
clear(dv);
clear(duu);
clear(duv);
clear(dvv);
int indexStride = getNumControlVertices(array.x);
int indexBase = array.z + indexStride * (patchIndex - array.w);
@ -247,15 +341,22 @@ void main() {
addWithWeight(dst, readVertex(index), wP[cv]);
addWithWeight(du, readVertex(index), wDs[cv]);
addWithWeight(dv, readVertex(index), wDt[cv]);
addWithWeight(duu, readVertex(index), wDss[cv]);
addWithWeight(duv, readVertex(index), wDst[cv]);
addWithWeight(dvv, readVertex(index), wDtt[cv]);
}
writeVertex(dst);
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
writeDu(du);
writeDv(dv);
#endif
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
writeDuu(duu);
writeDuv(duv);
writeDvv(dvv);
#endif
}
#endif

View File

@ -197,7 +197,7 @@ OmpEvaluator::EvalPatches(
#pragma omp parallel for
for (int i = 0; i < numPatchCoords; ++i) {
float wP[20], wDs[20], wDt[20];
float wP[20], wDu[20], wDv[20];
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
@ -214,15 +214,15 @@ OmpEvaluator::EvalPatches(
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(param,
coord.s, coord.t, wP, wDs, wDt);
coord.s, coord.t, wP, wDu, wDv);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(param,
coord.s, coord.t, wP, wDs, wDt);
coord.s, coord.t, wP, wDu, wDv);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(param,
coord.s, coord.t, wP, wDs, wDt);
coord.s, coord.t, wP, wDu, wDv);
numControlVertices = 4;
} else {
continue;
@ -239,8 +239,8 @@ OmpEvaluator::EvalPatches(
dvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
duT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
duT.AddWithWeight(srcT[cvs[j]], wDu[j]);
dvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
}
++dstT;
++duT;
@ -249,6 +249,101 @@ OmpEvaluator::EvalPatches(
return true;
}
/* static */
bool
OmpEvaluator::EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer) {
src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
if (duu) duu += duuDesc.offset;
if (duv) duv += duvDesc.offset;
if (dvv) dvv += dvvDesc.offset;
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
#pragma omp parallel for
for (int i = 0; i < numPatchCoords; ++i) {
float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
BufferAdapter<float> duuT(duu + duuDesc.stride*i, duuDesc.length, duuDesc.stride);
BufferAdapter<float> duvT(duv + duvDesc.stride*i, duvDesc.length, duvDesc.stride);
BufferAdapter<float> dvvT(dvv + dvvDesc.stride*i, dvvDesc.length, dvvDesc.stride);
PatchCoord const &coord = patchCoords[i];
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
Far::PatchParam const & param =
patchParamBuffer[coord.handle.patchIndex];
int patchType = param.IsRegular()
? Far::PatchDescriptor::REGULAR
: array.GetPatchType();
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(param,
coord.s, coord.t, wP,
wDu, wDv, wDuu, wDuv, wDvv);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(param,
coord.s, coord.t, wP,
wDu, wDv, wDuu, wDuv, wDvv);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(param,
coord.s, coord.t, wP,
wDu, wDv, wDuu, wDuv, wDvv);
numControlVertices = 4;
} else {
continue;
}
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
int indexBase = array.GetIndexBase() + indexStride *
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
const int *cvs = &patchIndexBuffer[indexBase];
dstT.Clear();
duT.Clear();
dvT.Clear();
duuT.Clear();
duvT.Clear();
dvvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
duT.AddWithWeight(srcT[cvs[j]], wDu[j]);
dvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
duuT.AddWithWeight(srcT[cvs[j]], wDuu[j]);
duvT.AddWithWeight(srcT[cvs[j]], wDuv[j]);
dvvT.AddWithWeight(srcT[cvs[j]], wDvv[j]);
}
++dstT;
++duT;
++dvT;
++duuT;
++duvT;
++dvvT;
}
return true;
}
/* static */
void

View File

@ -244,6 +244,177 @@ public:
const float * dvWeights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the omp kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the omp kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
STENCIL_TABLE const *stencilTable,
const OmpEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
&stencilTable->GetDuuWeights()[0],
&stencilTable->GetDuvWeights()[0],
&stencilTable->GetDvvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
///
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
///
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
@ -373,6 +544,102 @@ public:
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
@ -457,6 +724,72 @@ public:
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
@ -508,6 +841,164 @@ public:
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
@ -562,6 +1053,170 @@ public:
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the omp evaluator
///
/// @param deviceContext not used in the omp evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
OmpEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// ----------------------------------------------------------------------
///
/// Other methods

View File

@ -177,6 +177,99 @@ OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
}
void
OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float * dst, BufferDescriptor const &dstDesc,
float * dstDu, BufferDescriptor const &dstDuDesc,
float * dstDv, BufferDescriptor const &dstDvDesc,
float * dstDuu, BufferDescriptor const &dstDuuDesc,
float * dstDuv, BufferDescriptor const &dstDuvDesc,
float * dstDvv, BufferDescriptor const &dstDvvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
float const * duuWeights,
float const * duvWeights,
float const * dvvWeights,
int start, int end) {
start = (start > 0 ? start : 0);
src += srcDesc.offset;
dst += dstDesc.offset;
dstDu += dstDuDesc.offset;
dstDv += dstDvDesc.offset;
dstDuu += dstDuuDesc.offset;
dstDuv += dstDuvDesc.offset;
dstDvv += dstDvvDesc.offset;
int numThreads = omp_get_max_threads();
int n = end - start;
float * result = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDuu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDuv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
float * resultDvv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
#pragma omp parallel for
for (int i = 0; i < n; ++i) {
int index = i + start; // Stencil index
// Get thread-local pointers
int const * threadIndices = indices + offsets[index];
float const * threadWeights = weights + offsets[index];
float const * threadWeightsDu = duWeights + offsets[index];
float const * threadWeightsDv = dvWeights + offsets[index];
float const * threadWeightsDuu = duuWeights + offsets[index];
float const * threadWeightsDuv = duvWeights + offsets[index];
float const * threadWeightsDvv = dvvWeights + offsets[index];
int threadId = omp_get_thread_num();
float * threadResult = result + threadId*srcDesc.length;
float * threadResultDu = resultDu + threadId*srcDesc.length;
float * threadResultDv = resultDv + threadId*srcDesc.length;
float * threadResultDuu = resultDuu + threadId*srcDesc.length;
float * threadResultDuv = resultDuv + threadId*srcDesc.length;
float * threadResultDvv = resultDvv + threadId*srcDesc.length;
clear(threadResult, dstDesc);
clear(threadResultDu, dstDuDesc);
clear(threadResultDv, dstDvDesc);
clear(threadResultDuu, dstDuuDesc);
clear(threadResultDuv, dstDuvDesc);
clear(threadResultDvv, dstDvvDesc);
for (int j=0; j<(int)sizes[index]; ++j) {
addWithWeight(threadResult, src,
threadIndices[j], threadWeights[j], srcDesc);
addWithWeight(threadResultDu, src,
threadIndices[j], threadWeightsDu[j], srcDesc);
addWithWeight(threadResultDv, src,
threadIndices[j], threadWeightsDv[j], srcDesc);
addWithWeight(threadResultDuu, src,
threadIndices[j], threadWeightsDuu[j], srcDesc);
addWithWeight(threadResultDuv, src,
threadIndices[j], threadWeightsDuv[j], srcDesc);
addWithWeight(threadResultDvv, src,
threadIndices[j], threadWeightsDvv[j], srcDesc);
}
copy(dst, i, threadResult, dstDesc);
copy(dstDu, i, threadResultDu, dstDuDesc);
copy(dstDv, i, threadResultDv, dstDvDesc);
copy(dstDuu, i, threadResultDuu, dstDuuDesc);
copy(dstDuv, i, threadResultDuv, dstDuvDesc);
copy(dstDvv, i, threadResultDvv, dstDvvDesc);
}
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -56,6 +56,25 @@ OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float const * dvWeights,
int start, int end);
void
OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float * dst, BufferDescriptor const &dstDesc,
float * dstDu, BufferDescriptor const &dstDuDesc,
float * dstDv, BufferDescriptor const &dstDvDesc,
float * dstDuu, BufferDescriptor const &dstDuuDesc,
float * dstDuv, BufferDescriptor const &dstDuvDesc,
float * dstDvv, BufferDescriptor const &dstDvvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
float const * duuWeights,
float const * duvWeights,
float const * dvvWeights,
int start, int end);
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION

View File

@ -75,8 +75,55 @@ TbbEvaluator::EvalStencils(
dst, dstDesc,
du, duDesc,
dv, dvDesc,
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
sizes, offsets, indices,
weights, duWeights, dvWeights, NULL, NULL, NULL,
start, end);
return true;
}
/* static */
bool
TbbEvaluator::EvalStencils(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start, int end) {
if (end <= start) return true;
if (srcDesc.length != dstDesc.length) return false;
if (srcDesc.length != duDesc.length) return false;
if (srcDesc.length != dvDesc.length) return false;
if (srcDesc.length != duuDesc.length) return false;
if (srcDesc.length != duvDesc.length) return false;
if (srcDesc.length != dvvDesc.length) return false;
TbbEvalStencils(src, srcDesc,
dst, dstDesc,
du, duDesc,
dv, dvDesc,
duu, duuDesc,
duv, duvDesc,
dvv, dvvDesc,
sizes, offsets, indices,
weights, duWeights, dvWeights,
duuWeights, duvWeights, dvvWeights,
start, end);
return true;
@ -96,6 +143,9 @@ TbbEvaluator::EvalPatches(
if (srcDesc.length != dstDesc.length) return false;
TbbEvalPatches(src, srcDesc, dst, dstDesc,
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
numPatchCoords, patchCoords,
@ -121,6 +171,36 @@ TbbEvaluator::EvalPatches(
TbbEvalPatches(src, srcDesc, dst, dstDesc,
du, duDesc, dv, dvDesc,
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
return true;
}
/* static */
bool
TbbEvaluator::EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
if (srcDesc.length != dstDesc.length) return false;
TbbEvalPatches(src, srcDesc, dst, dstDesc,
du, duDesc, dv, dvDesc,
duu, duuDesc, duv, duvDesc, dvv, dvvDesc,
numPatchCoords, patchCoords,
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);

View File

@ -244,6 +244,177 @@ public:
const float * dvWeights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the tbb kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the tbb kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
STENCIL_TABLE const *stencilTable,
const TbbEvaluator *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
&stencilTable->GetSizes()[0],
&stencilTable->GetOffsets()[0],
&stencilTable->GetControlIndices()[0],
&stencilTable->GetWeights()[0],
&stencilTable->GetDuWeights()[0],
&stencilTable->GetDvWeights()[0],
&stencilTable->GetDuuWeights()[0],
&stencilTable->GetDuvWeights()[0],
&stencilTable->GetDvvWeights()[0],
/*start = */ 0,
/*end = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
///
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
///
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
@ -373,6 +544,102 @@ public:
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
@ -457,6 +724,72 @@ public:
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output pointer derivative wrt u. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv Output pointer derivative wrt v. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu Output pointer 2nd derivative wrt u. An offset of
/// duuDesc will be applied internally.
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
/// duvDesc will be applied internally.
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
/// dvvDesc will be applied internally.
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
@ -508,6 +841,164 @@ public:
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
@ -562,6 +1053,170 @@ public:
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param fvarChannel face-varying channel
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
TbbEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
duuBuffer->BindCpuBuffer(), duuDesc,
duvBuffer->BindCpuBuffer(), duvDesc,
dvvBuffer->BindCpuBuffer(), dvvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel));
}
/// ----------------------------------------------------------------------
///
/// Other methods

View File

@ -219,6 +219,78 @@ TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
}
void
TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float * dst, BufferDescriptor const &dstDesc,
float * du, BufferDescriptor const &duDesc,
float * dv, BufferDescriptor const &dvDesc,
float * duu, BufferDescriptor const &duuDesc,
float * duv, BufferDescriptor const &duvDesc,
float * dvv, BufferDescriptor const &dvvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
float const * duuWeights,
float const * duvWeights,
float const * dvvWeights,
int start, int end) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
if (duu) duu += duuDesc.offset;
if (duv) duv += duvDesc.offset;
if (dvv) dvv += dvvDesc.offset;
// PERFORMANCE: need to combine 3 launches together
if (dst) {
TBBStencilKernel kernel(src, srcDesc, dst, dstDesc,
sizes, offsets, indices, weights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (du) {
TBBStencilKernel kernel(src, srcDesc, du, duDesc,
sizes, offsets, indices, duWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (dv) {
TBBStencilKernel kernel(src, srcDesc, dv, dvDesc,
sizes, offsets, indices, dvWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (duu) {
TBBStencilKernel kernel(src, srcDesc, duu, duuDesc,
sizes, offsets, indices, duuWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (duv) {
TBBStencilKernel kernel(src, srcDesc, duv, duvDesc,
sizes, offsets, indices, duvWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
if (dvv) {
TBBStencilKernel kernel(src, srcDesc, dvv, dvvDesc,
sizes, offsets, indices, dvvWeights);
tbb::blocked_range<int> range(start, end, grain_size);
tbb::parallel_for(range, kernel);
}
}
// ---------------------------------------------------------------------------
@ -257,10 +329,16 @@ class TbbEvalPatchesKernel {
BufferDescriptor _dstDesc;
BufferDescriptor _dstDuDesc;
BufferDescriptor _dstDvDesc;
BufferDescriptor _dstDuuDesc;
BufferDescriptor _dstDuvDesc;
BufferDescriptor _dstDvvDesc;
float const * _src;
float * _dst;
float * _dstDu;
float * _dstDv;
float * _dstDuu;
float * _dstDuv;
float * _dstDvv;
int _numPatchCoords;
const PatchCoord *_patchCoords;
const PatchArray *_patchArrayBuffer;
@ -272,6 +350,9 @@ public:
float *dst, BufferDescriptor dstDesc,
float *dstDu, BufferDescriptor dstDuDesc,
float *dstDv, BufferDescriptor dstDvDesc,
float *dstDuu, BufferDescriptor dstDuuDesc,
float *dstDuv, BufferDescriptor dstDuvDesc,
float *dstDvv, BufferDescriptor dstDvvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
@ -279,7 +360,10 @@ public:
const PatchParam *patchParamBuffer) :
_srcDesc(srcDesc), _dstDesc(dstDesc),
_dstDuDesc(dstDuDesc), _dstDvDesc(dstDvDesc),
_src(src), _dst(dst), _dstDu(dstDu), _dstDv(dstDv),
_dstDuuDesc(dstDuuDesc), _dstDuvDesc(dstDuvDesc), _dstDvvDesc(dstDvvDesc),
_src(src), _dst(dst),
_dstDu(dstDu), _dstDv(dstDv),
_dstDuu(dstDuu), _dstDuv(dstDuv), _dstDvv(dstDvv),
_numPatchCoords(numPatchCoords),
_patchCoords(patchCoords),
_patchArrayBuffer(patchArrayBuffer),
@ -290,13 +374,15 @@ public:
void operator() (tbb::blocked_range<int> const &r) const {
if (_dstDu == NULL && _dstDv == NULL) {
compute(r);
} else if (_dstDuu == NULL && _dstDuv == NULL && _dstDvv == NULL) {
computeWith1stDerivative(r);
} else {
computeWithDerivative(r);
computeWith2ndDerivative(r);
}
}
void compute(tbb::blocked_range<int> const &r) const {
float wP[20], wDs[20], wDt[20];
float wP[20], wDu[20], wDv[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
@ -305,10 +391,66 @@ public:
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu,
for (int i = r.begin(); i < r.end(); ++i) {
PatchCoord const &coord = _patchCoords[i];
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
Far::PatchParam const & param =
_patchParamBuffer[coord.handle.patchIndex];
int patchType = param.IsRegular()
? Far::PatchDescriptor::REGULAR
: array.GetPatchType();
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(param,
coord.s, coord.t, wP,
wDu, wDv);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(param,
coord.s, coord.t, wP,
wDu, wDv);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(param,
coord.s, coord.t, wP,
wDu, wDv);
numControlVertices = 4;
} else {
assert(0);
}
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
int indexBase = array.GetIndexBase() + indexStride *
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
const int *cvs = &_patchIndexBuffer[indexBase];
dstT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
++dstT;
}
}
void computeWith1stDerivative(tbb::blocked_range<int> const &r) const {
float wP[20], wDu[20], wDv[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
BufferAdapter<float> dstT(_dst + _dstDesc.offset
+ r.begin() * _dstDesc.stride,
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
+ r.begin() * _dstDuDesc.stride,
_dstDuDesc.length,
_dstDuDesc.stride);
BufferAdapter<float> dstDvT(_dstDv,
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
+ r.begin() * _dstDvDesc.stride,
_dstDvDesc.length,
_dstDvDesc.stride);
@ -325,74 +467,18 @@ public:
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(param,
coord.s, coord.t, wP, wDs, wDt);
coord.s, coord.t, wP,
wDu, wDv);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(param,
coord.s, coord.t, wP, wDs, wDt);
coord.s, coord.t, wP,
wDu, wDv);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(param,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 4;
} else {
assert(0);
}
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
int indexBase = array.GetIndexBase() + indexStride *
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
const int *cvs = &_patchIndexBuffer[indexBase];
dstT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
}
++dstT;
}
}
void computeWithDerivative(tbb::blocked_range<int> const &r) const {
float wP[20], wDs[20], wDt[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
BufferAdapter<float> dstT(_dst + _dstDesc.offset
+ r.begin() * _dstDesc.stride,
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
+ r.begin() * _dstDuDesc.stride,
_dstDuDesc.length,
_dstDuDesc.stride);
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
+ r.begin() * _dstDvDesc.stride,
_dstDvDesc.length,
_dstDvDesc.stride);
for (int i = r.begin(); i < r.end(); ++i) {
PatchCoord const &coord = _patchCoords[i];
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
Far::PatchParam const & param =
_patchParamBuffer[coord.handle.patchIndex];
int patchType = param.IsRegular()
? Far::PatchDescriptor::REGULAR
: array.GetPatchType();
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(param,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(param,
coord.s, coord.t, wP, wDs, wDt);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(param,
coord.s, coord.t, wP, wDs, wDt);
coord.s, coord.t,
wP, wDu, wDv);
numControlVertices = 4;
} else {
assert(0);
@ -409,14 +495,103 @@ public:
dstDvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
dstDuT.AddWithWeight(srcT[cvs[j]], wDs[j]);
dstDvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
dstDuT.AddWithWeight(srcT[cvs[j]], wDu[j]);
dstDvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
}
++dstT;
++dstDuT;
++dstDvT;
}
}
void computeWith2ndDerivative(tbb::blocked_range<int> const &r) const {
float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
_srcDesc.length,
_srcDesc.stride);
BufferAdapter<float> dstT(_dst + _dstDesc.offset
+ r.begin() * _dstDesc.stride,
_dstDesc.length,
_dstDesc.stride);
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
+ r.begin() * _dstDuDesc.stride,
_dstDuDesc.length,
_dstDuDesc.stride);
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
+ r.begin() * _dstDvDesc.stride,
_dstDvDesc.length,
_dstDvDesc.stride);
BufferAdapter<float> dstDuuT(_dstDuu + _dstDuuDesc.offset
+ r.begin() * _dstDuuDesc.stride,
_dstDuuDesc.length,
_dstDuuDesc.stride);
BufferAdapter<float> dstDuvT(_dstDuv + _dstDuvDesc.offset
+ r.begin() * _dstDuvDesc.stride,
_dstDuvDesc.length,
_dstDuvDesc.stride);
BufferAdapter<float> dstDvvT(_dstDvv + _dstDvvDesc.offset
+ r.begin() * _dstDvvDesc.stride,
_dstDvvDesc.length,
_dstDvvDesc.stride);
for (int i = r.begin(); i < r.end(); ++i) {
PatchCoord const &coord = _patchCoords[i];
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
Far::PatchParam const & param =
_patchParamBuffer[coord.handle.patchIndex];
int patchType = param.IsRegular()
? Far::PatchDescriptor::REGULAR
: array.GetPatchType();
int numControlVertices = 0;
if (patchType == Far::PatchDescriptor::REGULAR) {
Far::internal::GetBSplineWeights(param,
coord.s, coord.t, wP,
wDu, wDv, wDuu, wDuv, wDvv);
numControlVertices = 16;
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
Far::internal::GetGregoryWeights(param,
coord.s, coord.t, wP,
wDu, wDv, wDuu, wDuv, wDvv);
numControlVertices = 20;
} else if (patchType == Far::PatchDescriptor::QUADS) {
Far::internal::GetBilinearWeights(param,
coord.s, coord.t, wP,
wDu, wDv, wDuu, wDuv, wDvv);
numControlVertices = 4;
} else {
assert(0);
}
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
int indexBase = array.GetIndexBase() + indexStride *
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
const int *cvs = &_patchIndexBuffer[indexBase];
dstT.Clear();
dstDuT.Clear();
dstDvT.Clear();
dstDuuT.Clear();
dstDuvT.Clear();
dstDvvT.Clear();
for (int j = 0; j < numControlVertices; ++j) {
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
dstDuT.AddWithWeight(srcT[cvs[j]], wDu[j]);
dstDvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
dstDuuT.AddWithWeight(srcT[cvs[j]], wDuu[j]);
dstDuvT.AddWithWeight(srcT[cvs[j]], wDuv[j]);
dstDvvT.AddWithWeight(srcT[cvs[j]], wDvv[j]);
}
++dstT;
++dstDuT;
++dstDvT;
++dstDuuT;
++dstDuvT;
++dstDvvT;
}
}
};
@ -433,6 +608,39 @@ TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
dstDu, dstDuDesc, dstDv, dstDvDesc,
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
NULL, BufferDescriptor(),
numPatchCoords, patchCoords,
patchArrayBuffer,
patchIndexBuffer,
patchParamBuffer);
tbb::blocked_range<int> range(0, numPatchCoords, grain_size);
tbb::parallel_for(range, kernel);
}
void
TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *dstDu, BufferDescriptor const &dstDuDesc,
float *dstDv, BufferDescriptor const &dstDvDesc,
float *dstDuu, BufferDescriptor const &dstDuuDesc,
float *dstDuv, BufferDescriptor const &dstDuvDesc,
float *dstDvv, BufferDescriptor const &dstDvvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer) {
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
dstDu, dstDuDesc, dstDv, dstDvDesc,
dstDuu, dstDuuDesc,
dstDuv, dstDuvDesc,
dstDvv, dstDvvDesc,
numPatchCoords, patchCoords,
patchArrayBuffer,
patchIndexBuffer,

View File

@ -61,6 +61,25 @@ TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float const * dvWeights,
int start, int end);
void
TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
float * dst, BufferDescriptor const &dstDesc,
float * dstDu, BufferDescriptor const &dstDuDesc,
float * dstDv, BufferDescriptor const &dstDvDesc,
float * dstDuu, BufferDescriptor const &dstDuuDesc,
float * dstDuv, BufferDescriptor const &dstDuvDesc,
float * dstDvv, BufferDescriptor const &dstDvvDesc,
int const * sizes,
int const * offsets,
int const * indices,
float const * weights,
float const * duWeights,
float const * dvWeights,
float const * duuWeights,
float const * duvWeights,
float const * dvvWeights,
int start, int end);
void
TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
@ -72,6 +91,20 @@ TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
void
TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *dstDu, BufferDescriptor const &dstDuDesc,
float *dstDv, BufferDescriptor const &dstDvDesc,
float *dstDuu, BufferDescriptor const &dstDuuDesc,
float *dstDuv, BufferDescriptor const &dstDuvDesc,
float *dstDvv, BufferDescriptor const &dstDvvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrayBuffer,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION