mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2025-01-09 00:00:18 +00:00
Added missing Osd Evaluator methods for deriv eval
Now that Far::LimitStencilTable and Far::PatchTable support evaluation of 1st and 2nd derivatives the Osd Evaluator API for evaluating stencils and patches has been updated to match.
This commit is contained in:
parent
432b6b235f
commit
fe38ad8cda
@ -49,6 +49,10 @@ static const char *patchBasisSource =
|
||||
|
||||
template <class T> cl_mem
|
||||
createCLBuffer(std::vector<T> const & src, cl_context clContext) {
|
||||
if (src.empty()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cl_int errNum = 0;
|
||||
cl_mem devicePtr = clCreateBuffer(clContext,
|
||||
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
|
||||
@ -76,9 +80,11 @@ CLStencilTable::CLStencilTable(Far::StencilTable const *stencilTable,
|
||||
clContext);
|
||||
_weights = createCLBuffer(stencilTable->GetWeights(), clContext);
|
||||
_duWeights = _dvWeights = NULL;
|
||||
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = NULL;
|
||||
_duWeights = _dvWeights = NULL;
|
||||
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -96,9 +102,16 @@ CLStencilTable::CLStencilTable(Far::LimitStencilTable const *limitStencilTable,
|
||||
limitStencilTable->GetDuWeights(), clContext);
|
||||
_dvWeights = createCLBuffer(
|
||||
limitStencilTable->GetDvWeights(), clContext);
|
||||
_duuWeights = createCLBuffer(
|
||||
limitStencilTable->GetDuuWeights(), clContext);
|
||||
_duvWeights = createCLBuffer(
|
||||
limitStencilTable->GetDuvWeights(), clContext);
|
||||
_dvvWeights = createCLBuffer(
|
||||
limitStencilTable->GetDvvWeights(), clContext);
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = NULL;
|
||||
_duWeights = _dvWeights = NULL;
|
||||
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,6 +122,9 @@ CLStencilTable::~CLStencilTable() {
|
||||
if (_weights) clReleaseMemObject(_weights);
|
||||
if (_duWeights) clReleaseMemObject(_duWeights);
|
||||
if (_dvWeights) clReleaseMemObject(_dvWeights);
|
||||
if (_duuWeights) clReleaseMemObject(_duuWeights);
|
||||
if (_duvWeights) clReleaseMemObject(_duvWeights);
|
||||
if (_dvvWeights) clReleaseMemObject(_dvvWeights);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -130,7 +146,10 @@ bool
|
||||
CLEvaluator::Compile(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const & /*duDesc*/,
|
||||
BufferDescriptor const & /*dvDesc*/) {
|
||||
BufferDescriptor const & /*dvDesc*/,
|
||||
BufferDescriptor const & /*duuDesc*/,
|
||||
BufferDescriptor const & /*duvDesc*/,
|
||||
BufferDescriptor const & /*dvvDesc*/) {
|
||||
if (srcDesc.length > dstDesc.length) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"srcDesc length must be less than or equal to "
|
||||
@ -263,6 +282,7 @@ CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
|
||||
size_t globalWorkSize = (size_t)(end - start);
|
||||
|
||||
BufferDescriptor empty;
|
||||
clSetKernelArg(_stencilDerivKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_stencilDerivKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 2, sizeof(cl_mem), &dst);
|
||||
@ -273,14 +293,26 @@ CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
clSetKernelArg(_stencilDerivKernel, 7, sizeof(cl_mem), &dv);
|
||||
clSetKernelArg(_stencilDerivKernel, 8, sizeof(int), &dvDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 9, sizeof(int), &dvDesc.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 10, sizeof(cl_mem), &sizes);
|
||||
clSetKernelArg(_stencilDerivKernel, 11, sizeof(cl_mem), &offsets);
|
||||
clSetKernelArg(_stencilDerivKernel, 12, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(_stencilDerivKernel, 13, sizeof(cl_mem), &weights);
|
||||
clSetKernelArg(_stencilDerivKernel, 14, sizeof(cl_mem), &duWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 15, sizeof(cl_mem), &dvWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 16, sizeof(int), &start);
|
||||
clSetKernelArg(_stencilDerivKernel, 17, sizeof(int), &end);
|
||||
clSetKernelArg(_stencilDerivKernel, 10, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_stencilDerivKernel, 11, sizeof(int), &empty.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 12, sizeof(int), &empty.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 13, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_stencilDerivKernel, 14, sizeof(int), &empty.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 15, sizeof(int), &empty.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 16, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_stencilDerivKernel, 17, sizeof(int), &empty.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 18, sizeof(int), &empty.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 19, sizeof(cl_mem), &sizes);
|
||||
clSetKernelArg(_stencilDerivKernel, 20, sizeof(cl_mem), &offsets);
|
||||
clSetKernelArg(_stencilDerivKernel, 21, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(_stencilDerivKernel, 22, sizeof(cl_mem), &weights);
|
||||
clSetKernelArg(_stencilDerivKernel, 23, sizeof(cl_mem), &duWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 24, sizeof(cl_mem), &dvWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 25, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_stencilDerivKernel, 26, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_stencilDerivKernel, 27, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_stencilDerivKernel, 28, sizeof(int), &start);
|
||||
clSetKernelArg(_stencilDerivKernel, 29, sizeof(int), &end);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _stencilDerivKernel, 1, NULL,
|
||||
@ -292,9 +324,80 @@ CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (endEvent == NULL)
|
||||
{
|
||||
clFinish(_clCommandQueue);
|
||||
if (endEvent == NULL) {
|
||||
clFinish(_clCommandQueue);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
cl_mem dst, BufferDescriptor const &dstDesc,
|
||||
cl_mem du, BufferDescriptor const &duDesc,
|
||||
cl_mem dv, BufferDescriptor const &dvDesc,
|
||||
cl_mem duu, BufferDescriptor const &duuDesc,
|
||||
cl_mem duv, BufferDescriptor const &duvDesc,
|
||||
cl_mem dvv, BufferDescriptor const &dvvDesc,
|
||||
cl_mem sizes,
|
||||
cl_mem offsets,
|
||||
cl_mem indices,
|
||||
cl_mem weights,
|
||||
cl_mem duWeights,
|
||||
cl_mem dvWeights,
|
||||
cl_mem duuWeights,
|
||||
cl_mem duvWeights,
|
||||
cl_mem dvvWeights,
|
||||
int start, int end,
|
||||
unsigned int numStartEvents,
|
||||
const cl_event* startEvents,
|
||||
cl_event* endEvent) const {
|
||||
if (end <= start) return true;
|
||||
|
||||
size_t globalWorkSize = (size_t)(end - start);
|
||||
|
||||
clSetKernelArg(_stencilDerivKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_stencilDerivKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_stencilDerivKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 4, sizeof(cl_mem), &du);
|
||||
clSetKernelArg(_stencilDerivKernel, 5, sizeof(int), &duDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 6, sizeof(int), &duDesc.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 7, sizeof(cl_mem), &dv);
|
||||
clSetKernelArg(_stencilDerivKernel, 8, sizeof(int), &dvDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 9, sizeof(int), &dvDesc.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 10, sizeof(cl_mem), &duu);
|
||||
clSetKernelArg(_stencilDerivKernel, 11, sizeof(int), &duuDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 12, sizeof(int), &duuDesc.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 13, sizeof(cl_mem), &duv);
|
||||
clSetKernelArg(_stencilDerivKernel, 14, sizeof(int), &duvDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 15, sizeof(int), &duvDesc.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 16, sizeof(cl_mem), &dvv);
|
||||
clSetKernelArg(_stencilDerivKernel, 17, sizeof(int), &dvvDesc.offset);
|
||||
clSetKernelArg(_stencilDerivKernel, 18, sizeof(int), &dvvDesc.stride);
|
||||
clSetKernelArg(_stencilDerivKernel, 19, sizeof(cl_mem), &sizes);
|
||||
clSetKernelArg(_stencilDerivKernel, 20, sizeof(cl_mem), &offsets);
|
||||
clSetKernelArg(_stencilDerivKernel, 21, sizeof(cl_mem), &indices);
|
||||
clSetKernelArg(_stencilDerivKernel, 22, sizeof(cl_mem), &weights);
|
||||
clSetKernelArg(_stencilDerivKernel, 23, sizeof(cl_mem), &duWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 24, sizeof(cl_mem), &dvWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 25, sizeof(cl_mem), &duuWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 26, sizeof(cl_mem), &duvWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 27, sizeof(cl_mem), &dvvWeights);
|
||||
clSetKernelArg(_stencilDerivKernel, 28, sizeof(int), &start);
|
||||
clSetKernelArg(_stencilDerivKernel, 29, sizeof(int), &end);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _stencilDerivKernel, 1, NULL,
|
||||
&globalWorkSize, NULL, numStartEvents, startEvents, endEvent);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"ApplyStencilKernel (%d) ", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (endEvent == NULL) {
|
||||
clFinish(_clCommandQueue);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -315,6 +418,66 @@ CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
|
||||
size_t globalWorkSize = (size_t)(numPatchCoords);
|
||||
|
||||
BufferDescriptor empty;
|
||||
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
|
||||
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
|
||||
clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
|
||||
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_patchKernel, 11, sizeof(int), &empty.offset);
|
||||
clSetKernelArg(_patchKernel, 12, sizeof(int), &empty.stride);
|
||||
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_patchKernel, 14, sizeof(int), &empty.offset);
|
||||
clSetKernelArg(_patchKernel, 15, sizeof(int), &empty.stride);
|
||||
clSetKernelArg(_patchKernel, 16, sizeof(cl_mem), NULL);
|
||||
clSetKernelArg(_patchKernel, 17, sizeof(int), &empty.offset);
|
||||
clSetKernelArg(_patchKernel, 18, sizeof(int), &empty.stride);
|
||||
clSetKernelArg(_patchKernel, 19, sizeof(cl_mem), &patchCoordsBuffer);
|
||||
clSetKernelArg(_patchKernel, 20, sizeof(cl_mem), &patchArrayBuffer);
|
||||
clSetKernelArg(_patchKernel, 21, sizeof(cl_mem), &patchIndexBuffer);
|
||||
clSetKernelArg(_patchKernel, 22, sizeof(cl_mem), &patchParamBuffer);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _patchKernel, 1, NULL,
|
||||
&globalWorkSize, NULL, numStartEvents, startEvents, endEvent);
|
||||
|
||||
if (errNum != CL_SUCCESS) {
|
||||
Far::Error(Far::FAR_RUNTIME_ERROR,
|
||||
"ApplyPatchKernel (%d) ", errNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (endEvent == NULL) {
|
||||
clFinish(_clCommandQueue);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
cl_mem dst, BufferDescriptor const &dstDesc,
|
||||
cl_mem du, BufferDescriptor const &duDesc,
|
||||
cl_mem dv, BufferDescriptor const &dvDesc,
|
||||
cl_mem duu, BufferDescriptor const &duuDesc,
|
||||
cl_mem duv, BufferDescriptor const &duvDesc,
|
||||
cl_mem dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
cl_mem patchCoordsBuffer,
|
||||
cl_mem patchArrayBuffer,
|
||||
cl_mem patchIndexBuffer,
|
||||
cl_mem patchParamBuffer,
|
||||
unsigned int numStartEvents,
|
||||
const cl_event* startEvents,
|
||||
cl_event* endEvent) const {
|
||||
|
||||
size_t globalWorkSize = (size_t)(numPatchCoords);
|
||||
|
||||
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
|
||||
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
|
||||
@ -325,10 +488,19 @@ CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
|
||||
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &patchCoordsBuffer);
|
||||
clSetKernelArg(_patchKernel, 11, sizeof(cl_mem), &patchArrayBuffer);
|
||||
clSetKernelArg(_patchKernel, 12, sizeof(cl_mem), &patchIndexBuffer);
|
||||
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &patchParamBuffer);
|
||||
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &duu);
|
||||
clSetKernelArg(_patchKernel, 11, sizeof(int), &duuDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 12, sizeof(int), &duuDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &duv);
|
||||
clSetKernelArg(_patchKernel, 14, sizeof(int), &duvDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 15, sizeof(int), &duvDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 16, sizeof(cl_mem), &dvv);
|
||||
clSetKernelArg(_patchKernel, 17, sizeof(int), &dvvDesc.offset);
|
||||
clSetKernelArg(_patchKernel, 18, sizeof(int), &dvvDesc.stride);
|
||||
clSetKernelArg(_patchKernel, 19, sizeof(cl_mem), &patchCoordsBuffer);
|
||||
clSetKernelArg(_patchKernel, 20, sizeof(cl_mem), &patchArrayBuffer);
|
||||
clSetKernelArg(_patchKernel, 21, sizeof(cl_mem), &patchIndexBuffer);
|
||||
clSetKernelArg(_patchKernel, 22, sizeof(cl_mem), &patchParamBuffer);
|
||||
|
||||
cl_int errNum = clEnqueueNDRangeKernel(
|
||||
_clCommandQueue, _patchKernel, 1, NULL,
|
||||
@ -340,15 +512,13 @@ CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (endEvent == NULL)
|
||||
{
|
||||
clFinish(_clCommandQueue);
|
||||
if (endEvent == NULL) {
|
||||
clFinish(_clCommandQueue);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* static */
|
||||
void
|
||||
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -99,12 +99,18 @@ __kernel void computeStencilsDerivatives(
|
||||
__global float * dst, int dstOffset,
|
||||
__global float * du, int duOffset, int duStride,
|
||||
__global float * dv, int dvOffset, int dvStride,
|
||||
__global float * duu, int duuOffset, int duuStride,
|
||||
__global float * duv, int duvOffset, int duvStride,
|
||||
__global float * dvv, int dvvOffset, int dvvStride,
|
||||
__global int * sizes,
|
||||
__global int * offsets,
|
||||
__global int * indices,
|
||||
__global float * weights,
|
||||
__global float * duWeights,
|
||||
__global float * dvWeights,
|
||||
__global float * duuWeights,
|
||||
__global float * duvWeights,
|
||||
__global float * dvvWeights,
|
||||
int batchStart, int batchEnd) {
|
||||
|
||||
int current = get_global_id(0) + batchStart;
|
||||
@ -113,10 +119,13 @@ __kernel void computeStencilsDerivatives(
|
||||
return;
|
||||
}
|
||||
|
||||
struct Vertex v, vdu, vdv;
|
||||
struct Vertex v, vdu, vdv, vduu, vduv, vdvv;
|
||||
clear(&v);
|
||||
clear(&vdu);
|
||||
clear(&vdv);
|
||||
clear(&vduu);
|
||||
clear(&vduv);
|
||||
clear(&vdvv);
|
||||
|
||||
int size = sizes[current],
|
||||
offset = offsets[current];
|
||||
@ -125,6 +134,9 @@ __kernel void computeStencilsDerivatives(
|
||||
if (dst) dst += dstOffset;
|
||||
if (du) du += duOffset;
|
||||
if (dv) dv += dvOffset;
|
||||
if (duu) duu += duuOffset;
|
||||
if (duv) duv += duvOffset;
|
||||
if (dvv) dvv += dvvOffset;
|
||||
|
||||
for (int i=0; i<size; ++i) {
|
||||
int ofs = offset + i;
|
||||
@ -132,11 +144,17 @@ __kernel void computeStencilsDerivatives(
|
||||
if (weights) addWithWeight( &v, src, vid, weights[ofs]);
|
||||
if (duWeights) addWithWeight(&vdu, src, vid, duWeights[ofs]);
|
||||
if (dvWeights) addWithWeight(&vdv, src, vid, dvWeights[ofs]);
|
||||
if (duuWeights) addWithWeight(&vduu, src, vid, duuWeights[ofs]);
|
||||
if (duvWeights) addWithWeight(&vduv, src, vid, duvWeights[ofs]);
|
||||
if (dvvWeights) addWithWeight(&vdvv, src, vid, dvvWeights[ofs]);
|
||||
}
|
||||
|
||||
if (dst) writeVertex (dst, current, &v);
|
||||
if (du) writeVertexStride(du, current, &vdu, duStride);
|
||||
if (dv) writeVertexStride(dv, current, &vdv, dvStride);
|
||||
if (duu) writeVertexStride(duu, current, &vduu, duuStride);
|
||||
if (duv) writeVertexStride(duv, current, &vduv, duvStride);
|
||||
if (dvv) writeVertexStride(dvv, current, &vdvv, dvvStride);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -205,6 +223,9 @@ __kernel void computePatches(__global float *src, int srcOffset,
|
||||
__global float *dst, int dstOffset,
|
||||
__global float *du, int duOffset, int duStride,
|
||||
__global float *dv, int dvOffset, int dvStride,
|
||||
__global float *duu, int duuOffset, int duuStride,
|
||||
__global float *duv, int duvOffset, int duvStride,
|
||||
__global float *dvv, int dvvOffset, int dvvStride,
|
||||
__global struct PatchCoord *patchCoords,
|
||||
__global struct PatchArray *patchArrayBuffer,
|
||||
__global int *patchIndexBuffer,
|
||||
@ -213,8 +234,11 @@ __kernel void computePatches(__global float *src, int srcOffset,
|
||||
|
||||
if (src) src += srcOffset;
|
||||
if (dst) dst += dstOffset;
|
||||
if (du) du += duOffset;
|
||||
if (dv) dv += dvOffset;
|
||||
if (du) du += duOffset;
|
||||
if (dv) dv += dvOffset;
|
||||
if (duu) duu += duuOffset;
|
||||
if (duv) duv += duvOffset;
|
||||
if (dvv) dvv += dvvOffset;
|
||||
|
||||
struct PatchCoord coord = patchCoords[current];
|
||||
struct PatchArray array = patchArrayBuffer[coord.arrayIndex];
|
||||
@ -274,5 +298,31 @@ __kernel void computePatches(__global float *src, int srcOffset,
|
||||
}
|
||||
writeVertexStride(dv, current, &vdv, dvStride);
|
||||
}
|
||||
|
||||
if (duu) {
|
||||
struct Vertex vduu;
|
||||
clear(&vduu);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(&vduu, src, index, wDss[i]);
|
||||
}
|
||||
writeVertexStride(duu, current, &vduu, duuStride);
|
||||
}
|
||||
if (duv) {
|
||||
struct Vertex vduv;
|
||||
clear(&vduv);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(&vduv, src, index, wDst[i]);
|
||||
}
|
||||
writeVertexStride(duv, current, &vduv, duvStride);
|
||||
}
|
||||
if (dvv) {
|
||||
struct Vertex vdvv;
|
||||
clear(&vdvv);
|
||||
for (int i = 0; i < numControlVertices; ++i) {
|
||||
int index = patchIndexBuffer[indexBase + i];
|
||||
addWithWeight(&vdvv, src, index, wDtt[i]);
|
||||
}
|
||||
writeVertexStride(dvv, current, &vdvv, dvvStride);
|
||||
}
|
||||
}
|
||||
|
@ -82,6 +82,48 @@ CpuEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
const float * duuWeights,
|
||||
const float * duvWeights,
|
||||
const float * dvvWeights,
|
||||
int start, int end) {
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
if (srcDesc.length != duuDesc.length) return false;
|
||||
if (srcDesc.length != duvDesc.length) return false;
|
||||
if (srcDesc.length != dvvDesc.length) return false;
|
||||
|
||||
CpuEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
duu, duuDesc,
|
||||
duv, duvDesc,
|
||||
dvv, dvvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
duuWeights, duvWeights, dvvWeights,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct BufferAdapter {
|
||||
BufferAdapter(T *p, int length, int stride) :
|
||||
@ -264,6 +306,120 @@ CpuEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CpuEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
if (src) {
|
||||
src += srcDesc.offset;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
if (dst) {
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
dst += dstDesc.offset;
|
||||
}
|
||||
if (du) {
|
||||
du += duDesc.offset;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
}
|
||||
if (dv) {
|
||||
dv += dvDesc.offset;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
}
|
||||
if (duu) {
|
||||
duu += duuDesc.offset;
|
||||
if (srcDesc.length != duuDesc.length) return false;
|
||||
}
|
||||
if (duv) {
|
||||
duv += duvDesc.offset;
|
||||
if (srcDesc.length != duvDesc.length) return false;
|
||||
}
|
||||
if (dvv) {
|
||||
dvv += dvvDesc.offset;
|
||||
if (srcDesc.length != dvvDesc.length) return false;
|
||||
}
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
BufferAdapter<float> dstT(dst, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> duT(du, duDesc.length, duDesc.stride);
|
||||
BufferAdapter<float> dvT(dv, dvDesc.length, dvDesc.stride);
|
||||
BufferAdapter<float> duuT(duu, duuDesc.length, duuDesc.stride);
|
||||
BufferAdapter<float> duvT(duv, duvDesc.length, duvDesc.stride);
|
||||
BufferAdapter<float> dvvT(dvv, dvvDesc.length, dvvDesc.stride);
|
||||
|
||||
float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
|
||||
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
Far::PatchParam const & param =
|
||||
patchParamBuffer[coord.handle.patchIndex];
|
||||
int patchType = param.IsRegular()
|
||||
? Far::PatchDescriptor::REGULAR
|
||||
: array.GetPatchType();
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(param,
|
||||
coord.s, coord.t, wP, wDu, wDv,
|
||||
wDuu, wDuv, wDvv);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(param,
|
||||
coord.s, coord.t, wP, wDu, wDv,
|
||||
wDuu, wDuv, wDvv);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(param,
|
||||
coord.s, coord.t, wP, wDu, wDv,
|
||||
wDuu, wDuv, wDvv);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
|
||||
int indexBase = array.GetIndexBase() + indexStride *
|
||||
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
|
||||
|
||||
const int *cvs = &patchIndexBuffer[indexBase];
|
||||
|
||||
dstT.Clear();
|
||||
duT.Clear();
|
||||
dvT.Clear();
|
||||
duuT.Clear();
|
||||
duvT.Clear();
|
||||
dvvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
duT.AddWithWeight (srcT[cvs[j]], wDu[j]);
|
||||
dvT.AddWithWeight (srcT[cvs[j]], wDv[j]);
|
||||
duuT.AddWithWeight (srcT[cvs[j]], wDuu[j]);
|
||||
duvT.AddWithWeight (srcT[cvs[j]], wDuv[j]);
|
||||
dvvT.AddWithWeight (srcT[cvs[j]], wDvv[j]);
|
||||
}
|
||||
++dstT;
|
||||
++duT;
|
||||
++dvT;
|
||||
++duuT;
|
||||
++duvT;
|
||||
++dvvT;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
|
@ -244,6 +244,177 @@ public:
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CpuEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
&stencilTable->GetDuuWeights()[0],
|
||||
&stencilTable->GetDuvWeights()[0],
|
||||
&stencilTable->GetDvvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
const float * duuWeights,
|
||||
const float * duvWeights,
|
||||
const float * dvvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
@ -373,6 +544,102 @@ public:
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
@ -457,6 +724,72 @@ public:
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
@ -508,6 +841,164 @@ public:
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetVaryingPatchArrayBuffer(),
|
||||
patchTable->GetVaryingPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetVaryingPatchArrayBuffer(),
|
||||
patchTable->GetVaryingPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
@ -562,6 +1053,170 @@ public:
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
CpuEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
|
@ -169,6 +169,76 @@ CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float * dst, BufferDescriptor const &dstDesc,
|
||||
float * dstDu, BufferDescriptor const &dstDuDesc,
|
||||
float * dstDv, BufferDescriptor const &dstDvDesc,
|
||||
float * dstDuu, BufferDescriptor const &dstDuuDesc,
|
||||
float * dstDuv, BufferDescriptor const &dstDuvDesc,
|
||||
float * dstDvv, BufferDescriptor const &dstDvvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
float const * duuWeights,
|
||||
float const * duvWeights,
|
||||
float const * dvvWeights,
|
||||
int start, int end) {
|
||||
if (start > 0) {
|
||||
sizes += start;
|
||||
indices += offsets[start];
|
||||
weights += offsets[start];
|
||||
duWeights += offsets[start];
|
||||
dvWeights += offsets[start];
|
||||
duuWeights += offsets[start];
|
||||
duvWeights += offsets[start];
|
||||
dvvWeights += offsets[start];
|
||||
}
|
||||
|
||||
src += srcDesc.offset;
|
||||
dst += dstDesc.offset;
|
||||
dstDu += dstDuDesc.offset;
|
||||
dstDv += dstDvDesc.offset;
|
||||
dstDuu += dstDuuDesc.offset;
|
||||
dstDuv += dstDuvDesc.offset;
|
||||
dstDvv += dstDvvDesc.offset;
|
||||
|
||||
int nOutLength = dstDesc.length + dstDuDesc.length + dstDvDesc.length
|
||||
+ dstDuuDesc.length + dstDuvDesc.length + dstDvvDesc.length;
|
||||
float * result = (float*)alloca(nOutLength * sizeof(float));
|
||||
float * resultDu = result + dstDesc.length;
|
||||
float * resultDv = resultDu + dstDuDesc.length;
|
||||
float * resultDuu = resultDv + dstDuuDesc.length;
|
||||
float * resultDuv = resultDuu + dstDuvDesc.length;
|
||||
float * resultDvv = resultDuv + dstDvvDesc.length;
|
||||
|
||||
int nStencils = end - start;
|
||||
for (int i = 0; i < nStencils; ++i, ++sizes) {
|
||||
|
||||
// clear
|
||||
memset(result, 0, nOutLength * sizeof(float));
|
||||
|
||||
for (int j=0; j<*sizes; ++j) {
|
||||
addWithWeight(result, src, *indices, *weights++, srcDesc);
|
||||
addWithWeight(resultDu, src, *indices, *duWeights++, srcDesc);
|
||||
addWithWeight(resultDv, src, *indices, *dvWeights++, srcDesc);
|
||||
addWithWeight(resultDuu, src, *indices, *duuWeights++, srcDesc);
|
||||
addWithWeight(resultDuv, src, *indices, *duvWeights++, srcDesc);
|
||||
addWithWeight(resultDvv, src, *indices, *dvvWeights++, srcDesc);
|
||||
++indices;
|
||||
}
|
||||
copy(dst, i, result, dstDesc);
|
||||
copy(dstDu, i, resultDu, dstDuDesc);
|
||||
copy(dstDv, i, resultDv, dstDvDesc);
|
||||
copy(dstDuu, i, resultDuu, dstDuuDesc);
|
||||
copy(dstDuv, i, resultDuu, dstDuvDesc);
|
||||
copy(dstDvv, i, resultDvv, dstDvvDesc);
|
||||
}
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -57,6 +57,25 @@ CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float const * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
CpuEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float * dst, BufferDescriptor const &dstDesc,
|
||||
float * dstDu, BufferDescriptor const &dstDuDesc,
|
||||
float * dstDv, BufferDescriptor const &dstDvDesc,
|
||||
float * dstDuu, BufferDescriptor const &dstDuuDesc,
|
||||
float * dstDuv, BufferDescriptor const &dstDuvDesc,
|
||||
float * dstDvv, BufferDescriptor const &dstDvvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
float const * duuWeights,
|
||||
float const * duvWeights,
|
||||
float const * dvvWeights,
|
||||
int start, int end);
|
||||
|
||||
//
|
||||
// SIMD ICC optimization of the stencil kernel
|
||||
//
|
||||
|
@ -53,9 +53,12 @@ extern "C" {
|
||||
const void *patchParams);
|
||||
|
||||
void CudaEvalPatchesWithDerivatives(
|
||||
const float *src, float *dst, float *du, float *dv,
|
||||
int length,
|
||||
int srcStride, int dstStride, int dvStride, int duStride,
|
||||
const float *src, float *dst,
|
||||
float *du, float *dv,
|
||||
float *duu, float *duv, float *dvv,
|
||||
int length, int srcStride, int dstStride,
|
||||
int duStride, int dvStride,
|
||||
int duuStride, int duvStride, int dvvStride,
|
||||
int numPatchCoords,
|
||||
const void *patchCoords,
|
||||
const void *patchArrays,
|
||||
@ -71,6 +74,10 @@ namespace Osd {
|
||||
|
||||
template <class T> void *
|
||||
createCudaBuffer(std::vector<T> const & src) {
|
||||
if (src.empty()) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void * devicePtr = 0;
|
||||
|
||||
size_t size = src.size()*sizeof(T);
|
||||
@ -98,9 +105,11 @@ CudaStencilTable::CudaStencilTable(Far::StencilTable const *stencilTable) {
|
||||
_indices = createCudaBuffer(stencilTable->GetControlIndices());
|
||||
_weights = createCudaBuffer(stencilTable->GetWeights());
|
||||
_duWeights = _dvWeights = NULL;
|
||||
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = NULL;
|
||||
_duWeights = _dvWeights = NULL;
|
||||
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -113,9 +122,13 @@ CudaStencilTable::CudaStencilTable(Far::LimitStencilTable const *limitStencilTab
|
||||
_weights = createCudaBuffer(limitStencilTable->GetWeights());
|
||||
_duWeights = createCudaBuffer(limitStencilTable->GetDuWeights());
|
||||
_dvWeights = createCudaBuffer(limitStencilTable->GetDvWeights());
|
||||
_duuWeights = createCudaBuffer(limitStencilTable->GetDuuWeights());
|
||||
_duvWeights = createCudaBuffer(limitStencilTable->GetDuvWeights());
|
||||
_dvvWeights = createCudaBuffer(limitStencilTable->GetDvvWeights());
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = NULL;
|
||||
_duWeights = _dvWeights = NULL;
|
||||
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@ -126,6 +139,9 @@ CudaStencilTable::~CudaStencilTable() {
|
||||
if (_weights) cudaFree(_weights);
|
||||
if (_duWeights) cudaFree(_duWeights);
|
||||
if (_dvWeights) cudaFree(_dvWeights);
|
||||
if (_duuWeights) cudaFree(_duuWeights);
|
||||
if (_duvWeights) cudaFree(_duvWeights);
|
||||
if (_dvvWeights) cudaFree(_dvvWeights);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -197,6 +213,84 @@ CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
const float * duuWeights,
|
||||
const float * duvWeights,
|
||||
const float * dvvWeights,
|
||||
int start,
|
||||
int end) {
|
||||
// PERFORMANCE: need to combine 3 launches together
|
||||
if (dst) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dst + dstDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dstDesc.stride,
|
||||
sizes, offsets, indices, weights,
|
||||
start, end);
|
||||
}
|
||||
if (du) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
du + duDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
duDesc.stride,
|
||||
sizes, offsets, indices, duWeights,
|
||||
start, end);
|
||||
}
|
||||
if (dv) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dv + dvDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dvDesc.stride,
|
||||
sizes, offsets, indices, dvWeights,
|
||||
start, end);
|
||||
}
|
||||
if (duu) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
duu + duuDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
duuDesc.stride,
|
||||
sizes, offsets, indices, duuWeights,
|
||||
start, end);
|
||||
}
|
||||
if (duv) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
duv + duvDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
duvDesc.stride,
|
||||
sizes, offsets, indices, duvWeights,
|
||||
start, end);
|
||||
}
|
||||
if (dvv) {
|
||||
CudaEvalStencils(src + srcDesc.offset,
|
||||
dvv + dvvDesc.offset,
|
||||
srcDesc.length,
|
||||
srcDesc.stride,
|
||||
dvvDesc.stride,
|
||||
sizes, offsets, indices, dvvWeights,
|
||||
start, end);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalPatches(const float *src,
|
||||
@ -237,9 +331,42 @@ CudaEvaluator::EvalPatches(
|
||||
if (dv) dv += dvDesc.offset;
|
||||
|
||||
CudaEvalPatchesWithDerivatives(
|
||||
src, dst, du, dv,
|
||||
srcDesc.length, srcDesc.stride,
|
||||
dstDesc.stride, duDesc.stride, dvDesc.stride,
|
||||
src, dst, du, dv, NULL, NULL, NULL,
|
||||
srcDesc.length, srcDesc.stride, dstDesc.stride,
|
||||
duDesc.stride, dvDesc.stride, 0, 0, 0,
|
||||
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
CudaEvaluator::EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams) {
|
||||
|
||||
if (src) src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
if (duu) duu += duuDesc.offset;
|
||||
if (duv) duv += duvDesc.offset;
|
||||
if (dvv) dvv += dvvDesc.offset;
|
||||
|
||||
CudaEvalPatchesWithDerivatives(
|
||||
src, dst, du, dv, duu, duv, dvv,
|
||||
srcDesc.length, srcDesc.stride, dstDesc.stride,
|
||||
duDesc.stride, dvDesc.stride,
|
||||
duuDesc.stride, duvDesc.stride, dvvDesc.stride,
|
||||
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
||||
return true;
|
||||
}
|
||||
|
@ -73,6 +73,9 @@ public:
|
||||
void *GetWeightsBuffer() const { return _weights; }
|
||||
void *GetDuWeightsBuffer() const { return _duWeights; }
|
||||
void *GetDvWeightsBuffer() const { return _dvWeights; }
|
||||
void *GetDuuWeightsBuffer() const { return _duuWeights; }
|
||||
void *GetDuvWeightsBuffer() const { return _duvWeights; }
|
||||
void *GetDvvWeightsBuffer() const { return _dvvWeights; }
|
||||
int GetNumStencils() const { return _numStencils; }
|
||||
|
||||
private:
|
||||
@ -81,7 +84,10 @@ private:
|
||||
* _indices,
|
||||
* _weights,
|
||||
* _duWeights,
|
||||
* _dvWeights;
|
||||
* _dvWeights,
|
||||
* _duuWeights,
|
||||
* _duvWeights,
|
||||
* _dvvWeights;
|
||||
int _numStencils;
|
||||
};
|
||||
|
||||
@ -288,6 +294,177 @@ public:
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param stencilTable stencil table to be applied.
|
||||
///
|
||||
/// @param instance not used in the cuda kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the cuda kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const CudaEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
duuBuffer->BindCudaBuffer(), duuDesc,
|
||||
duvBuffer->BindCudaBuffer(), duvDesc,
|
||||
dvvBuffer->BindCudaBuffer(), dvvDesc,
|
||||
(int const *)stencilTable->GetSizesBuffer(),
|
||||
(int const *)stencilTable->GetOffsetsBuffer(),
|
||||
(int const *)stencilTable->GetIndicesBuffer(),
|
||||
(float const *)stencilTable->GetWeightsBuffer(),
|
||||
(float const *)stencilTable->GetDuWeightsBuffer(),
|
||||
(float const *)stencilTable->GetDvWeightsBuffer(),
|
||||
(float const *)stencilTable->GetDuuWeightsBuffer(),
|
||||
(float const *)stencilTable->GetDuvWeightsBuffer(),
|
||||
(float const *)stencilTable->GetDvvWeightsBuffer(),
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw cuda pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
const float * duuWeights,
|
||||
const float * duvWeights,
|
||||
const float * dvvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
@ -396,8 +573,8 @@ public:
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
@ -410,6 +587,95 @@ public:
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
duuBuffer->BindCudaBuffer(), duuDesc,
|
||||
duvBuffer->BindCudaBuffer(), duvDesc,
|
||||
dvvBuffer->BindCudaBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetPatchArrayBuffer(),
|
||||
(const int *)patchTable->GetPatchIndexBuffer(),
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
@ -489,10 +755,76 @@ public:
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndices,
|
||||
const PatchParam *patchParams);
|
||||
PatchParam const *patchParams);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndices an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParams an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndices,
|
||||
PatchParam const *patchParams);
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
@ -533,8 +865,8 @@ public:
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
@ -545,6 +877,164 @@ public:
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetVaryingPatchArrayBuffer(),
|
||||
(const int *)patchTable->GetVaryingPatchIndexBuffer(),
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
duuBuffer->BindCudaBuffer(), duuDesc,
|
||||
duvBuffer->BindCudaBuffer(), duvDesc,
|
||||
dvvBuffer->BindCudaBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetVaryingPatchArrayBuffer(),
|
||||
(const int *)patchTable->GetVaryingPatchIndexBuffer(),
|
||||
(const PatchParam *)patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
@ -587,11 +1077,175 @@ public:
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
(const int *)patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
(const PatchParam *)patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
(const int *)patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
(const PatchParam *)patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCudaBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
/// must have BindCudaBuffer() method returning an
|
||||
/// array of PatchCoord struct in cuda memory.
|
||||
///
|
||||
/// @param patchTable CudaPatchTable or equivalent
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the cuda evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cuda evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
CudaEvaluator const *instance,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCudaBuffer(), srcDesc,
|
||||
dstBuffer->BindCudaBuffer(), dstDesc,
|
||||
duBuffer->BindCudaBuffer(), duDesc,
|
||||
dvBuffer->BindCudaBuffer(), dvDesc,
|
||||
duuBuffer->BindCudaBuffer(), duuDesc,
|
||||
duvBuffer->BindCudaBuffer(), duvDesc,
|
||||
dvvBuffer->BindCudaBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord *)patchCoords->BindCudaBuffer(),
|
||||
(const PatchArray *)patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
|
@ -305,8 +305,12 @@ int getNumControlVertices(int patchType) {
|
||||
}
|
||||
|
||||
__global__ void
|
||||
computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
|
||||
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
|
||||
computePatches(const float *src, float *dst,
|
||||
float *dstDu, float *dstDv,
|
||||
float *dstDuu, float *dstDuv, float *dstDvv,
|
||||
int length, int srcStride, int dstStride,
|
||||
int dstDuStride, int dstDvStride,
|
||||
int dstDuuStride, int dstDuvStride, int dstDvvStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
@ -376,6 +380,30 @@ computePatches(const float *src, float *dst, float *dstDu, float *dstDv,
|
||||
addWithWeight(d, srcVert, wDt[j], length);
|
||||
}
|
||||
}
|
||||
if (dstDuu) {
|
||||
float *d = dstDuu + i * dstDuuStride;
|
||||
clear(d, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(d, srcVert, wDss[j], length);
|
||||
}
|
||||
}
|
||||
if (dstDuv) {
|
||||
float *d = dstDuv + i * dstDuvStride;
|
||||
clear(d, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(d, srcVert, wDst[j], length);
|
||||
}
|
||||
}
|
||||
if (dstDvv) {
|
||||
float *d = dstDvv + i * dstDvvStride;
|
||||
clear(d, length);
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
const float * srcVert = src + cvs[j] * srcStride;
|
||||
addWithWeight(d, srcVert, wDtt[j], length);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -447,14 +475,19 @@ void CudaEvalPatches(
|
||||
// PERFORMANCE: not optimized at all
|
||||
|
||||
computePatches <<<512, 32>>>(
|
||||
src, dst, NULL, NULL, length, srcStride, dstStride, 0, 0,
|
||||
src, dst, NULL, NULL, NULL, NULL, NULL,
|
||||
length, srcStride, dstStride, 0, 0, 0, 0, 0,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
}
|
||||
|
||||
void CudaEvalPatchesWithDerivatives(
|
||||
const float *src, float *dst, float *dstDu, float *dstDv,
|
||||
int length, int srcStride, int dstStride, int dstDuStride, int dstDvStride,
|
||||
const float *src, float *dst,
|
||||
float *dstDu, float *dstDv,
|
||||
float *dstDuu, float *dstDuv, float *dstDvv,
|
||||
int length, int srcStride, int dstStride,
|
||||
int dstDuStride, int dstDvStride,
|
||||
int dstDuuStride, int dstDuvStride, int dstDvvStride,
|
||||
int numPatchCoords, const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
@ -463,7 +496,9 @@ void CudaEvalPatchesWithDerivatives(
|
||||
// PERFORMANCE: not optimized at all
|
||||
|
||||
computePatches <<<512, 32>>>(
|
||||
src, dst, dstDu, dstDv, length, srcStride, dstStride, dstDuStride, dstDvStride,
|
||||
src, dst, dstDu, dstDv, dstDuu, dstDuv, dstDvv,
|
||||
length, srcStride, dstStride,
|
||||
dstDuStride, dstDvStride, dstDuuStride, dstDuvStride, dstDvvStride,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
}
|
||||
|
@ -172,6 +172,22 @@ D3D11ComputeEvaluator::Create(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc,
|
||||
ID3D11DeviceContext *deviceContext) {
|
||||
return Create(srcDesc, dstDesc, duDesc, dvDesc,
|
||||
BufferDescriptor(),
|
||||
BufferDescriptor(),
|
||||
BufferDescriptor(),
|
||||
deviceContext);
|
||||
}
|
||||
|
||||
D3D11ComputeEvaluator *
|
||||
D3D11ComputeEvaluator::Create(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc,
|
||||
ID3D11DeviceContext *deviceContext) {
|
||||
(void)deviceContext; // not used
|
||||
|
||||
// TODO: implements derivatives
|
||||
|
@ -102,6 +102,15 @@ public:
|
||||
BufferDescriptor const &dvDesc,
|
||||
ID3D11DeviceContext *deviceContext);
|
||||
|
||||
static D3D11ComputeEvaluator * Create(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc,
|
||||
ID3D11DeviceContext *deviceContext);
|
||||
|
||||
/// Constructor.
|
||||
D3D11ComputeEvaluator();
|
||||
|
||||
|
@ -44,6 +44,10 @@ static const char *shaderSource =
|
||||
|
||||
template <class T> GLuint
|
||||
createSSBO(std::vector<T> const & src) {
|
||||
if (src.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLuint devicePtr = 0;
|
||||
glGenBuffers(1, &devicePtr);
|
||||
|
||||
@ -75,9 +79,11 @@ GLStencilTableSSBO::GLStencilTableSSBO(
|
||||
_indices = createSSBO(stencilTable->GetControlIndices());
|
||||
_weights = createSSBO(stencilTable->GetWeights());
|
||||
_duWeights = _dvWeights = 0;
|
||||
_duuWeights = _duvWeights = _dvvWeights = 0;
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = 0;
|
||||
_duWeights = _dvWeights = 0;
|
||||
_duuWeights = _duvWeights = _dvvWeights = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -91,9 +97,13 @@ GLStencilTableSSBO::GLStencilTableSSBO(
|
||||
_weights = createSSBO(limitStencilTable->GetWeights());
|
||||
_duWeights = createSSBO(limitStencilTable->GetDuWeights());
|
||||
_dvWeights = createSSBO(limitStencilTable->GetDvWeights());
|
||||
_duuWeights = createSSBO(limitStencilTable->GetDuuWeights());
|
||||
_duvWeights = createSSBO(limitStencilTable->GetDuvWeights());
|
||||
_dvvWeights = createSSBO(limitStencilTable->GetDvvWeights());
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = 0;
|
||||
_duWeights = _dvWeights = 0;
|
||||
_duuWeights = _duvWeights = _dvvWeights = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -104,6 +114,9 @@ GLStencilTableSSBO::~GLStencilTableSSBO() {
|
||||
if (_weights) glDeleteBuffers(1, &_weights);
|
||||
if (_duWeights) glDeleteBuffers(1, &_duWeights);
|
||||
if (_dvWeights) glDeleteBuffers(1, &_dvWeights);
|
||||
if (_duuWeights) glDeleteBuffers(1, &_duuWeights);
|
||||
if (_duvWeights) glDeleteBuffers(1, &_duvWeights);
|
||||
if (_dvvWeights) glDeleteBuffers(1, &_dvvWeights);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -120,8 +133,11 @@ GLComputeEvaluator::~GLComputeEvaluator() {
|
||||
static GLuint
|
||||
compileKernel(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const & /* duDesc */,
|
||||
BufferDescriptor const & /* dvDesc */,
|
||||
BufferDescriptor const & duDesc,
|
||||
BufferDescriptor const & dvDesc,
|
||||
BufferDescriptor const & duuDesc,
|
||||
BufferDescriptor const & duvDesc,
|
||||
BufferDescriptor const & dvvDesc,
|
||||
const char *kernelDefine,
|
||||
int workGroupSize) {
|
||||
GLuint program = glCreateProgram();
|
||||
@ -139,6 +155,16 @@ compileKernel(BufferDescriptor const &srcDesc,
|
||||
<< "#define WORK_GROUP_SIZE " << workGroupSize << "\n"
|
||||
<< kernelDefine << "\n"
|
||||
<< patchBasisShaderSourceDefine << "\n";
|
||||
|
||||
bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
|
||||
if (deriv1) {
|
||||
defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES\n";
|
||||
}
|
||||
if (deriv2) {
|
||||
defines << "#define OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES\n";
|
||||
}
|
||||
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[4] = {"#version 430\n", 0, 0, 0};
|
||||
@ -175,16 +201,23 @@ bool
|
||||
GLComputeEvaluator::Compile(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc) {
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc) {
|
||||
|
||||
// create a stencil kernel
|
||||
if (!_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
|
||||
if (!_stencilKernel.Compile(srcDesc, dstDesc,
|
||||
duDesc, dvDesc,
|
||||
duuDesc, duvDesc, dvvDesc,
|
||||
_workGroupSize)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// create a patch kernel
|
||||
if (!_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
|
||||
if (!_patchKernel.Compile(srcDesc, dstDesc,
|
||||
duDesc, dvDesc,
|
||||
duuDesc, duvDesc, dvvDesc,
|
||||
_workGroupSize)) {
|
||||
return false;
|
||||
}
|
||||
@ -214,6 +247,40 @@ GLComputeEvaluator::EvalStencils(
|
||||
GLuint dvWeightsBuffer,
|
||||
int start, int end) const {
|
||||
|
||||
return EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
sizesBuffer, offsetsBuffer, indicesBuffer,
|
||||
weightsBuffer,
|
||||
duWeightsBuffer, dvWeightsBuffer,
|
||||
0, 0, 0,
|
||||
start, end);
|
||||
}
|
||||
|
||||
bool
|
||||
GLComputeEvaluator::EvalStencils(
|
||||
GLuint srcBuffer, BufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, BufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, BufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, BufferDescriptor const &dvDesc,
|
||||
GLuint duuBuffer, BufferDescriptor const &duuDesc,
|
||||
GLuint duvBuffer, BufferDescriptor const &duvDesc,
|
||||
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
GLuint sizesBuffer,
|
||||
GLuint offsetsBuffer,
|
||||
GLuint indicesBuffer,
|
||||
GLuint weightsBuffer,
|
||||
GLuint duWeightsBuffer,
|
||||
GLuint dvWeightsBuffer,
|
||||
GLuint duuWeightsBuffer,
|
||||
GLuint duvWeightsBuffer,
|
||||
GLuint dvvWeightsBuffer,
|
||||
int start, int end) const {
|
||||
|
||||
if (!_stencilKernel.program) return false;
|
||||
int count = end - start;
|
||||
if (count <= 0) {
|
||||
@ -224,6 +291,9 @@ GLComputeEvaluator::EvalStencils(
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, sizesBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, offsetsBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, indicesBuffer);
|
||||
@ -232,6 +302,12 @@ GLComputeEvaluator::EvalStencils(
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 8, duWeightsBuffer);
|
||||
if (dvWeightsBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 9, dvWeightsBuffer);
|
||||
if (duuWeightsBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 13, duuWeightsBuffer);
|
||||
if (duvWeightsBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 14, duvWeightsBuffer);
|
||||
if (dvvWeightsBuffer)
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 15, dvvWeightsBuffer);
|
||||
|
||||
glUseProgram(_stencilKernel.program);
|
||||
|
||||
@ -247,13 +323,25 @@ GLComputeEvaluator::EvalStencils(
|
||||
glUniform3i(_stencilKernel.uniformDvDesc,
|
||||
dvDesc.offset, dvDesc.length, dvDesc.stride);
|
||||
}
|
||||
if (_stencilKernel.uniformDuuDesc > 0) {
|
||||
glUniform3i(_stencilKernel.uniformDuuDesc,
|
||||
duuDesc.offset, duuDesc.length, duuDesc.stride);
|
||||
}
|
||||
if (_stencilKernel.uniformDuvDesc > 0) {
|
||||
glUniform3i(_stencilKernel.uniformDuvDesc,
|
||||
duvDesc.offset, duvDesc.length, duvDesc.stride);
|
||||
}
|
||||
if (_stencilKernel.uniformDvvDesc > 0) {
|
||||
glUniform3i(_stencilKernel.uniformDvvDesc,
|
||||
dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
|
||||
}
|
||||
|
||||
glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);
|
||||
|
||||
glUseProgram(0);
|
||||
|
||||
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, 0);
|
||||
}
|
||||
|
||||
@ -272,12 +360,44 @@ GLComputeEvaluator::EvalPatches(
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const {
|
||||
|
||||
return EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoordsBuffer,
|
||||
patchArrays,
|
||||
patchIndexBuffer,
|
||||
patchParamsBuffer);
|
||||
}
|
||||
|
||||
bool
|
||||
GLComputeEvaluator::EvalPatches(
|
||||
GLuint srcBuffer, BufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, BufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, BufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, BufferDescriptor const &dvDesc,
|
||||
GLuint duuBuffer, BufferDescriptor const &duuDesc,
|
||||
GLuint duvBuffer, BufferDescriptor const &duvDesc,
|
||||
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexBuffer,
|
||||
GLuint patchParamsBuffer) const {
|
||||
|
||||
if (!_patchKernel.program) return false;
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, duBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, dvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, duuBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, duvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, dvvBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, patchCoordsBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, patchIndexBuffer);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, patchParamsBuffer);
|
||||
@ -288,8 +408,27 @@ GLComputeEvaluator::EvalPatches(
|
||||
glUniform1i(_patchKernel.uniformDstOffset, dstDesc.offset);
|
||||
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
|
||||
(const GLint*)&patchArrays[0]);
|
||||
glUniform3i(_patchKernel.uniformDuDesc, duDesc.offset, duDesc.length, duDesc.stride);
|
||||
glUniform3i(_patchKernel.uniformDvDesc, dvDesc.offset, dvDesc.length, dvDesc.stride);
|
||||
|
||||
if (_patchKernel.uniformDuDesc > 0) {
|
||||
glUniform3i(_patchKernel.uniformDuDesc,
|
||||
duDesc.offset, duDesc.length, duDesc.stride);
|
||||
}
|
||||
if (_patchKernel.uniformDvDesc > 0) {
|
||||
glUniform3i(_patchKernel.uniformDvDesc,
|
||||
dvDesc.offset, dvDesc.length, dvDesc.stride);
|
||||
}
|
||||
if (_patchKernel.uniformDuuDesc > 0) {
|
||||
glUniform3i(_patchKernel.uniformDuuDesc,
|
||||
duuDesc.offset, duuDesc.length, duuDesc.stride);
|
||||
}
|
||||
if (_patchKernel.uniformDuvDesc > 0) {
|
||||
glUniform3i(_patchKernel.uniformDuvDesc,
|
||||
duvDesc.offset, duvDesc.length, duvDesc.stride);
|
||||
}
|
||||
if (_patchKernel.uniformDvvDesc > 0) {
|
||||
glUniform3i(_patchKernel.uniformDvvDesc,
|
||||
dvvDesc.offset, dvvDesc.length, dvvDesc.stride);
|
||||
}
|
||||
|
||||
glDispatchCompute((numPatchCoords + _workGroupSize - 1) / _workGroupSize, 1, 1);
|
||||
|
||||
@ -303,6 +442,10 @@ GLComputeEvaluator::EvalPatches(
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 6, 0);
|
||||
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, 0);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -320,20 +463,21 @@ GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc,
|
||||
int workGroupSize) {
|
||||
// create stencil kernel
|
||||
if (program) {
|
||||
glDeleteProgram(program);
|
||||
}
|
||||
|
||||
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
const char *kernelDef = derivatives
|
||||
? "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n"
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES\n"
|
||||
: "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n";
|
||||
const char * kernelDefine =
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS\n";
|
||||
|
||||
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef,
|
||||
workGroupSize);
|
||||
program = compileKernel(srcDesc, dstDesc,
|
||||
duDesc, dvDesc, duuDesc, duvDesc, dvvDesc,
|
||||
kernelDefine, workGroupSize);
|
||||
if (program == 0) return false;
|
||||
|
||||
// cache uniform locations (TODO: use uniform block)
|
||||
@ -343,6 +487,9 @@ GLComputeEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
|
||||
uniformDstOffset = glGetUniformLocation(program, "dstOffset");
|
||||
uniformDuDesc = glGetUniformLocation(program, "duDesc");
|
||||
uniformDvDesc = glGetUniformLocation(program, "dvDesc");
|
||||
uniformDuuDesc = glGetUniformLocation(program, "duuDesc");
|
||||
uniformDuvDesc = glGetUniformLocation(program, "duvDesc");
|
||||
uniformDvvDesc = glGetUniformLocation(program, "dvvDesc");
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -362,20 +509,21 @@ GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc,
|
||||
int workGroupSize) {
|
||||
// create stencil kernel
|
||||
if (program) {
|
||||
glDeleteProgram(program);
|
||||
}
|
||||
|
||||
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
const char *kernelDef = derivatives
|
||||
? "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n"
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES\n"
|
||||
: "#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n";
|
||||
const char * kernelDefine =
|
||||
"#define OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_PATCHES\n";
|
||||
|
||||
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef,
|
||||
workGroupSize);
|
||||
program = compileKernel(srcDesc, dstDesc,
|
||||
duDesc, dvDesc, duuDesc, duvDesc, dvvDesc,
|
||||
kernelDefine, workGroupSize);
|
||||
if (program == 0) return false;
|
||||
|
||||
// cache uniform locations
|
||||
@ -384,6 +532,9 @@ GLComputeEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
|
||||
uniformPatchArray = glGetUniformLocation(program, "patchArray");
|
||||
uniformDuDesc = glGetUniformLocation(program, "duDesc");
|
||||
uniformDvDesc = glGetUniformLocation(program, "dvDesc");
|
||||
uniformDuuDesc = glGetUniformLocation(program, "duuDesc");
|
||||
uniformDuvDesc = glGetUniformLocation(program, "duvDesc");
|
||||
uniformDvvDesc = glGetUniformLocation(program, "dvvDesc");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -48,6 +48,10 @@ static const char *shaderSource =
|
||||
|
||||
template <class T> GLuint
|
||||
createGLTextureBuffer(std::vector<T> const & src, GLenum type) {
|
||||
if (src.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLint size = static_cast<int>(src.size()*sizeof(T));
|
||||
void const * ptr = &src.at(0);
|
||||
|
||||
@ -95,9 +99,11 @@ GLStencilTableTBO::GLStencilTableTBO(
|
||||
stencilTable->GetControlIndices(), GL_R32I);
|
||||
_weights = createGLTextureBuffer(stencilTable->GetWeights(), GL_R32F);
|
||||
_duWeights = _dvWeights = 0;
|
||||
_duuWeights = _duvWeights = _dvvWeights = 0;
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = 0;
|
||||
_duWeights = _dvWeights = 0;
|
||||
_duuWeights = _duvWeights = _dvvWeights = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,9 +124,16 @@ GLStencilTableTBO::GLStencilTableTBO(
|
||||
limitStencilTable->GetDuWeights(), GL_R32F);
|
||||
_dvWeights = createGLTextureBuffer(
|
||||
limitStencilTable->GetDvWeights(), GL_R32F);
|
||||
_duuWeights = createGLTextureBuffer(
|
||||
limitStencilTable->GetDuuWeights(), GL_R32F);
|
||||
_duvWeights = createGLTextureBuffer(
|
||||
limitStencilTable->GetDuvWeights(), GL_R32F);
|
||||
_dvvWeights = createGLTextureBuffer(
|
||||
limitStencilTable->GetDvvWeights(), GL_R32F);
|
||||
} else {
|
||||
_sizes = _offsets = _indices = _weights = 0;
|
||||
_duWeights = _dvWeights = 0;
|
||||
_duuWeights = _duvWeights = _dvvWeights = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,11 +144,16 @@ GLStencilTableTBO::~GLStencilTableTBO() {
|
||||
if (_weights) glDeleteTextures(1, &_weights);
|
||||
if (_duWeights) glDeleteTextures(1, &_duWeights);
|
||||
if (_dvWeights) glDeleteTextures(1, &_dvWeights);
|
||||
if (_duuWeights) glDeleteTextures(1, &_duuWeights);
|
||||
if (_duvWeights) glDeleteTextures(1, &_duvWeights);
|
||||
if (_dvvWeights) glDeleteTextures(1, &_dvvWeights);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) {
|
||||
GLXFBEvaluator::GLXFBEvaluator(bool sharedDerivativeBuffers)
|
||||
: _srcBufferTexture(0),
|
||||
_sharedDerivativeBuffers(sharedDerivativeBuffers) {
|
||||
}
|
||||
|
||||
GLXFBEvaluator::~GLXFBEvaluator() {
|
||||
@ -149,7 +167,11 @@ compileKernel(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc,
|
||||
const char *kernelDefine) {
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc,
|
||||
const char *kernelDefine,
|
||||
bool sharedDerivativeBuffers) {
|
||||
|
||||
GLuint program = glCreateProgram();
|
||||
|
||||
@ -165,8 +187,25 @@ compileKernel(BufferDescriptor const &srcDesc,
|
||||
<< "#define VERTEX_SHADER\n"
|
||||
<< kernelDefine << "\n"
|
||||
<< patchBasisShaderSourceDefine << "\n";
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
|
||||
if (deriv1) {
|
||||
defines << "#define OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES\n";
|
||||
if (sharedDerivativeBuffers) {
|
||||
defines <<
|
||||
"#define OPENSUBDIV_GLSL_XFB_SHARED_1ST_DERIVATIVE_BUFFERS\n";
|
||||
}
|
||||
}
|
||||
if (deriv2) {
|
||||
defines << "#define OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES\n";
|
||||
if (sharedDerivativeBuffers) {
|
||||
defines <<
|
||||
"#define OPENSUBDIV_GLSL_XFB_SHARED_2ND_DERIVATIVE_BUFFERS\n";
|
||||
}
|
||||
}
|
||||
|
||||
std::string defineStr = defines.str();
|
||||
|
||||
const char *shaderSources[4] = {"#version 410\n", NULL, NULL, NULL};
|
||||
|
||||
@ -204,41 +243,148 @@ compileKernel(BufferDescriptor const &srcDesc,
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
if (duDesc.length) {
|
||||
//
|
||||
// For derivatives, we use another buffer bindings so gl_NextBuffer
|
||||
// is inserted here to switch the destination of transform feedback.
|
||||
//
|
||||
// Note that the destination buffers may or may not be shared between
|
||||
// vertex and each derivatives. gl_NextBuffer seems still works well
|
||||
// in either case.
|
||||
//
|
||||
//
|
||||
// For derivatives, we use another buffer bindings so gl_NextBuffer
|
||||
// is inserted here to switch the destination of transform feedback.
|
||||
//
|
||||
// Note that the destination buffers may or may not be shared between
|
||||
// vertex and each derivatives. gl_NextBuffer seems still works well
|
||||
// in either case.
|
||||
//
|
||||
// If we know that the buffers for derivatives are shared, then we
|
||||
// can use fewer buffer bindings. This can be important, since most GL
|
||||
// implementations will support only up to 4 transform feedback bindings.
|
||||
//
|
||||
if (deriv1 && sharedDerivativeBuffers) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
int primvarOffset = (duDesc.offset % duDesc.stride);
|
||||
for (int i = 0; i < primvarOffset; ++i) {
|
||||
|
||||
int primvar1Offset = (duDesc.offset % duDesc.stride);
|
||||
int primvar2Offset = (dvDesc.offset % dvDesc.stride);
|
||||
|
||||
for (int i = 0; i < primvar1Offset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < duDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDuBuffer[%d]", i);
|
||||
snprintf(attrName, sizeof(attrName), "outDeriv1Buffer[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = primvarOffset + duDesc.length; i < duDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
if (dvDesc.length) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
int primvarOffset = (dvDesc.offset % dvDesc.stride);
|
||||
for (int i = 0; i < primvarOffset; ++i) {
|
||||
for (int i = primvar1Offset + duDesc.length; i < primvar2Offset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < dvDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDvBuffer[%d]", i);
|
||||
snprintf(attrName, sizeof(attrName), "outDeriv1Buffer[%d]", i+duDesc.length);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = primvarOffset + dvDesc.length; i < dvDesc.stride; ++i) {
|
||||
for (int i = primvar2Offset + dvDesc.length; i < dvDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
} else {
|
||||
if (duDesc.length) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
int primvarOffset = (duDesc.offset % duDesc.stride);
|
||||
for (int i = 0; i < primvarOffset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < duDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDuBuffer[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = primvarOffset + duDesc.length; i < duDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
if (dvDesc.length) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
int primvarOffset = (dvDesc.offset % dvDesc.stride);
|
||||
for (int i = 0; i < primvarOffset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < dvDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDvBuffer[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = primvarOffset + dvDesc.length; i < dvDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (deriv2 && sharedDerivativeBuffers) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
|
||||
int primvar1Offset = (duuDesc.offset % duuDesc.stride);
|
||||
int primvar2Offset = (duvDesc.offset % duvDesc.stride);
|
||||
int primvar3Offset = (dvvDesc.offset % dvvDesc.stride);
|
||||
|
||||
for (int i = 0; i < primvar1Offset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < duuDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDeriv2Buffer[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
|
||||
for (int i = primvar1Offset + duuDesc.length; i < primvar2Offset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < duvDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDeriv2Buffer[%d]", i+duuDesc.length);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
|
||||
for (int i = primvar2Offset + duvDesc.length; i < primvar3Offset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < dvvDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDeriv2Buffer[%d]", i+duuDesc.length+duvDesc.length);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
|
||||
for (int i = primvar3Offset + dvvDesc.length; i < dvvDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
} else {
|
||||
if (duuDesc.length) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
int primvarOffset = (duuDesc.offset % duuDesc.stride);
|
||||
for (int i = 0; i < primvarOffset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < duuDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDuuBuffer[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = primvarOffset + duuDesc.length; i < duuDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
if (duvDesc.length) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
int primvarOffset = (duvDesc.offset % duvDesc.stride);
|
||||
for (int i = 0; i < primvarOffset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < duvDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDuvBuffer[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = primvarOffset + duvDesc.length; i < duvDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
if (dvvDesc.length) {
|
||||
outputs.push_back("gl_NextBuffer");
|
||||
int primvarOffset = (dvvDesc.offset % dvvDesc.stride);
|
||||
for (int i = 0; i < primvarOffset; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
for (int i = 0; i < dvvDesc.length; ++i) {
|
||||
snprintf(attrName, sizeof(attrName), "outDvvBuffer[%d]", i);
|
||||
outputs.push_back(attrName);
|
||||
}
|
||||
for (int i = primvarOffset + dvvDesc.length; i < dvvDesc.stride; ++i) {
|
||||
outputs.push_back("gl_SkipComponents1");
|
||||
}
|
||||
}
|
||||
}
|
||||
// convert to char* array
|
||||
std::vector<const char *> pOutputs;
|
||||
@ -274,13 +420,20 @@ bool
|
||||
GLXFBEvaluator::Compile(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc) {
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc) {
|
||||
|
||||
// create a stencil kernel
|
||||
_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc);
|
||||
_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
|
||||
duuDesc, duvDesc, dvvDesc,
|
||||
_sharedDerivativeBuffers);
|
||||
|
||||
// create a patch kernel
|
||||
_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc);
|
||||
_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
|
||||
duuDesc, duvDesc, dvvDesc,
|
||||
_sharedDerivativeBuffers);
|
||||
|
||||
// create a texture for input buffer
|
||||
if (!_srcBufferTexture) {
|
||||
@ -314,12 +467,46 @@ GLXFBEvaluator::EvalStencils(
|
||||
GLuint dstBuffer, BufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, BufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, BufferDescriptor const &dvDesc,
|
||||
GLuint sizesBuffer,
|
||||
GLuint offsetsBuffer,
|
||||
GLuint indicesBuffer,
|
||||
GLuint weightsBuffer,
|
||||
GLuint duWeightsBuffer,
|
||||
GLuint dvWeightsBuffer,
|
||||
int start, int end) const {
|
||||
|
||||
return EvalStencils(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
sizesBuffer, offsetsBuffer, indicesBuffer,
|
||||
weightsBuffer,
|
||||
duWeightsBuffer, dvWeightsBuffer,
|
||||
0, 0, 0,
|
||||
start, end);
|
||||
}
|
||||
|
||||
bool
|
||||
GLXFBEvaluator::EvalStencils(
|
||||
GLuint srcBuffer, BufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, BufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, BufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, BufferDescriptor const &dvDesc,
|
||||
GLuint duuBuffer, BufferDescriptor const &duuDesc,
|
||||
GLuint duvBuffer, BufferDescriptor const &duvDesc,
|
||||
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
GLuint sizesTexture,
|
||||
GLuint offsetsTexture,
|
||||
GLuint indicesTexture,
|
||||
GLuint weightsTexture,
|
||||
GLuint duWeightsTexture,
|
||||
GLuint dvWeightsTexture,
|
||||
GLuint duuWeightsTexture,
|
||||
GLuint duvWeightsTexture,
|
||||
GLuint dvvWeightsTexture,
|
||||
int start, int end) const {
|
||||
|
||||
if (!_stencilKernel.program) return false;
|
||||
@ -353,6 +540,14 @@ GLXFBEvaluator::EvalStencils(
|
||||
bindTexture(_stencilKernel.uniformDuWeightsTexture, duWeightsTexture, 5);
|
||||
if (_stencilKernel.uniformDvWeightsTexture >= 0 && dvWeightsTexture)
|
||||
bindTexture(_stencilKernel.uniformDvWeightsTexture, dvWeightsTexture, 6);
|
||||
if (_stencilKernel.uniformDuWeightsTexture >= 0 && duWeightsTexture)
|
||||
bindTexture(_stencilKernel.uniformDuWeightsTexture, duWeightsTexture, 5);
|
||||
if (_stencilKernel.uniformDuuWeightsTexture >= 0 && duuWeightsTexture)
|
||||
bindTexture(_stencilKernel.uniformDuuWeightsTexture, duuWeightsTexture, 5);
|
||||
if (_stencilKernel.uniformDuvWeightsTexture >= 0 && duvWeightsTexture)
|
||||
bindTexture(_stencilKernel.uniformDuvWeightsTexture, duvWeightsTexture, 6);
|
||||
if (_stencilKernel.uniformDvvWeightsTexture >= 0 && dvvWeightsTexture)
|
||||
bindTexture(_stencilKernel.uniformDvvWeightsTexture, dvvWeightsTexture, 6);
|
||||
|
||||
// set batch range
|
||||
glUniform1i(_stencilKernel.uniformStart, start);
|
||||
@ -392,6 +587,12 @@ GLXFBEvaluator::EvalStencils(
|
||||
(duDesc.offset - (duDesc.offset % duDesc.stride)) : 0;
|
||||
int dvBufferBindOffset = dvDesc.stride ?
|
||||
(dvDesc.offset - (dvDesc.offset % dvDesc.stride)) : 0;
|
||||
int duuBufferBindOffset = duuDesc.stride ?
|
||||
(duuDesc.offset - (duuDesc.offset % duuDesc.stride)) : 0;
|
||||
int duvBufferBindOffset = duvDesc.stride ?
|
||||
(duvDesc.offset - (duvDesc.offset % duvDesc.stride)) : 0;
|
||||
int dvvBufferBindOffset = dvvDesc.stride ?
|
||||
(dvvDesc.offset - (dvvDesc.offset % dvvDesc.stride)) : 0;
|
||||
|
||||
// bind destination buffer
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
@ -399,18 +600,53 @@ GLXFBEvaluator::EvalStencils(
|
||||
dstBufferBindOffset * sizeof(float),
|
||||
count * dstDesc.stride * sizeof(float));
|
||||
|
||||
if (duDesc.length > 0) {
|
||||
if ((duDesc.length > 0) && _sharedDerivativeBuffers) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
1, duBuffer,
|
||||
duBufferBindOffset * sizeof(float),
|
||||
count * duDesc.stride * sizeof(float));
|
||||
} else {
|
||||
if (duDesc.length > 0) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
1, duBuffer,
|
||||
duBufferBindOffset * sizeof(float),
|
||||
count * duDesc.stride * sizeof(float));
|
||||
}
|
||||
|
||||
if (dvDesc.length > 0) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
2, dvBuffer,
|
||||
dvBufferBindOffset * sizeof(float),
|
||||
count * dvDesc.stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
if (dvDesc.length > 0) {
|
||||
if ((duuDesc.length > 0) && _sharedDerivativeBuffers) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
2, dvBuffer,
|
||||
dvBufferBindOffset * sizeof(float),
|
||||
count * dvDesc.stride * sizeof(float));
|
||||
2, duuBuffer,
|
||||
duuBufferBindOffset * sizeof(float),
|
||||
count * duuDesc.stride * sizeof(float));
|
||||
} else {
|
||||
if (duuDesc.length > 0) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
3, duuBuffer,
|
||||
duuBufferBindOffset * sizeof(float),
|
||||
count * duuDesc.stride * sizeof(float));
|
||||
}
|
||||
|
||||
if (duvDesc.length > 0) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
4, duvBuffer,
|
||||
duvBufferBindOffset * sizeof(float),
|
||||
count * duvDesc.stride * sizeof(float));
|
||||
}
|
||||
|
||||
if (dvvDesc.length > 0) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
5, dvvBuffer,
|
||||
dvvBufferBindOffset * sizeof(float),
|
||||
count * dvvDesc.stride * sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
@ -419,7 +655,7 @@ GLXFBEvaluator::EvalStencils(
|
||||
|
||||
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
glActiveTexture(GL_TEXTURE0 + i);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
@ -448,7 +684,36 @@ GLXFBEvaluator::EvalPatches(
|
||||
GLuint patchIndexTexture,
|
||||
GLuint patchParamTexture) const {
|
||||
|
||||
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
return EvalPatches(srcBuffer, srcDesc,
|
||||
dstBuffer, dstDesc,
|
||||
duBuffer, duDesc,
|
||||
dvBuffer, dvDesc,
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
0, BufferDescriptor(),
|
||||
numPatchCoords,
|
||||
patchCoordsBuffer, patchArrays,
|
||||
patchIndexTexture,
|
||||
patchParamTexture);
|
||||
}
|
||||
|
||||
bool
|
||||
GLXFBEvaluator::EvalPatches(
|
||||
GLuint srcBuffer, BufferDescriptor const &srcDesc,
|
||||
GLuint dstBuffer, BufferDescriptor const &dstDesc,
|
||||
GLuint duBuffer, BufferDescriptor const &duDesc,
|
||||
GLuint dvBuffer, BufferDescriptor const &dvDesc,
|
||||
GLuint duuBuffer, BufferDescriptor const &duuDesc,
|
||||
GLuint duvBuffer, BufferDescriptor const &duvDesc,
|
||||
GLuint dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
GLuint patchCoordsBuffer,
|
||||
const PatchArrayVector &patchArrays,
|
||||
GLuint patchIndexTexture,
|
||||
GLuint patchParamTexture) const {
|
||||
|
||||
bool deriv1 = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
|
||||
|
||||
if (!_patchKernel.program) return false;
|
||||
|
||||
@ -493,6 +758,15 @@ GLXFBEvaluator::EvalPatches(
|
||||
int dvBufferBindOffset = dvDesc.stride
|
||||
? (dvDesc.offset - (dvDesc.offset % dvDesc.stride))
|
||||
: 0;
|
||||
int duuBufferBindOffset = duuDesc.stride
|
||||
? (duuDesc.offset - (duuDesc.offset % duuDesc.stride))
|
||||
: 0;
|
||||
int duvBufferBindOffset = duvDesc.stride
|
||||
? (duvDesc.offset - (duvDesc.offset % duvDesc.stride))
|
||||
: 0;
|
||||
int dvvBufferBindOffset = dvvDesc.stride
|
||||
? (dvvDesc.offset - (dvvDesc.offset % dvvDesc.stride))
|
||||
: 0;
|
||||
|
||||
// bind destination buffer
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
@ -500,7 +774,12 @@ GLXFBEvaluator::EvalPatches(
|
||||
dstBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * dstDesc.stride * sizeof(float));
|
||||
|
||||
if (derivatives) {
|
||||
if (deriv1 && _sharedDerivativeBuffers) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
1, duBuffer,
|
||||
duBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * duDesc.stride * sizeof(float));
|
||||
} else if (deriv1) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
1, duBuffer,
|
||||
duBufferBindOffset * sizeof(float),
|
||||
@ -510,7 +789,27 @@ GLXFBEvaluator::EvalPatches(
|
||||
2, dvBuffer,
|
||||
dvBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * dvDesc.stride * sizeof(float));
|
||||
}
|
||||
if (deriv2 && _sharedDerivativeBuffers) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
2, duuBuffer,
|
||||
duuBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * duuDesc.stride * sizeof(float));
|
||||
} else if (deriv2) {
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
3, duuBuffer,
|
||||
duuBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * duuDesc.stride * sizeof(float));
|
||||
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
4, duvBuffer,
|
||||
duvBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * duvDesc.stride * sizeof(float));
|
||||
|
||||
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
|
||||
5, dvvBuffer,
|
||||
dvvBufferBindOffset * sizeof(float),
|
||||
numPatchCoords * dvvDesc.stride * sizeof(float));
|
||||
}
|
||||
|
||||
glBeginTransformFeedback(GL_POINTS);
|
||||
@ -520,7 +819,7 @@ GLXFBEvaluator::EvalPatches(
|
||||
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
|
||||
|
||||
// unbind textures
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
for (int i = 0; i < 6; ++i) {
|
||||
glActiveTexture(GL_TEXTURE0 + i);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, 0);
|
||||
}
|
||||
@ -536,7 +835,6 @@ GLXFBEvaluator::EvalPatches(
|
||||
glBindVertexArray(0);
|
||||
glDeleteVertexArrays(1, &vao);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -554,32 +852,38 @@ bool
|
||||
GLXFBEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc) {
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc,
|
||||
bool sharedDerivativeBuffers) {
|
||||
// create stencil kernel
|
||||
if (program) {
|
||||
glDeleteProgram(program);
|
||||
}
|
||||
|
||||
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
const char *kernelDef = derivatives
|
||||
? "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n"
|
||||
"#define OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES\n"
|
||||
: "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n";
|
||||
const char * kernelDefines =
|
||||
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n";
|
||||
|
||||
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef);
|
||||
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc,
|
||||
duuDesc, duvDesc, dvvDesc,
|
||||
kernelDefines, sharedDerivativeBuffers);
|
||||
if (program == 0) return false;
|
||||
|
||||
// cache uniform locations (TODO: use uniform block)
|
||||
uniformSrcBufferTexture = glGetUniformLocation(program, "vertexBuffer");
|
||||
uniformSrcOffset = glGetUniformLocation(program, "srcOffset");
|
||||
uniformSizesTexture = glGetUniformLocation(program, "sizes");
|
||||
uniformOffsetsTexture = glGetUniformLocation(program, "offsets");
|
||||
uniformIndicesTexture = glGetUniformLocation(program, "indices");
|
||||
uniformWeightsTexture = glGetUniformLocation(program, "weights");
|
||||
uniformDuWeightsTexture = glGetUniformLocation(program, "duWeights");
|
||||
uniformDvWeightsTexture = glGetUniformLocation(program, "dvWeights");
|
||||
uniformStart = glGetUniformLocation(program, "batchStart");
|
||||
uniformEnd = glGetUniformLocation(program, "batchEnd");
|
||||
uniformSrcBufferTexture = glGetUniformLocation(program, "vertexBuffer");
|
||||
uniformSrcOffset = glGetUniformLocation(program, "srcOffset");
|
||||
uniformSizesTexture = glGetUniformLocation(program, "sizes");
|
||||
uniformOffsetsTexture = glGetUniformLocation(program, "offsets");
|
||||
uniformIndicesTexture = glGetUniformLocation(program, "indices");
|
||||
uniformWeightsTexture = glGetUniformLocation(program, "weights");
|
||||
uniformDuWeightsTexture = glGetUniformLocation(program, "duWeights");
|
||||
uniformDvWeightsTexture = glGetUniformLocation(program, "dvWeights");
|
||||
uniformDuuWeightsTexture = glGetUniformLocation(program, "duuWeights");
|
||||
uniformDuvWeightsTexture = glGetUniformLocation(program, "duvWeights");
|
||||
uniformDvvWeightsTexture = glGetUniformLocation(program, "dvvWeights");
|
||||
uniformStart = glGetUniformLocation(program, "batchStart");
|
||||
uniformEnd = glGetUniformLocation(program, "batchEnd");
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -598,19 +902,22 @@ bool
|
||||
GLXFBEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
|
||||
BufferDescriptor const &dstDesc,
|
||||
BufferDescriptor const &duDesc,
|
||||
BufferDescriptor const &dvDesc) {
|
||||
BufferDescriptor const &dvDesc,
|
||||
BufferDescriptor const &duuDesc,
|
||||
BufferDescriptor const &duvDesc,
|
||||
BufferDescriptor const &dvvDesc,
|
||||
bool sharedDerivativeBuffers) {
|
||||
// create stencil kernel
|
||||
if (program) {
|
||||
glDeleteProgram(program);
|
||||
}
|
||||
|
||||
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
|
||||
const char *kernelDef = derivatives
|
||||
? "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n"
|
||||
"#define OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES\n"
|
||||
: "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n";
|
||||
const char * kernelDefines =
|
||||
"#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n";
|
||||
|
||||
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef);
|
||||
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc,
|
||||
duuDesc, duvDesc, dvvDesc,
|
||||
kernelDefines, sharedDerivativeBuffers);
|
||||
if (program == 0) return false;
|
||||
|
||||
// cache uniform locations
|
||||
@ -623,7 +930,6 @@ GLXFBEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -37,13 +37,22 @@ layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; };
|
||||
|
||||
// derivative buffers (if needed)
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
|
||||
uniform ivec3 duDesc;
|
||||
uniform ivec3 dvDesc;
|
||||
layout(binding=2) buffer du_buffer { float duBuffer[]; };
|
||||
layout(binding=3) buffer dv_buffer { float dvBuffer[]; };
|
||||
#endif
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
|
||||
uniform ivec3 duuDesc;
|
||||
uniform ivec3 duvDesc;
|
||||
uniform ivec3 dvvDesc;
|
||||
layout(binding=10) buffer duu_buffer { float duuBuffer[]; };
|
||||
layout(binding=11) buffer duv_buffer { float duvBuffer[]; };
|
||||
layout(binding=12) buffer dvv_buffer { float dvvBuffer[]; };
|
||||
#endif
|
||||
|
||||
// stencil buffers
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
|
||||
@ -55,11 +64,17 @@ layout(binding=5) buffer stencilOffsets { int _offsets[]; };
|
||||
layout(binding=6) buffer stencilIndices { int _indices[]; };
|
||||
layout(binding=7) buffer stencilWeights { float _weights[]; };
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
|
||||
layout(binding=8) buffer stencilDuWeights { float _duWeights[]; };
|
||||
layout(binding=9) buffer stencilDvWeights { float _dvWeights[]; };
|
||||
#endif
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
|
||||
layout(binding=13) buffer stencilDuuWeights { float _duuWeights[]; };
|
||||
layout(binding=14) buffer stencilDuvWeights { float _duvWeights[]; };
|
||||
layout(binding=15) buffer stencilDvvWeights { float _dvvWeights[]; };
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// patch buffers
|
||||
@ -119,7 +134,7 @@ void addWithWeight(inout Vertex v, const Vertex src, float weight) {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
|
||||
void writeDu(int index, Vertex du) {
|
||||
int duIndex = duDesc.x + index * duDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
@ -135,6 +150,29 @@ void writeDv(int index, Vertex dv) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
|
||||
void writeDuu(int index, Vertex duu) {
|
||||
int duuIndex = duuDesc.x + index * duuDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
duuBuffer[duuIndex + i] = duu.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDuv(int index, Vertex duv) {
|
||||
int duvIndex = duvDesc.x + index * duvDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
duvBuffer[duvIndex + i] = duv.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDvv(int index, Vertex dvv) {
|
||||
int dvvIndex = dvvDesc.x + index * dvvDesc.z;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
dvvBuffer[dvvIndex + i] = dvv.vertexData[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_KERNEL_EVAL_STENCILS)
|
||||
|
||||
@ -160,7 +198,7 @@ void main() {
|
||||
|
||||
writeVertex(current, dst);
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
|
||||
Vertex du, dv;
|
||||
clear(du);
|
||||
clear(dv);
|
||||
@ -178,6 +216,29 @@ void main() {
|
||||
writeDv(current, dv);
|
||||
}
|
||||
#endif
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
|
||||
Vertex duu, duv, dvv;
|
||||
clear(duu);
|
||||
clear(duv);
|
||||
clear(dvv);
|
||||
for (int i=0; i<size; ++i) {
|
||||
// expects the compiler optimizes readVertex out here.
|
||||
Vertex src = readVertex(_indices[offset+i]);
|
||||
addWithWeight(duu, src, _duuWeights[offset+i]);
|
||||
addWithWeight(duv, src, _duvWeights[offset+i]);
|
||||
addWithWeight(dvv, src, _dvvWeights[offset+i]);
|
||||
}
|
||||
|
||||
if (duuDesc.y > 0) { // length
|
||||
writeDuu(current, duu);
|
||||
}
|
||||
if (duvDesc.y > 0) {
|
||||
writeDuv(current, duv);
|
||||
}
|
||||
if (dvvDesc.y > 0) {
|
||||
writeDvv(current, dvv);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -260,6 +321,9 @@ void main() {
|
||||
wP[i] = wP4[i];
|
||||
wDs[i] = wDs4[i];
|
||||
wDt[i] = wDt4[i];
|
||||
wDss[i] = wDss4[i];
|
||||
wDst[i] = wDst4[i];
|
||||
wDtt[i] = wDtt4[i];
|
||||
}
|
||||
} else if (patchType == 6) {
|
||||
float wP16[16], wDs16[16], wDt16[16], wDss16[16], wDst16[16], wDtt16[16];
|
||||
@ -269,16 +333,22 @@ void main() {
|
||||
wP[i] = wP16[i];
|
||||
wDs[i] = wDs16[i];
|
||||
wDt[i] = wDt16[i];
|
||||
wDss[i] = wDss16[i];
|
||||
wDst[i] = wDst16[i];
|
||||
wDtt[i] = wDtt16[i];
|
||||
}
|
||||
} else if (patchType == 9) {
|
||||
OsdGetGregoryPatchWeights(uv.s, uv.t, dScale, wP, wDs, wDt, wDss, wDst, wDtt);
|
||||
numControlVertices = 20;
|
||||
}
|
||||
|
||||
Vertex dst, du, dv;
|
||||
Vertex dst, du, dv, duu, duv, dvv;
|
||||
clear(dst);
|
||||
clear(du);
|
||||
clear(dv);
|
||||
clear(duu);
|
||||
clear(duv);
|
||||
clear(dvv);
|
||||
|
||||
int indexStride = getNumControlVertices(array.x);
|
||||
int indexBase = array.z + indexStride * (patchIndex - array.w);
|
||||
@ -288,10 +358,13 @@ void main() {
|
||||
addWithWeight(dst, readVertex(index), wP[cv]);
|
||||
addWithWeight(du, readVertex(index), wDs[cv]);
|
||||
addWithWeight(dv, readVertex(index), wDt[cv]);
|
||||
addWithWeight(duu, readVertex(index), wDss[cv]);
|
||||
addWithWeight(duv, readVertex(index), wDst[cv]);
|
||||
addWithWeight(dvv, readVertex(index), wDtt[cv]);
|
||||
}
|
||||
writeVertex(current, dst);
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_1ST_DERIVATIVES)
|
||||
if (duDesc.y > 0) { // length
|
||||
writeDu(current, du);
|
||||
}
|
||||
@ -299,6 +372,17 @@ void main() {
|
||||
writeDv(current, dv);
|
||||
}
|
||||
#endif
|
||||
#if defined(OPENSUBDIV_GLSL_COMPUTE_USE_2ND_DERIVATIVES)
|
||||
if (duuDesc.y > 0) { // length
|
||||
writeDuu(current, duu);
|
||||
}
|
||||
if (duvDesc.y > 0) { // length
|
||||
writeDuv(current, duv);
|
||||
}
|
||||
if (dvvDesc.y > 0) {
|
||||
writeDvv(current, dvv);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -63,7 +63,22 @@ void writeVertex(Vertex v) {
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES) && \
|
||||
defined(OPENSUBDIV_GLSL_XFB_SHARED_1ST_DERIVATIVE_BUFFERS)
|
||||
out float outDeriv1Buffer[2*LENGTH];
|
||||
|
||||
void writeDu(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDeriv1Buffer[i] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDv(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDeriv1Buffer[i+LENGTH] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
#elif defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
|
||||
out float outDuBuffer[LENGTH];
|
||||
out float outDvBuffer[LENGTH];
|
||||
|
||||
@ -80,6 +95,51 @@ void writeDv(Vertex v) {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES) && \
|
||||
defined(OPENSUBDIV_GLSL_XFB_SHARED_2ND_DERIVATIVE_BUFFERS)
|
||||
out float outDeriv2Buffer[3*LENGTH];
|
||||
|
||||
void writeDuu(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDeriv2Buffer[i] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDuv(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDeriv2Buffer[i+LENGTH] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDvv(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDeriv2Buffer[i+2*LENGTH] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
#elif defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
|
||||
out float outDuuBuffer[LENGTH];
|
||||
out float outDuvBuffer[LENGTH];
|
||||
out float outDvvBuffer[LENGTH];
|
||||
|
||||
void writeDuu(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDuuBuffer[i] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDuv(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDuvBuffer[i] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
|
||||
void writeDvv(Vertex v) {
|
||||
for(int i = 0; i < LENGTH; i++) {
|
||||
outDvvBuffer[i] = v.vertexData[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS)
|
||||
@ -89,11 +149,17 @@ uniform isamplerBuffer offsets;
|
||||
uniform isamplerBuffer indices;
|
||||
uniform samplerBuffer weights;
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
|
||||
uniform samplerBuffer duWeights;
|
||||
uniform samplerBuffer dvWeights;
|
||||
#endif
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
|
||||
uniform samplerBuffer duuWeights;
|
||||
uniform samplerBuffer duvWeights;
|
||||
uniform samplerBuffer dvvWeights;
|
||||
#endif
|
||||
|
||||
uniform int batchStart = 0;
|
||||
uniform int batchEnd = 0;
|
||||
|
||||
@ -104,10 +170,13 @@ void main() {
|
||||
return;
|
||||
}
|
||||
|
||||
Vertex dst, du, dv;
|
||||
Vertex dst, du, dv, duu, duv, dvv;
|
||||
clear(dst);
|
||||
clear(du);
|
||||
clear(dv);
|
||||
clear(duu);
|
||||
clear(duv);
|
||||
clear(dvv);
|
||||
|
||||
int offset = texelFetch(offsets, current).x;
|
||||
uint size = texelFetch(sizes, current).x;
|
||||
@ -117,19 +186,32 @@ void main() {
|
||||
float weight = texelFetch(weights, offset+stencil).x;
|
||||
addWithWeight(dst, readVertex( index ), weight);
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
|
||||
float duWeight = texelFetch(duWeights, offset+stencil).x;
|
||||
float dvWeight = texelFetch(dvWeights, offset+stencil).x;
|
||||
addWithWeight(du, readVertex(index), duWeight);
|
||||
addWithWeight(dv, readVertex(index), dvWeight);
|
||||
#endif
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
|
||||
float duuWeight = texelFetch(duuWeights, offset+stencil).x;
|
||||
float duvWeight = texelFetch(duvWeights, offset+stencil).x;
|
||||
float dvvWeight = texelFetch(dvvWeights, offset+stencil).x;
|
||||
addWithWeight(duu, readVertex(index), duuWeight);
|
||||
addWithWeight(duv, readVertex(index), duvWeight);
|
||||
addWithWeight(dvv, readVertex(index), dvvWeight);
|
||||
#endif
|
||||
}
|
||||
writeVertex(dst);
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
|
||||
writeDu(du);
|
||||
writeDv(dv);
|
||||
#endif
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
|
||||
writeDuu(duu);
|
||||
writeDuv(duv);
|
||||
writeDvv(dvv);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
@ -213,31 +295,43 @@ void main() {
|
||||
int numControlVertices = 0;
|
||||
if (patchType == 3) {
|
||||
float wP4[4], wDs4[4], wDt4[4], wDss4[4], wDst4[4], wDtt4[4];
|
||||
OsdGetBilinearPatchWeights(coord.s, coord.t, dScale, wP4, wDs4, wDt4, wDss4, wDst4, wDtt4);
|
||||
OsdGetBilinearPatchWeights(coord.s, coord.t, dScale, wP4,
|
||||
wDs4, wDt4, wDss4, wDst4, wDtt4);
|
||||
numControlVertices = 4;
|
||||
for (int i=0; i<numControlVertices; ++i) {
|
||||
wP[i] = wP4[i];
|
||||
wDs[i] = wDs4[i];
|
||||
wDt[i] = wDt4[i];
|
||||
wDss[i] = wDss4[i];
|
||||
wDst[i] = wDst4[i];
|
||||
wDtt[i] = wDtt4[i];
|
||||
}
|
||||
} else if (patchType == 6) {
|
||||
float wP16[16], wDs16[16], wDt16[16], wDss16[16], wDst16[16], wDtt16[16];
|
||||
OsdGetBSplinePatchWeights(coord.s, coord.t, dScale, boundary, wP16, wDs16, wDt16, wDss16, wDst16, wDtt16);
|
||||
OsdGetBSplinePatchWeights(coord.s, coord.t, dScale, boundary, wP16,
|
||||
wDs16, wDt16, wDss16, wDst16, wDtt16);
|
||||
numControlVertices = 16;
|
||||
for (int i=0; i<numControlVertices; ++i) {
|
||||
wP[i] = wP16[i];
|
||||
wDs[i] = wDs16[i];
|
||||
wDt[i] = wDt16[i];
|
||||
wDss[i] = wDss16[i];
|
||||
wDst[i] = wDst16[i];
|
||||
wDtt[i] = wDtt16[i];
|
||||
}
|
||||
} else if (patchType == 9) {
|
||||
OsdGetGregoryPatchWeights(coord.s, coord.t, dScale, wP, wDs, wDt, wDss, wDst, wDtt);
|
||||
OsdGetGregoryPatchWeights(coord.s, coord.t, dScale, wP,
|
||||
wDs, wDt, wDss, wDst, wDtt);
|
||||
numControlVertices = 20;
|
||||
}
|
||||
|
||||
Vertex dst, du, dv;
|
||||
Vertex dst, du, dv, duu, duv, dvv;
|
||||
clear(dst);
|
||||
clear(du);
|
||||
clear(dv);
|
||||
clear(duu);
|
||||
clear(duv);
|
||||
clear(dvv);
|
||||
|
||||
int indexStride = getNumControlVertices(array.x);
|
||||
int indexBase = array.z + indexStride * (patchIndex - array.w);
|
||||
@ -247,15 +341,22 @@ void main() {
|
||||
addWithWeight(dst, readVertex(index), wP[cv]);
|
||||
addWithWeight(du, readVertex(index), wDs[cv]);
|
||||
addWithWeight(dv, readVertex(index), wDt[cv]);
|
||||
addWithWeight(duu, readVertex(index), wDss[cv]);
|
||||
addWithWeight(duv, readVertex(index), wDst[cv]);
|
||||
addWithWeight(dvv, readVertex(index), wDtt[cv]);
|
||||
}
|
||||
|
||||
writeVertex(dst);
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES)
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES)
|
||||
writeDu(du);
|
||||
writeDv(dv);
|
||||
#endif
|
||||
|
||||
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES)
|
||||
writeDuu(duu);
|
||||
writeDuv(duv);
|
||||
writeDvv(dvv);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -197,7 +197,7 @@ OmpEvaluator::EvalPatches(
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
float wP[20], wDu[20], wDv[20];
|
||||
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
|
||||
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
|
||||
@ -214,15 +214,15 @@ OmpEvaluator::EvalPatches(
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
coord.s, coord.t, wP, wDu, wDv);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
coord.s, coord.t, wP, wDu, wDv);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
coord.s, coord.t, wP, wDu, wDv);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
continue;
|
||||
@ -239,8 +239,8 @@ OmpEvaluator::EvalPatches(
|
||||
dvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
duT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
duT.AddWithWeight(srcT[cvs[j]], wDu[j]);
|
||||
dvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
|
||||
}
|
||||
++dstT;
|
||||
++duT;
|
||||
@ -249,6 +249,101 @@ OmpEvaluator::EvalPatches(
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
OmpEvaluator::EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer) {
|
||||
|
||||
src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
if (duu) duu += duuDesc.offset;
|
||||
if (duv) duv += duvDesc.offset;
|
||||
if (dvv) dvv += dvvDesc.offset;
|
||||
|
||||
BufferAdapter<const float> srcT(src, srcDesc.length, srcDesc.stride);
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < numPatchCoords; ++i) {
|
||||
float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
|
||||
BufferAdapter<float> dstT(dst + dstDesc.stride*i, dstDesc.length, dstDesc.stride);
|
||||
BufferAdapter<float> duT(du + duDesc.stride*i, duDesc.length, duDesc.stride);
|
||||
BufferAdapter<float> dvT(dv + dvDesc.stride*i, dvDesc.length, dvDesc.stride);
|
||||
BufferAdapter<float> duuT(duu + duuDesc.stride*i, duuDesc.length, duuDesc.stride);
|
||||
BufferAdapter<float> duvT(duv + duvDesc.stride*i, duvDesc.length, duvDesc.stride);
|
||||
BufferAdapter<float> dvvT(dvv + dvvDesc.stride*i, dvvDesc.length, dvvDesc.stride);
|
||||
|
||||
PatchCoord const &coord = patchCoords[i];
|
||||
PatchArray const &array = patchArrays[coord.handle.arrayIndex];
|
||||
|
||||
Far::PatchParam const & param =
|
||||
patchParamBuffer[coord.handle.patchIndex];
|
||||
int patchType = param.IsRegular()
|
||||
? Far::PatchDescriptor::REGULAR
|
||||
: array.GetPatchType();
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv, wDuu, wDuv, wDvv);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv, wDuu, wDuv, wDvv);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv, wDuu, wDuv, wDvv);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
|
||||
int indexBase = array.GetIndexBase() + indexStride *
|
||||
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
|
||||
|
||||
const int *cvs = &patchIndexBuffer[indexBase];
|
||||
|
||||
dstT.Clear();
|
||||
duT.Clear();
|
||||
dvT.Clear();
|
||||
duuT.Clear();
|
||||
duvT.Clear();
|
||||
dvvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
duT.AddWithWeight(srcT[cvs[j]], wDu[j]);
|
||||
dvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
|
||||
duuT.AddWithWeight(srcT[cvs[j]], wDuu[j]);
|
||||
duvT.AddWithWeight(srcT[cvs[j]], wDuv[j]);
|
||||
dvvT.AddWithWeight(srcT[cvs[j]], wDvv[j]);
|
||||
}
|
||||
++dstT;
|
||||
++duT;
|
||||
++dvT;
|
||||
++duuT;
|
||||
++duvT;
|
||||
++dvvT;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* static */
|
||||
void
|
||||
|
@ -244,6 +244,177 @@ public:
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the omp kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the omp kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const OmpEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
&stencilTable->GetDuuWeights()[0],
|
||||
&stencilTable->GetDuvWeights()[0],
|
||||
&stencilTable->GetDvvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
const float * duuWeights,
|
||||
const float * duvWeights,
|
||||
const float * dvvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
@ -373,6 +544,102 @@ public:
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
@ -457,6 +724,72 @@ public:
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
@ -508,6 +841,164 @@ public:
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetVaryingPatchArrayBuffer(),
|
||||
patchTable->GetVaryingPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetVaryingPatchArrayBuffer(),
|
||||
patchTable->GetVaryingPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
@ -562,6 +1053,170 @@ public:
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the omp evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the omp evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
OmpEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
|
@ -177,6 +177,99 @@ OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float * dst, BufferDescriptor const &dstDesc,
|
||||
float * dstDu, BufferDescriptor const &dstDuDesc,
|
||||
float * dstDv, BufferDescriptor const &dstDvDesc,
|
||||
float * dstDuu, BufferDescriptor const &dstDuuDesc,
|
||||
float * dstDuv, BufferDescriptor const &dstDuvDesc,
|
||||
float * dstDvv, BufferDescriptor const &dstDvvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
float const * duuWeights,
|
||||
float const * duvWeights,
|
||||
float const * dvvWeights,
|
||||
int start, int end) {
|
||||
start = (start > 0 ? start : 0);
|
||||
|
||||
src += srcDesc.offset;
|
||||
dst += dstDesc.offset;
|
||||
dstDu += dstDuDesc.offset;
|
||||
dstDv += dstDvDesc.offset;
|
||||
dstDuu += dstDuuDesc.offset;
|
||||
dstDuv += dstDuvDesc.offset;
|
||||
dstDvv += dstDvvDesc.offset;
|
||||
|
||||
int numThreads = omp_get_max_threads();
|
||||
int n = end - start;
|
||||
|
||||
float * result = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDuu = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDuv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
float * resultDvv = (float*)alloca(srcDesc.length * numThreads * sizeof(float));
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < n; ++i) {
|
||||
|
||||
int index = i + start; // Stencil index
|
||||
|
||||
// Get thread-local pointers
|
||||
int const * threadIndices = indices + offsets[index];
|
||||
float const * threadWeights = weights + offsets[index];
|
||||
float const * threadWeightsDu = duWeights + offsets[index];
|
||||
float const * threadWeightsDv = dvWeights + offsets[index];
|
||||
float const * threadWeightsDuu = duuWeights + offsets[index];
|
||||
float const * threadWeightsDuv = duvWeights + offsets[index];
|
||||
float const * threadWeightsDvv = dvvWeights + offsets[index];
|
||||
|
||||
int threadId = omp_get_thread_num();
|
||||
|
||||
float * threadResult = result + threadId*srcDesc.length;
|
||||
float * threadResultDu = resultDu + threadId*srcDesc.length;
|
||||
float * threadResultDv = resultDv + threadId*srcDesc.length;
|
||||
float * threadResultDuu = resultDuu + threadId*srcDesc.length;
|
||||
float * threadResultDuv = resultDuv + threadId*srcDesc.length;
|
||||
float * threadResultDvv = resultDvv + threadId*srcDesc.length;
|
||||
|
||||
clear(threadResult, dstDesc);
|
||||
clear(threadResultDu, dstDuDesc);
|
||||
clear(threadResultDv, dstDvDesc);
|
||||
clear(threadResultDuu, dstDuuDesc);
|
||||
clear(threadResultDuv, dstDuvDesc);
|
||||
clear(threadResultDvv, dstDvvDesc);
|
||||
|
||||
for (int j=0; j<(int)sizes[index]; ++j) {
|
||||
addWithWeight(threadResult, src,
|
||||
threadIndices[j], threadWeights[j], srcDesc);
|
||||
addWithWeight(threadResultDu, src,
|
||||
threadIndices[j], threadWeightsDu[j], srcDesc);
|
||||
addWithWeight(threadResultDv, src,
|
||||
threadIndices[j], threadWeightsDv[j], srcDesc);
|
||||
addWithWeight(threadResultDuu, src,
|
||||
threadIndices[j], threadWeightsDuu[j], srcDesc);
|
||||
addWithWeight(threadResultDuv, src,
|
||||
threadIndices[j], threadWeightsDuv[j], srcDesc);
|
||||
addWithWeight(threadResultDvv, src,
|
||||
threadIndices[j], threadWeightsDvv[j], srcDesc);
|
||||
}
|
||||
|
||||
copy(dst, i, threadResult, dstDesc);
|
||||
copy(dstDu, i, threadResultDu, dstDuDesc);
|
||||
copy(dstDv, i, threadResultDv, dstDvDesc);
|
||||
copy(dstDuu, i, threadResultDuu, dstDuuDesc);
|
||||
copy(dstDuv, i, threadResultDuv, dstDuvDesc);
|
||||
copy(dstDvv, i, threadResultDvv, dstDvvDesc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -56,6 +56,25 @@ OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float const * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
OmpEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float * dst, BufferDescriptor const &dstDesc,
|
||||
float * dstDu, BufferDescriptor const &dstDuDesc,
|
||||
float * dstDv, BufferDescriptor const &dstDvDesc,
|
||||
float * dstDuu, BufferDescriptor const &dstDuuDesc,
|
||||
float * dstDuv, BufferDescriptor const &dstDuvDesc,
|
||||
float * dstDvv, BufferDescriptor const &dstDvvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
float const * duuWeights,
|
||||
float const * duvWeights,
|
||||
float const * dvvWeights,
|
||||
int start, int end);
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -75,8 +75,55 @@ TbbEvaluator::EvalStencils(
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights, NULL, NULL, NULL,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalStencils(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
const float * duuWeights,
|
||||
const float * duvWeights,
|
||||
const float * dvvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
if (srcDesc.length != duuDesc.length) return false;
|
||||
if (srcDesc.length != duvDesc.length) return false;
|
||||
if (srcDesc.length != dvvDesc.length) return false;
|
||||
|
||||
TbbEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
duu, duuDesc,
|
||||
duv, duvDesc,
|
||||
dvv, dvvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
duuWeights, duvWeights, dvvWeights,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
@ -96,6 +143,9 @@ TbbEvaluator::EvalPatches(
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
TbbEvalPatches(src, srcDesc, dst, dstDesc,
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
numPatchCoords, patchCoords,
|
||||
@ -121,6 +171,36 @@ TbbEvaluator::EvalPatches(
|
||||
|
||||
TbbEvalPatches(src, srcDesc, dst, dstDesc,
|
||||
du, duDesc, dv, dvDesc,
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
TbbEvaluator::EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
TbbEvalPatches(src, srcDesc, dst, dstDesc,
|
||||
du, duDesc, dv, dvDesc,
|
||||
duu, duuDesc, duv, duvDesc, dvv, dvvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer, patchIndexBuffer, patchParamBuffer);
|
||||
|
||||
|
@ -244,6 +244,177 @@ public:
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the tbb kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the tbb kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const TbbEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
&stencilTable->GetDuuWeights()[0],
|
||||
&stencilTable->GetDuvWeights()[0],
|
||||
&stencilTable->GetDvvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duuWeights pointer to the duu-weights buffer of the stencil table
|
||||
///
|
||||
/// @param duvWeights pointer to the duv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvvWeights pointer to the dvv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
const float * duuWeights,
|
||||
const float * duvWeights,
|
||||
const float * dvvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
@ -373,6 +544,102 @@ public:
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// a same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
@ -457,6 +724,72 @@ public:
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluate limit values on given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output pointer derivative wrt u. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dv Output pointer derivative wrt v. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duu Output pointer 2nd derivative wrt u. An offset of
|
||||
/// duuDesc will be applied internally.
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duv Output pointer 2nd derivative wrt u and v. An offset of
|
||||
/// duvDesc will be applied internally.
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvv Output pointer 2nd derivative wrt v. An offset of
|
||||
/// dvvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
float *duu, BufferDescriptor const &duuDesc,
|
||||
float *duv, BufferDescriptor const &duvDesc,
|
||||
float *dvv, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
@ -508,6 +841,164 @@ public:
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetVaryingPatchArrayBuffer(),
|
||||
patchTable->GetVaryingPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetVaryingPatchArrayBuffer(),
|
||||
patchTable->GetVaryingPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
@ -562,6 +1053,170 @@ public:
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function. This function has a same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output buffer derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output buffer derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param duuBuffer Output buffer 2nd derivative wrt u
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duuDesc vertex buffer descriptor for the duuBuffer
|
||||
///
|
||||
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duvDesc vertex buffer descriptor for the duvBuffer
|
||||
///
|
||||
/// @param dvvBuffer Output buffer 2nd derivative wrt v
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param fvarChannel face-varying channel
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatchesFaceVarying(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
|
||||
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
|
||||
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
int fvarChannel,
|
||||
TbbEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
duuBuffer->BindCpuBuffer(), duuDesc,
|
||||
duvBuffer->BindCpuBuffer(), duvDesc,
|
||||
dvvBuffer->BindCpuBuffer(), dvvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
|
||||
patchTable->GetFVarPatchParamBuffer(fvarChannel));
|
||||
}
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
|
@ -219,6 +219,78 @@ TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float * dst, BufferDescriptor const &dstDesc,
|
||||
float * du, BufferDescriptor const &duDesc,
|
||||
float * dv, BufferDescriptor const &dvDesc,
|
||||
float * duu, BufferDescriptor const &duuDesc,
|
||||
float * duv, BufferDescriptor const &duvDesc,
|
||||
float * dvv, BufferDescriptor const &dvvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
float const * duuWeights,
|
||||
float const * duvWeights,
|
||||
float const * dvvWeights,
|
||||
int start, int end) {
|
||||
|
||||
if (src) src += srcDesc.offset;
|
||||
if (dst) dst += dstDesc.offset;
|
||||
if (du) du += duDesc.offset;
|
||||
if (dv) dv += dvDesc.offset;
|
||||
if (duu) duu += duuDesc.offset;
|
||||
if (duv) duv += duvDesc.offset;
|
||||
if (dvv) dvv += dvvDesc.offset;
|
||||
|
||||
// PERFORMANCE: need to combine 3 launches together
|
||||
if (dst) {
|
||||
TBBStencilKernel kernel(src, srcDesc, dst, dstDesc,
|
||||
sizes, offsets, indices, weights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (du) {
|
||||
TBBStencilKernel kernel(src, srcDesc, du, duDesc,
|
||||
sizes, offsets, indices, duWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (dv) {
|
||||
TBBStencilKernel kernel(src, srcDesc, dv, dvDesc,
|
||||
sizes, offsets, indices, dvWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (duu) {
|
||||
TBBStencilKernel kernel(src, srcDesc, duu, duuDesc,
|
||||
sizes, offsets, indices, duuWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (duv) {
|
||||
TBBStencilKernel kernel(src, srcDesc, duv, duvDesc,
|
||||
sizes, offsets, indices, duvWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
|
||||
if (dvv) {
|
||||
TBBStencilKernel kernel(src, srcDesc, dvv, dvvDesc,
|
||||
sizes, offsets, indices, dvvWeights);
|
||||
tbb::blocked_range<int> range(start, end, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -257,10 +329,16 @@ class TbbEvalPatchesKernel {
|
||||
BufferDescriptor _dstDesc;
|
||||
BufferDescriptor _dstDuDesc;
|
||||
BufferDescriptor _dstDvDesc;
|
||||
BufferDescriptor _dstDuuDesc;
|
||||
BufferDescriptor _dstDuvDesc;
|
||||
BufferDescriptor _dstDvvDesc;
|
||||
float const * _src;
|
||||
float * _dst;
|
||||
float * _dstDu;
|
||||
float * _dstDv;
|
||||
float * _dstDuu;
|
||||
float * _dstDuv;
|
||||
float * _dstDvv;
|
||||
int _numPatchCoords;
|
||||
const PatchCoord *_patchCoords;
|
||||
const PatchArray *_patchArrayBuffer;
|
||||
@ -272,6 +350,9 @@ public:
|
||||
float *dst, BufferDescriptor dstDesc,
|
||||
float *dstDu, BufferDescriptor dstDuDesc,
|
||||
float *dstDv, BufferDescriptor dstDvDesc,
|
||||
float *dstDuu, BufferDescriptor dstDuuDesc,
|
||||
float *dstDuv, BufferDescriptor dstDuvDesc,
|
||||
float *dstDvv, BufferDescriptor dstDvvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
@ -279,7 +360,10 @@ public:
|
||||
const PatchParam *patchParamBuffer) :
|
||||
_srcDesc(srcDesc), _dstDesc(dstDesc),
|
||||
_dstDuDesc(dstDuDesc), _dstDvDesc(dstDvDesc),
|
||||
_src(src), _dst(dst), _dstDu(dstDu), _dstDv(dstDv),
|
||||
_dstDuuDesc(dstDuuDesc), _dstDuvDesc(dstDuvDesc), _dstDvvDesc(dstDvvDesc),
|
||||
_src(src), _dst(dst),
|
||||
_dstDu(dstDu), _dstDv(dstDv),
|
||||
_dstDuu(dstDuu), _dstDuv(dstDuv), _dstDvv(dstDvv),
|
||||
_numPatchCoords(numPatchCoords),
|
||||
_patchCoords(patchCoords),
|
||||
_patchArrayBuffer(patchArrayBuffer),
|
||||
@ -290,13 +374,15 @@ public:
|
||||
void operator() (tbb::blocked_range<int> const &r) const {
|
||||
if (_dstDu == NULL && _dstDv == NULL) {
|
||||
compute(r);
|
||||
} else if (_dstDuu == NULL && _dstDuv == NULL && _dstDvv == NULL) {
|
||||
computeWith1stDerivative(r);
|
||||
} else {
|
||||
computeWithDerivative(r);
|
||||
computeWith2ndDerivative(r);
|
||||
}
|
||||
}
|
||||
|
||||
void compute(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
float wP[20], wDu[20], wDv[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
@ -305,10 +391,66 @@ public:
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
|
||||
BufferAdapter<float> dstDuT(_dstDu,
|
||||
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
PatchCoord const &coord = _patchCoords[i];
|
||||
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
|
||||
|
||||
Far::PatchParam const & param =
|
||||
_patchParamBuffer[coord.handle.patchIndex];
|
||||
int patchType = param.IsRegular()
|
||||
? Far::PatchDescriptor::REGULAR
|
||||
: array.GetPatchType();
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
|
||||
int indexBase = array.GetIndexBase() + indexStride *
|
||||
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
|
||||
|
||||
const int *cvs = &_patchIndexBuffer[indexBase];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
++dstT;
|
||||
}
|
||||
}
|
||||
|
||||
void computeWith1stDerivative(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDu[20], wDv[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
BufferAdapter<float> dstT(_dst + _dstDesc.offset
|
||||
+ r.begin() * _dstDesc.stride,
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
|
||||
+ r.begin() * _dstDuDesc.stride,
|
||||
_dstDuDesc.length,
|
||||
_dstDuDesc.stride);
|
||||
BufferAdapter<float> dstDvT(_dstDv,
|
||||
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
|
||||
+ r.begin() * _dstDvDesc.stride,
|
||||
_dstDvDesc.length,
|
||||
_dstDvDesc.stride);
|
||||
|
||||
@ -325,74 +467,18 @@ public:
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
|
||||
int indexBase = array.GetIndexBase() + indexStride *
|
||||
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
|
||||
|
||||
const int *cvs = &_patchIndexBuffer[indexBase];
|
||||
|
||||
dstT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
}
|
||||
++dstT;
|
||||
}
|
||||
}
|
||||
|
||||
void computeWithDerivative(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDs[20], wDt[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
BufferAdapter<float> dstT(_dst + _dstDesc.offset
|
||||
+ r.begin() * _dstDesc.stride,
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
|
||||
+ r.begin() * _dstDuDesc.stride,
|
||||
_dstDuDesc.length,
|
||||
_dstDuDesc.stride);
|
||||
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
|
||||
+ r.begin() * _dstDvDesc.stride,
|
||||
_dstDvDesc.length,
|
||||
_dstDvDesc.stride);
|
||||
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
PatchCoord const &coord = _patchCoords[i];
|
||||
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
|
||||
|
||||
Far::PatchParam const & param =
|
||||
_patchParamBuffer[coord.handle.patchIndex];
|
||||
int patchType = param.IsRegular()
|
||||
? Far::PatchDescriptor::REGULAR
|
||||
: array.GetPatchType();
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(param,
|
||||
coord.s, coord.t, wP, wDs, wDt);
|
||||
coord.s, coord.t,
|
||||
wP, wDu, wDv);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
@ -409,14 +495,103 @@ public:
|
||||
dstDvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
dstDuT.AddWithWeight(srcT[cvs[j]], wDs[j]);
|
||||
dstDvT.AddWithWeight(srcT[cvs[j]], wDt[j]);
|
||||
dstDuT.AddWithWeight(srcT[cvs[j]], wDu[j]);
|
||||
dstDvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
|
||||
}
|
||||
++dstT;
|
||||
++dstDuT;
|
||||
++dstDvT;
|
||||
}
|
||||
}
|
||||
|
||||
void computeWith2ndDerivative(tbb::blocked_range<int> const &r) const {
|
||||
float wP[20], wDu[20], wDv[20], wDuu[20], wDuv[20], wDvv[20];
|
||||
BufferAdapter<const float> srcT(_src + _srcDesc.offset,
|
||||
_srcDesc.length,
|
||||
_srcDesc.stride);
|
||||
BufferAdapter<float> dstT(_dst + _dstDesc.offset
|
||||
+ r.begin() * _dstDesc.stride,
|
||||
_dstDesc.length,
|
||||
_dstDesc.stride);
|
||||
BufferAdapter<float> dstDuT(_dstDu + _dstDuDesc.offset
|
||||
+ r.begin() * _dstDuDesc.stride,
|
||||
_dstDuDesc.length,
|
||||
_dstDuDesc.stride);
|
||||
BufferAdapter<float> dstDvT(_dstDv + _dstDvDesc.offset
|
||||
+ r.begin() * _dstDvDesc.stride,
|
||||
_dstDvDesc.length,
|
||||
_dstDvDesc.stride);
|
||||
BufferAdapter<float> dstDuuT(_dstDuu + _dstDuuDesc.offset
|
||||
+ r.begin() * _dstDuuDesc.stride,
|
||||
_dstDuuDesc.length,
|
||||
_dstDuuDesc.stride);
|
||||
BufferAdapter<float> dstDuvT(_dstDuv + _dstDuvDesc.offset
|
||||
+ r.begin() * _dstDuvDesc.stride,
|
||||
_dstDuvDesc.length,
|
||||
_dstDuvDesc.stride);
|
||||
BufferAdapter<float> dstDvvT(_dstDvv + _dstDvvDesc.offset
|
||||
+ r.begin() * _dstDvvDesc.stride,
|
||||
_dstDvvDesc.length,
|
||||
_dstDvvDesc.stride);
|
||||
|
||||
for (int i = r.begin(); i < r.end(); ++i) {
|
||||
PatchCoord const &coord = _patchCoords[i];
|
||||
PatchArray const &array = _patchArrayBuffer[coord.handle.arrayIndex];
|
||||
|
||||
Far::PatchParam const & param =
|
||||
_patchParamBuffer[coord.handle.patchIndex];
|
||||
int patchType = param.IsRegular()
|
||||
? Far::PatchDescriptor::REGULAR
|
||||
: array.GetPatchType();
|
||||
|
||||
int numControlVertices = 0;
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
Far::internal::GetBSplineWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv, wDuu, wDuv, wDvv);
|
||||
numControlVertices = 16;
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
Far::internal::GetGregoryWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv, wDuu, wDuv, wDvv);
|
||||
numControlVertices = 20;
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
Far::internal::GetBilinearWeights(param,
|
||||
coord.s, coord.t, wP,
|
||||
wDu, wDv, wDuu, wDuv, wDvv);
|
||||
numControlVertices = 4;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
int indexStride = Far::PatchDescriptor(array.GetPatchType()).GetNumControlVertices();
|
||||
int indexBase = array.GetIndexBase() + indexStride *
|
||||
(coord.handle.patchIndex - array.GetPrimitiveIdBase());
|
||||
|
||||
const int *cvs = &_patchIndexBuffer[indexBase];
|
||||
|
||||
dstT.Clear();
|
||||
dstDuT.Clear();
|
||||
dstDvT.Clear();
|
||||
dstDuuT.Clear();
|
||||
dstDuvT.Clear();
|
||||
dstDvvT.Clear();
|
||||
for (int j = 0; j < numControlVertices; ++j) {
|
||||
dstT.AddWithWeight(srcT[cvs[j]], wP[j]);
|
||||
dstDuT.AddWithWeight(srcT[cvs[j]], wDu[j]);
|
||||
dstDvT.AddWithWeight(srcT[cvs[j]], wDv[j]);
|
||||
dstDuuT.AddWithWeight(srcT[cvs[j]], wDuu[j]);
|
||||
dstDuvT.AddWithWeight(srcT[cvs[j]], wDuv[j]);
|
||||
dstDvvT.AddWithWeight(srcT[cvs[j]], wDvv[j]);
|
||||
}
|
||||
++dstT;
|
||||
++dstDuT;
|
||||
++dstDvT;
|
||||
++dstDuuT;
|
||||
++dstDuvT;
|
||||
++dstDvvT;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -433,6 +608,39 @@ TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
|
||||
|
||||
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
|
||||
dstDu, dstDuDesc, dstDv, dstDvDesc,
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
NULL, BufferDescriptor(),
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer,
|
||||
patchIndexBuffer,
|
||||
patchParamBuffer);
|
||||
|
||||
tbb::blocked_range<int> range(0, numPatchCoords, grain_size);
|
||||
tbb::parallel_for(range, kernel);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *dstDu, BufferDescriptor const &dstDuDesc,
|
||||
float *dstDv, BufferDescriptor const &dstDvDesc,
|
||||
float *dstDuu, BufferDescriptor const &dstDuuDesc,
|
||||
float *dstDuv, BufferDescriptor const &dstDuvDesc,
|
||||
float *dstDvv, BufferDescriptor const &dstDvvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
|
||||
TbbEvalPatchesKernel kernel(src, srcDesc, dst, dstDesc,
|
||||
dstDu, dstDuDesc, dstDv, dstDvDesc,
|
||||
dstDuu, dstDuuDesc,
|
||||
dstDuv, dstDuvDesc,
|
||||
dstDvv, dstDvvDesc,
|
||||
numPatchCoords, patchCoords,
|
||||
patchArrayBuffer,
|
||||
patchIndexBuffer,
|
||||
|
@ -61,6 +61,25 @@ TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float const * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
TbbEvalStencils(float const * src, BufferDescriptor const &srcDesc,
|
||||
float * dst, BufferDescriptor const &dstDesc,
|
||||
float * dstDu, BufferDescriptor const &dstDuDesc,
|
||||
float * dstDv, BufferDescriptor const &dstDvDesc,
|
||||
float * dstDuu, BufferDescriptor const &dstDuuDesc,
|
||||
float * dstDuv, BufferDescriptor const &dstDuvDesc,
|
||||
float * dstDvv, BufferDescriptor const &dstDvvDesc,
|
||||
int const * sizes,
|
||||
int const * offsets,
|
||||
int const * indices,
|
||||
float const * weights,
|
||||
float const * duWeights,
|
||||
float const * dvWeights,
|
||||
float const * duuWeights,
|
||||
float const * duvWeights,
|
||||
float const * dvvWeights,
|
||||
int start, int end);
|
||||
|
||||
void
|
||||
TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
@ -72,6 +91,20 @@ TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
void
|
||||
TbbEvalPatches(float const *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *dstDu, BufferDescriptor const &dstDuDesc,
|
||||
float *dstDv, BufferDescriptor const &dstDvDesc,
|
||||
float *dstDuu, BufferDescriptor const &dstDuuDesc,
|
||||
float *dstDuv, BufferDescriptor const &dstDuvDesc,
|
||||
float *dstDvv, BufferDescriptor const &dstDvvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrayBuffer,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
Loading…
Reference in New Issue
Block a user