OpenSubdiv/opensubdiv/osd/clEvaluator.h
David G Yu fe38ad8cda Added missing Osd Evaluator methods for deriv eval
Now that Far::LimitStencilTable and Far::PatchTable
support evaluation of 1st and 2nd derivatives the
Osd Evaluator API for evaluating stencils and patches
has been updated to match.
2017-01-27 17:14:18 -08:00

2637 lines
125 KiB
C++

//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_CL_EVALUATOR_H
#define OPENSUBDIV3_OSD_CL_EVALUATOR_H
#include "../version.h"
#include "../osd/opencl.h"
#include "../osd/types.h"
#include "../osd/bufferDescriptor.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Far {
class PatchTable;
class StencilTable;
class LimitStencilTable;
}
namespace Osd {
/// \brief OpenCL stencil table
///
/// This class is an OpenCL buffer representation of Far::StencilTable.
///
/// CLCompute consumes this table to apply stencils
///
///
class CLStencilTable {
public:
template <typename DEVICE_CONTEXT>
static CLStencilTable *Create(Far::StencilTable const *stencilTable,
DEVICE_CONTEXT context) {
return new CLStencilTable(stencilTable, context->GetContext());
}
template <typename DEVICE_CONTEXT>
static CLStencilTable *Create(
Far::LimitStencilTable const *limitStencilTable,
DEVICE_CONTEXT context) {
return new CLStencilTable(limitStencilTable, context->GetContext());
}
CLStencilTable(Far::StencilTable const *stencilTable,
cl_context clContext);
CLStencilTable(Far::LimitStencilTable const *limitStencilTable,
cl_context clContext);
~CLStencilTable();
// interfaces needed for CLComputeKernel
cl_mem GetSizesBuffer() const { return _sizes; }
cl_mem GetOffsetsBuffer() const { return _offsets; }
cl_mem GetIndicesBuffer() const { return _indices; }
cl_mem GetWeightsBuffer() const { return _weights; }
cl_mem GetDuWeightsBuffer() const { return _duWeights; }
cl_mem GetDvWeightsBuffer() const { return _dvWeights; }
cl_mem GetDuuWeightsBuffer() const { return _duuWeights; }
cl_mem GetDuvWeightsBuffer() const { return _duvWeights; }
cl_mem GetDvvWeightsBuffer() const { return _dvvWeights; }
int GetNumStencils() const { return _numStencils; }
private:
cl_mem _sizes;
cl_mem _offsets;
cl_mem _indices;
cl_mem _weights;
cl_mem _duWeights;
cl_mem _dvWeights;
cl_mem _duuWeights;
cl_mem _duvWeights;
cl_mem _dvvWeights;
int _numStencils;
};
// ---------------------------------------------------------------------------
class CLEvaluator {
public:
typedef bool Instantiatable;
/// Generic creator template.
template <typename DEVICE_CONTEXT>
static CLEvaluator *Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
DEVICE_CONTEXT deviceContext) {
return Create(srcDesc, dstDesc, duDesc, dvDesc,
deviceContext->GetContext(),
deviceContext->GetCommandQueue());
}
static CLEvaluator * Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
cl_context clContext,
cl_command_queue clCommandQueue) {
CLEvaluator *instance = new CLEvaluator(clContext, clCommandQueue);
if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc))
return instance;
delete instance;
return NULL;
}
/// Generic creator template.
template <typename DEVICE_CONTEXT>
static CLEvaluator *Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
DEVICE_CONTEXT deviceContext) {
return Create(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
deviceContext->GetContext(),
deviceContext->GetCommandQueue());
}
static CLEvaluator * Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
cl_context clContext,
cl_command_queue clCommandQueue) {
CLEvaluator *instance = new CLEvaluator(clContext, clCommandQueue);
if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc))
return instance;
delete instance;
return NULL;
}
/// Constructor.
CLEvaluator(cl_context context, cl_command_queue queue);
/// Destructor.
~CLEvaluator();
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning the
/// cl_mem object for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning the
/// cl_mem object for results to be written
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename STENCIL_TABLE, typename DEVICE_CONTEXT>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
instance = Create(srcDesc, dstDesc,
BufferDescriptor(),
BufferDescriptor(),
deviceContext);
if (instance) {
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
stencilTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning the
/// cl_mem object for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning the
/// cl_mem object for results to be written
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning the
/// cl_mem object for dv results to be written
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename STENCIL_TABLE, typename DEVICE_CONTEXT>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
stencilTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
instance = Create(srcDesc, dstDesc, duDesc, dvDesc,
deviceContext);
if (instance) {
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
stencilTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic static stencil function. This function has a same
/// signature as other device kernels have so that it can be called
/// transparently from OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning the
/// cl_mem object for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning the
/// cl_mem object for results to be written
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning the
/// cl_mem object for dv results to be written
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning the
/// cl_mem object for dv results to be written
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename STENCIL_TABLE, typename DEVICE_CONTEXT>
static bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
STENCIL_TABLE const *stencilTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
stencilTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
instance = Create(srcDesc, dstDesc,
duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
deviceContext);
if (instance) {
bool r = instance->EvalStencils(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
stencilTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic stencil function.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning the
/// cl_mem object for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning the
/// cl_mem object for results to be written
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
STENCIL_TABLE const *stencilTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
stencilTable->GetIndicesBuffer(),
stencilTable->GetWeightsBuffer(),
0,
stencilTable->GetNumStencils(),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic stencil function.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning the
/// cl_mem object for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning the
/// cl_mem object for results to be written
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning the
/// cl_mem object for dv results to be written
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
STENCIL_TABLE const *stencilTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
stencilTable->GetIndicesBuffer(),
stencilTable->GetWeightsBuffer(),
stencilTable->GetDuWeightsBuffer(),
stencilTable->GetDvWeightsBuffer(),
0,
stencilTable->GetNumStencils(),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic stencil function.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning the
/// cl_mem object for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning the
/// cl_mem object for results to be written
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning the
/// cl_mem object for dv results to be written
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning the
/// cl_mem object for du results to be written
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning the
/// cl_mem object for dv results to be written
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param stencilTable stencil table to be applied. The table must have
/// SSBO interfaces.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
bool EvalStencils(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
STENCIL_TABLE const *stencilTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc,
duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc,
dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc,
stencilTable->GetSizesBuffer(),
stencilTable->GetOffsetsBuffer(),
stencilTable->GetIndicesBuffer(),
stencilTable->GetWeightsBuffer(),
stencilTable->GetDuWeightsBuffer(),
stencilTable->GetDvWeightsBuffer(),
stencilTable->GetDuuWeightsBuffer(),
stencilTable->GetDuvWeightsBuffer(),
stencilTable->GetDvvWeightsBuffer(),
0,
stencilTable->GetNumStencils(),
numStartEvents, startEvents, endEvent);
}
/// Dispatch the CL compute kernel asynchronously.
/// returns false if the kernel hasn't been compiled yet.
bool EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
cl_mem dst, BufferDescriptor const &dstDesc,
cl_mem sizes,
cl_mem offsets,
cl_mem indices,
cl_mem weights,
int start,
int end,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const;
/// \brief Dispatch the CL compute kernel asynchronously.
/// returns false if the kernel hasn't been compiled yet.
///
/// @param src CL buffer of input primvar source data
///
/// @param srcDesc vertex buffer descriptor for the srcBuffer
///
/// @param dst CL buffer of output primvar destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param du CL buffer of output derivative wrt u
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv CL buffer of output derivative wrt v
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param sizes CL buffer of the sizes in the stencil table
///
/// @param offsets CL buffer of the offsets in the stencil table
///
/// @param indices CL buffer of the indices in the stencil table
///
/// @param weights CL buffer of the weights in the stencil table
///
/// @param duWeights CL buffer of the du weights in the stencil table
///
/// @param dvWeights CL buffer of the dv weights in the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
bool EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
cl_mem dst, BufferDescriptor const &dstDesc,
cl_mem du, BufferDescriptor const &duDesc,
cl_mem dv, BufferDescriptor const &dvDesc,
cl_mem sizes,
cl_mem offsets,
cl_mem indices,
cl_mem weights,
cl_mem duWeights,
cl_mem dvWeights,
int start,
int end,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const;
/// \brief Dispatch the CL compute kernel asynchronously.
/// returns false if the kernel hasn't been compiled yet.
///
/// @param src CL buffer of input primvar source data
///
/// @param srcDesc vertex buffer descriptor for the srcBuffer
///
/// @param dst CL buffer of output primvar destination data
///
/// @param dstDesc vertex buffer descriptor for the dstBuffer
///
/// @param du CL buffer of output derivative wrt u
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dv CL buffer of output derivative wrt v
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duu CL buffer of output 2nd derivative wrt u
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duv CL buffer of output 2nd derivative wrt u and v
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvv CL buffer of output 2nd derivative wrt v
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param sizes CL buffer of the sizes in the stencil table
///
/// @param offsets CL buffer of the offsets in the stencil table
///
/// @param indices CL buffer of the indices in the stencil table
///
/// @param weights CL buffer of the weights in the stencil table
///
/// @param duWeights CL buffer of the du weights in the stencil table
///
/// @param dvWeights CL buffer of the dv weights in the stencil table
///
/// @param duuWeights CL buffer of the duu weights in the stencil table
///
/// @param duvWeights CL buffer of the duv weights in the stencil table
///
/// @param dvvWeights CL buffer of the dvv weights in the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
bool EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
cl_mem dst, BufferDescriptor const &dstDesc,
cl_mem du, BufferDescriptor const &duDesc,
cl_mem dv, BufferDescriptor const &dvDesc,
cl_mem duu, BufferDescriptor const &duuDesc,
cl_mem duv, BufferDescriptor const &duvDesc,
cl_mem dvv, BufferDescriptor const &dvvDesc,
cl_mem sizes,
cl_mem offsets,
cl_mem indices,
cl_mem weights,
cl_mem duWeights,
cl_mem dvWeights,
cl_mem duuWeights,
cl_mem duvWeights,
cl_mem dvvWeights,
int start,
int end,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const;
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
BufferDescriptor(),
BufferDescriptor(),
deviceContext);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
deviceContext);
if (instance) {
bool r = instance->EvalPatches(srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
0, BufferDescriptor(),
0, BufferDescriptor(),
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer(),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer(),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc,
duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc,
dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer(),
numStartEvents, startEvents, endEvent);
}
bool EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
cl_mem dst, BufferDescriptor const &dstDesc,
cl_mem du, BufferDescriptor const &duDesc,
cl_mem dv, BufferDescriptor const &dvDesc,
int numPatchCoords,
cl_mem patchCoordsBuffer,
cl_mem patchArrayBuffer,
cl_mem patchIndexBuffer,
cl_mem patchParamsBuffer,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const;
bool EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
cl_mem dst, BufferDescriptor const &dstDesc,
cl_mem du, BufferDescriptor const &duDesc,
cl_mem dv, BufferDescriptor const &dvDesc,
cl_mem duu, BufferDescriptor const &duuDesc,
cl_mem duv, BufferDescriptor const &duvDesc,
cl_mem dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
cl_mem patchCoordsBuffer,
cl_mem patchArrayBuffer,
cl_mem patchIndexBuffer,
cl_mem patchParamsBuffer,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const;
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatchesVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
BufferDescriptor(),
BufferDescriptor(),
deviceContext);
if (instance) {
bool r = instance->EvalPatchesVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
0, BufferDescriptor(),
0, BufferDescriptor(),
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer(),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatchesVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
duDesc, dvDesc,
deviceContext);
if (instance) {
bool r = instance->EvalPatchesVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer(),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatchesVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
deviceContext);
if (instance) {
bool r = instance->EvalPatchesVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
numPatchCoords, patchCoords,
patchTable,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatchesVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc,
duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc,
dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetVaryingPatchArrayBuffer(),
patchTable->GetVaryingPatchIndexBuffer(),
patchTable->GetPatchParamBuffer(),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatchesFaceVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable, fvarChannel,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
BufferDescriptor(),
BufferDescriptor(),
deviceContext);
if (instance) {
bool r = instance->EvalPatchesFaceVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
numPatchCoords, patchCoords,
patchTable, fvarChannel,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel = 0,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
0, BufferDescriptor(),
0, BufferDescriptor(),
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatchesFaceVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable, fvarChannel,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
duDesc, dvDesc, deviceContext);
if (instance) {
bool r = instance->EvalPatchesFaceVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
numPatchCoords, patchCoords,
patchTable, fvarChannel,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel = 0,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel),
numStartEvents, startEvents, endEvent);
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param instance cached compiled instance. Clients are supposed to
/// pre-compile an instance of this class and provide
/// to this function. If it's null the kernel still
/// compute by instantiating on-demand kernel although
/// it may cause a performance problem.
///
/// @param deviceContext client providing context class which supports
/// cL_context GetContext()
/// cl_command_queue GetCommandQueue()
/// methods.
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE,
typename DEVICE_CONTEXT>
static bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel,
CLEvaluator const *instance,
DEVICE_CONTEXT deviceContext,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) {
if (instance) {
return instance->EvalPatchesFaceVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
numPatchCoords, patchCoords,
patchTable, fvarChannel,
numStartEvents, startEvents, endEvent);
} else {
// Create an instance on demand (slow)
(void)deviceContext; // unused
instance = Create(srcDesc, dstDesc,
duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
deviceContext);
if (instance) {
bool r = instance->EvalPatchesFaceVarying(
srcBuffer, srcDesc,
dstBuffer, dstDesc,
duBuffer, duDesc,
dvBuffer, dvDesc,
duuBuffer, duuDesc,
duvBuffer, duvDesc,
dvvBuffer, dvvDesc,
numPatchCoords, patchCoords,
patchTable, fvarChannel,
numStartEvents, startEvents, endEvent);
delete instance;
return r;
}
return false;
}
}
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCLBuffer() method returning a CL
/// buffer object of source data
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output buffer derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output buffer derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param duuBuffer Output buffer 2nd derivative wrt u
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duuDesc vertex buffer descriptor for the duuBuffer
///
/// @param duvBuffer Output buffer 2nd derivative wrt u and v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param duvDesc vertex buffer descriptor for the duvBuffer
///
/// @param dvvBuffer Output buffer 2nd derivative wrt v
/// must have BindCLBuffer() method returning a CL
/// buffer object of destination data
///
/// @param dvvDesc vertex buffer descriptor for the dvvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
/// must have BindCLBuffer() method returning an
/// array of PatchCoord struct.
///
/// @param patchTable CLPatchTable or equivalent
///
/// @param fvarChannel face-varying channel
///
/// @param numStartEvents the number of events in the array pointed to by
/// startEvents.
///
/// @param startEvents points to an array of cl_event which will determine
/// when it is safe for the OpenCL device to begin work
/// or NULL if it can begin immediately.
///
/// @param endEvent pointer to a cl_event which will receive a copy of
/// the cl_event which indicates when all work for this
/// call has completed. This cl_event has an incremented
/// reference count and should be released via
/// clReleaseEvent(). NULL if not required.
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
bool EvalPatchesFaceVarying(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
DST_BUFFER *duuBuffer, BufferDescriptor const &duuDesc,
DST_BUFFER *duvBuffer, BufferDescriptor const &duvDesc,
DST_BUFFER *dvvBuffer, BufferDescriptor const &dvvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
int fvarChannel = 0,
unsigned int numStartEvents=0,
const cl_event* startEvents=NULL,
cl_event* endEvent=NULL) const {
return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
duuBuffer->BindCLBuffer(_clCommandQueue), duuDesc,
duvBuffer->BindCLBuffer(_clCommandQueue), duvDesc,
dvvBuffer->BindCLBuffer(_clCommandQueue), dvvDesc,
numPatchCoords,
patchCoords->BindCLBuffer(_clCommandQueue),
patchTable->GetFVarPatchArrayBuffer(fvarChannel),
patchTable->GetFVarPatchIndexBuffer(fvarChannel),
patchTable->GetFVarPatchParamBuffer(fvarChannel),
numStartEvents, startEvents, endEvent);
}
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// Configure OpenCL kernel.
/// Returns false if it fails to compile the kernel.
bool Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc = BufferDescriptor(),
BufferDescriptor const &dvDesc = BufferDescriptor(),
BufferDescriptor const &duuDesc = BufferDescriptor(),
BufferDescriptor const &duvDesc = BufferDescriptor(),
BufferDescriptor const &dvvDesc = BufferDescriptor());
/// Wait the OpenCL kernels finish.
template <typename DEVICE_CONTEXT>
static void Synchronize(DEVICE_CONTEXT deviceContext) {
Synchronize(deviceContext->GetCommandQueue());
}
static void Synchronize(cl_command_queue queue);
private:
cl_context _clContext;
cl_command_queue _clCommandQueue;
cl_program _program;
cl_kernel _stencilKernel;
cl_kernel _stencilDerivKernel;
cl_kernel _patchKernel;
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif // OPENSUBDIV3_OSD_CL_EVALUATOR_H