mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-11-30 15:20:07 +00:00
358 lines
13 KiB
C++
358 lines
13 KiB
C++
//
|
|
// Copyright 2015 Pixar
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
|
// with the following modification; you may not use this file except in
|
|
// compliance with the Apache License and the following modification to it:
|
|
// Section 6. Trademarks. is deleted and replaced with:
|
|
//
|
|
// 6. Trademarks. This License does not grant permission to use the trade
|
|
// names, trademarks, service marks, or product names of the Licensor
|
|
// and its affiliates, except as required to comply with Section 4(c) of
|
|
// the License and to reproduce the content of the NOTICE file.
|
|
//
|
|
// You may obtain a copy of the Apache License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the Apache License with the above modification is
|
|
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the Apache License for the specific
|
|
// language governing permissions and limitations under the Apache License.
|
|
//
|
|
|
|
#include "../osd/clEvaluator.h"
|
|
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <cstdio>
|
|
|
|
#include "../osd/opencl.h"
|
|
#include "../far/error.h"
|
|
#include "../far/stencilTable.h"
|
|
|
|
namespace OpenSubdiv {
|
|
namespace OPENSUBDIV_VERSION {
|
|
|
|
namespace Osd {
|
|
|
|
static const char *clSource =
|
|
#include "clKernel.gen.h"
|
|
;
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
template <class T> cl_mem
|
|
createCLBuffer(std::vector<T> const & src, cl_context clContext) {
|
|
cl_int errNum = 0;
|
|
cl_mem devicePtr = clCreateBuffer(clContext,
|
|
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
|
|
src.size()*sizeof(T),
|
|
(void*)(&src.at(0)),
|
|
&errNum);
|
|
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", errNum);
|
|
}
|
|
|
|
return devicePtr;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
CLStencilTable::CLStencilTable(Far::StencilTable const *stencilTable,
|
|
cl_context clContext) {
|
|
_numStencils = stencilTable->GetNumStencils();
|
|
|
|
if (_numStencils > 0) {
|
|
_sizes = createCLBuffer(stencilTable->GetSizes(), clContext);
|
|
_offsets = createCLBuffer(stencilTable->GetOffsets(), clContext);
|
|
_indices = createCLBuffer(stencilTable->GetControlIndices(),
|
|
clContext);
|
|
_weights = createCLBuffer(stencilTable->GetWeights(), clContext);
|
|
_duWeights = _dvWeights = NULL;
|
|
} else {
|
|
_sizes = _offsets = _indices = _weights = NULL;
|
|
_duWeights = _dvWeights = NULL;
|
|
}
|
|
}
|
|
|
|
CLStencilTable::CLStencilTable(Far::LimitStencilTable const *limitStencilTable,
|
|
cl_context clContext) {
|
|
_numStencils = limitStencilTable->GetNumStencils();
|
|
|
|
if (_numStencils > 0) {
|
|
_sizes = createCLBuffer(limitStencilTable->GetSizes(), clContext);
|
|
_offsets = createCLBuffer(limitStencilTable->GetOffsets(), clContext);
|
|
_indices = createCLBuffer(limitStencilTable->GetControlIndices(),
|
|
clContext);
|
|
_weights = createCLBuffer(limitStencilTable->GetWeights(), clContext);
|
|
_duWeights = createCLBuffer(
|
|
limitStencilTable->GetDuWeights(), clContext);
|
|
_dvWeights = createCLBuffer(
|
|
limitStencilTable->GetDvWeights(), clContext);
|
|
} else {
|
|
_sizes = _offsets = _indices = _weights = NULL;
|
|
_duWeights = _dvWeights = NULL;
|
|
}
|
|
}
|
|
|
|
CLStencilTable::~CLStencilTable() {
|
|
if (_sizes) clReleaseMemObject(_sizes);
|
|
if (_offsets) clReleaseMemObject(_offsets);
|
|
if (_indices) clReleaseMemObject(_indices);
|
|
if (_weights) clReleaseMemObject(_weights);
|
|
if (_duWeights) clReleaseMemObject(_duWeights);
|
|
if (_dvWeights) clReleaseMemObject(_dvWeights);
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue)
|
|
: _clContext(context), _clCommandQueue(queue),
|
|
_program(NULL), _stencilKernel(NULL), _stencilDerivKernel(NULL),
|
|
_patchKernel(NULL) {
|
|
}
|
|
|
|
CLEvaluator::~CLEvaluator() {
|
|
if (_stencilKernel) clReleaseKernel(_stencilKernel);
|
|
if (_stencilDerivKernel) clReleaseKernel(_stencilDerivKernel);
|
|
if (_patchKernel) clReleaseKernel(_patchKernel);
|
|
if (_program) clReleaseProgram(_program);
|
|
}
|
|
|
|
bool
|
|
CLEvaluator::Compile(BufferDescriptor const &srcDesc,
|
|
BufferDescriptor const &dstDesc,
|
|
BufferDescriptor const & /*duDesc*/,
|
|
BufferDescriptor const & /*dvDesc*/) {
|
|
if (srcDesc.length > dstDesc.length) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR,
|
|
"srcDesc length must be less than or equal to "
|
|
"dstDesc length.\n");
|
|
return false;
|
|
}
|
|
|
|
cl_int errNum;
|
|
|
|
std::ostringstream defines;
|
|
defines << "#define LENGTH " << srcDesc.length << "\n"
|
|
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
|
|
<< "#define DST_STRIDE " << dstDesc.stride << "\n";
|
|
std::string defineStr = defines.str();
|
|
|
|
const char *sources[] = { defineStr.c_str(), clSource };
|
|
_program = clCreateProgramWithSource(_clContext, 2, sources, 0, &errNum);
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR,
|
|
"clCreateProgramWithSource (%d)", errNum);
|
|
}
|
|
|
|
errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL);
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum);
|
|
|
|
cl_int numDevices = 0;
|
|
clGetContextInfo(
|
|
_clContext, CL_CONTEXT_NUM_DEVICES,
|
|
sizeof(cl_uint), &numDevices, NULL);
|
|
|
|
cl_device_id *devices = new cl_device_id[numDevices];
|
|
clGetContextInfo(_clContext, CL_CONTEXT_DEVICES,
|
|
sizeof(cl_device_id)*numDevices, devices, NULL);
|
|
|
|
for (int i = 0; i < numDevices; ++i) {
|
|
char cBuildLog[10240];
|
|
clGetProgramBuildInfo(
|
|
_program, devices[i],
|
|
CL_PROGRAM_BUILD_LOG, sizeof(cBuildLog), cBuildLog, NULL);
|
|
Far::Error(Far::FAR_RUNTIME_ERROR, cBuildLog);
|
|
}
|
|
delete[] devices;
|
|
|
|
return false;
|
|
}
|
|
|
|
_stencilKernel = clCreateKernel(_program, "computeStencils", &errNum);
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
|
|
return false;
|
|
}
|
|
|
|
_stencilDerivKernel = clCreateKernel(_program,
|
|
"computeStencilsDerivatives", &errNum);
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
|
|
return false;
|
|
}
|
|
|
|
_patchKernel = clCreateKernel(_program, "computePatches", &errNum);
|
|
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
|
|
cl_mem dst, BufferDescriptor const &dstDesc,
|
|
cl_mem sizes,
|
|
cl_mem offsets,
|
|
cl_mem indices,
|
|
cl_mem weights,
|
|
int start, int end,
|
|
unsigned int numStartEvents,
|
|
const cl_event* startEvents,
|
|
cl_event* endEvent) const {
|
|
if (end <= start) return true;
|
|
|
|
size_t globalWorkSize = (size_t)(end - start);
|
|
|
|
clSetKernelArg(_stencilKernel, 0, sizeof(cl_mem), &src);
|
|
clSetKernelArg(_stencilKernel, 1, sizeof(int), &srcDesc.offset);
|
|
clSetKernelArg(_stencilKernel, 2, sizeof(cl_mem), &dst);
|
|
clSetKernelArg(_stencilKernel, 3, sizeof(int), &dstDesc.offset);
|
|
clSetKernelArg(_stencilKernel, 4, sizeof(cl_mem), &sizes);
|
|
clSetKernelArg(_stencilKernel, 5, sizeof(cl_mem), &offsets);
|
|
clSetKernelArg(_stencilKernel, 6, sizeof(cl_mem), &indices);
|
|
clSetKernelArg(_stencilKernel, 7, sizeof(cl_mem), &weights);
|
|
clSetKernelArg(_stencilKernel, 8, sizeof(int), &start);
|
|
clSetKernelArg(_stencilKernel, 9, sizeof(int), &end);
|
|
|
|
cl_int errNum = clEnqueueNDRangeKernel(
|
|
_clCommandQueue, _stencilKernel, 1, NULL,
|
|
&globalWorkSize, NULL, numStartEvents, startEvents, endEvent);
|
|
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR,
|
|
"ApplyStencilKernel (%d) ", errNum);
|
|
return false;
|
|
}
|
|
|
|
if (endEvent == NULL)
|
|
{
|
|
clFinish(_clCommandQueue);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
CLEvaluator::EvalStencils(cl_mem src, BufferDescriptor const &srcDesc,
|
|
cl_mem dst, BufferDescriptor const &dstDesc,
|
|
cl_mem du, BufferDescriptor const &duDesc,
|
|
cl_mem dv, BufferDescriptor const &dvDesc,
|
|
cl_mem sizes,
|
|
cl_mem offsets,
|
|
cl_mem indices,
|
|
cl_mem weights,
|
|
cl_mem duWeights,
|
|
cl_mem dvWeights,
|
|
int start, int end,
|
|
unsigned int numStartEvents,
|
|
const cl_event* startEvents,
|
|
cl_event* endEvent) const {
|
|
if (end <= start) return true;
|
|
|
|
size_t globalWorkSize = (size_t)(end - start);
|
|
|
|
clSetKernelArg(_stencilDerivKernel, 0, sizeof(cl_mem), &src);
|
|
clSetKernelArg(_stencilDerivKernel, 1, sizeof(int), &srcDesc.offset);
|
|
clSetKernelArg(_stencilDerivKernel, 2, sizeof(cl_mem), &dst);
|
|
clSetKernelArg(_stencilDerivKernel, 3, sizeof(int), &dstDesc.offset);
|
|
clSetKernelArg(_stencilDerivKernel, 4, sizeof(cl_mem), &du);
|
|
clSetKernelArg(_stencilDerivKernel, 5, sizeof(int), &duDesc.offset);
|
|
clSetKernelArg(_stencilDerivKernel, 6, sizeof(int), &duDesc.stride);
|
|
clSetKernelArg(_stencilDerivKernel, 7, sizeof(cl_mem), &dv);
|
|
clSetKernelArg(_stencilDerivKernel, 8, sizeof(int), &dvDesc.offset);
|
|
clSetKernelArg(_stencilDerivKernel, 9, sizeof(int), &dvDesc.stride);
|
|
clSetKernelArg(_stencilDerivKernel, 10, sizeof(cl_mem), &sizes);
|
|
clSetKernelArg(_stencilDerivKernel, 11, sizeof(cl_mem), &offsets);
|
|
clSetKernelArg(_stencilDerivKernel, 12, sizeof(cl_mem), &indices);
|
|
clSetKernelArg(_stencilDerivKernel, 13, sizeof(cl_mem), &weights);
|
|
clSetKernelArg(_stencilDerivKernel, 14, sizeof(cl_mem), &duWeights);
|
|
clSetKernelArg(_stencilDerivKernel, 15, sizeof(cl_mem), &dvWeights);
|
|
clSetKernelArg(_stencilDerivKernel, 16, sizeof(int), &start);
|
|
clSetKernelArg(_stencilDerivKernel, 17, sizeof(int), &end);
|
|
|
|
cl_int errNum = clEnqueueNDRangeKernel(
|
|
_clCommandQueue, _stencilDerivKernel, 1, NULL,
|
|
&globalWorkSize, NULL, numStartEvents, startEvents, endEvent);
|
|
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR,
|
|
"ApplyStencilKernel (%d) ", errNum);
|
|
return false;
|
|
}
|
|
|
|
if (endEvent == NULL)
|
|
{
|
|
clFinish(_clCommandQueue);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
CLEvaluator::EvalPatches(cl_mem src, BufferDescriptor const &srcDesc,
|
|
cl_mem dst, BufferDescriptor const &dstDesc,
|
|
cl_mem du, BufferDescriptor const &duDesc,
|
|
cl_mem dv, BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
cl_mem patchCoordsBuffer,
|
|
cl_mem patchArrayBuffer,
|
|
cl_mem patchIndexBuffer,
|
|
cl_mem patchParamBuffer,
|
|
unsigned int numStartEvents,
|
|
const cl_event* startEvents,
|
|
cl_event* endEvent) const {
|
|
|
|
size_t globalWorkSize = (size_t)(numPatchCoords);
|
|
|
|
clSetKernelArg(_patchKernel, 0, sizeof(cl_mem), &src);
|
|
clSetKernelArg(_patchKernel, 1, sizeof(int), &srcDesc.offset);
|
|
clSetKernelArg(_patchKernel, 2, sizeof(cl_mem), &dst);
|
|
clSetKernelArg(_patchKernel, 3, sizeof(int), &dstDesc.offset);
|
|
clSetKernelArg(_patchKernel, 4, sizeof(cl_mem), &du);
|
|
clSetKernelArg(_patchKernel, 5, sizeof(int), &duDesc.offset);
|
|
clSetKernelArg(_patchKernel, 6, sizeof(int), &duDesc.stride);
|
|
clSetKernelArg(_patchKernel, 7, sizeof(cl_mem), &dv);
|
|
clSetKernelArg(_patchKernel, 8, sizeof(int), &dvDesc.offset);
|
|
clSetKernelArg(_patchKernel, 9, sizeof(int), &dvDesc.stride);
|
|
clSetKernelArg(_patchKernel, 10, sizeof(cl_mem), &patchCoordsBuffer);
|
|
clSetKernelArg(_patchKernel, 11, sizeof(cl_mem), &patchArrayBuffer);
|
|
clSetKernelArg(_patchKernel, 12, sizeof(cl_mem), &patchIndexBuffer);
|
|
clSetKernelArg(_patchKernel, 13, sizeof(cl_mem), &patchParamBuffer);
|
|
|
|
cl_int errNum = clEnqueueNDRangeKernel(
|
|
_clCommandQueue, _patchKernel, 1, NULL,
|
|
&globalWorkSize, NULL, numStartEvents, startEvents, endEvent);
|
|
|
|
if (errNum != CL_SUCCESS) {
|
|
Far::Error(Far::FAR_RUNTIME_ERROR,
|
|
"ApplyPatchKernel (%d) ", errNum);
|
|
return false;
|
|
}
|
|
|
|
if (endEvent == NULL)
|
|
{
|
|
clFinish(_clCommandQueue);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
/* static */
|
|
void
|
|
CLEvaluator::Synchronize(cl_command_queue clCommandQueue) {
|
|
clFinish(clCommandQueue);
|
|
}
|
|
|
|
} // end namespace Osd
|
|
|
|
} // end namespace OPENSUBDIV_VERSION
|
|
} // end namespace OpenSubdiv
|