OpenSubdiv/opensubdiv/osd/clComputeController.cpp
Takahito Tejima 8da827336d Removes FarKernelBatch.
This is the first step to tease off Osd compute controller/contexts
from Far API.
Currently FarStencilTable only creates a kernelbatch for the entire range,
so we can use [0, numStencils) for all cases instead of KernelBatch.
This might not be true if we apply non-factorized level-wise stencils,
then we'll add another modular utility to serve those cases.
2015-04-10 11:08:24 -07:00

272 lines
8.6 KiB
C++

//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/clComputeController.h"
#include "../far/error.h"
#if defined(_WIN32)
#include <windows.h>
#endif
#include <algorithm>
#include <string.h>
#include <sstream>
#include <string>
#include <vector>
#include <cassert>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
// OpenCL program text for this translation unit. The string literal itself
// lives in the included "clKernel.gen.h"; it is handed to
// clCreateProgramWithSource() together with the per-layout #define preamble
// when a KernelBundle is compiled.
static const char *clSource =
#include "clKernel.gen.h"
;
// -----------------------------------------------------------------------------
// Creates the kernel object called 'name' from the program 'prog'.
//
// A failure is reported through Far::Error(); whatever handle
// clCreateKernel() produced is returned to the caller in either case.
static cl_kernel buildKernel(cl_program prog, const char * name) {

    cl_int status;
    cl_kernel const kernel = clCreateKernel(prog, name, &status);

    if (status == CL_SUCCESS) {
        return kernel;
    }

    Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel '%s' (%d)\n", name, status);
    return kernel;
}
// -----------------------------------------------------------------------------
// Owns the OpenCL program and the "computeStencils" kernel compiled for one
// vertex buffer layout.
//
// The layout (length / stride) is baked into the program through a #define
// preamble, so one bundle is needed per distinct layout. Bundles are looked
// up with the Match predicate, which compares length and stride only.
//
// The bundle releases its kernel and program when it is destroyed.
class CLComputeController::KernelBundle :
    NonCopyable<CLComputeController::KernelBundle> {

public:

    KernelBundle() : _program(NULL), _stencilsKernel(NULL) { }

    ~KernelBundle() {
        release();
    }

    // Compiles the stencil kernel for the length and stride of 'desc'.
    //
    // The descriptor stored in the bundle is rebuilt with 0 as its first
    // field and emitted as the OFFSET define.
    //
    // Returns true when the program was created, built and the kernel object
    // obtained; returns false (after reporting through Far::Error) otherwise.
    bool Compile(cl_context clContext, VertexBufferDescriptor const & desc) {

        // drop anything left over from a previous compilation
        release();

        cl_int errNum = CL_SUCCESS;

        _desc = VertexBufferDescriptor(0, desc.length, desc.stride);

        std::ostringstream defines;
        defines << "#define OFFSET " << _desc.offset << "\n"
                << "#define LENGTH " << _desc.length << "\n"
                << "#define STRIDE " << _desc.stride << "\n";
        std::string defineStr = defines.str();

        const char *sources[] = { defineStr.c_str(), clSource };
        _program = clCreateProgramWithSource(clContext, 2, sources, 0, &errNum);
        if (errNum != CL_SUCCESS) {
            Far::Error(Far::FAR_RUNTIME_ERROR,
                       "clCreateProgramWithSource (%d)", errNum);
            // there is no valid program to build: stop here
            _program = NULL;
            return false;
        }

        errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL);
        if (errNum != CL_SUCCESS) {
            Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum);
            reportBuildLog(clContext);
            return false;
        }

        // compile all cl compute kernels
        _stencilsKernel = buildKernel(_program, "computeStencils");

        return _stencilsKernel != NULL;
    }

    // Returns the stencil kernel, or NULL if Compile() has not succeeded.
    cl_kernel GetStencilsKernel() const {
        return _stencilsKernel;
    }

    // Predicate selecting the bundle compiled for a given layout. Only length
    // and stride take part in the comparison.
    struct Match {

        Match(VertexBufferDescriptor const & d) : desc(d) { }

        bool operator() (KernelBundle const * kernel) const {
            return (desc.length == kernel->_desc.length &&
                    desc.stride == kernel->_desc.stride);
        }

        VertexBufferDescriptor desc;
    };

private:

    // Releases the kernel and program objects, if any, and resets the handles.
    void release() {
        if (_stencilsKernel) {
            clReleaseKernel(_stencilsKernel);
            _stencilsKernel = NULL;
        }
        if (_program) {
            clReleaseProgram(_program);
            _program = NULL;
        }
    }

    // Reports the build log of _program for every device of 'clContext'.
    void reportBuildLog(cl_context clContext) const {

        cl_uint numDevices = 0;
        clGetContextInfo(clContext,
            CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
        if (numDevices == 0) {
            return;
        }

        std::vector<cl_device_id> devices(numDevices);
        if (clGetContextInfo(clContext, CL_CONTEXT_DEVICES,
                sizeof(cl_device_id)*numDevices, &devices[0], NULL) != CL_SUCCESS) {
            return;
        }

        for (cl_uint i = 0; i < numDevices; ++i) {

            // ask for the size first: a too-small buffer makes the query fail
            // instead of truncating the log
            size_t logSize = 0;
            if (clGetProgramBuildInfo(_program, devices[i],
                    CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize) != CL_SUCCESS ||
                logSize == 0) {
                continue;
            }

            // one extra zeroed byte guarantees termination
            std::vector<char> buildLog(logSize + 1, '\0');
            if (clGetProgramBuildInfo(_program, devices[i],
                    CL_PROGRAM_BUILD_LOG, logSize, &buildLog[0], NULL) != CL_SUCCESS) {
                continue;
            }

            // the log is data, never a format string
            Far::Error(Far::FAR_RUNTIME_ERROR, "%s", &buildLog[0]);
        }
    }

    cl_program _program;

    cl_kernel _stencilsKernel;

    VertexBufferDescriptor _desc;
};
// ----------------------------------------------------------------------------
void
CLComputeController::ApplyStencilTableKernel(ComputeContext const *context) {
assert(context);
cl_int errNum;
size_t globalWorkSize = 0;
int ncvs = context->GetNumControlVertices();
if (context->HasVertexStencilTables()) {
int start = 0;
int end = context->GetNumStencilsInVertexStencilTables();
globalWorkSize = (size_t)(end - start);
KernelBundle const * bundle = getKernel(_currentBindState.vertexDesc);
cl_kernel kernel = bundle->GetStencilsKernel();
cl_mem sizes = context->GetVertexStencilTablesSizes(),
offsets = context->GetVertexStencilTablesOffsets(),
indices = context->GetVertexStencilTablesIndices(),
weights = context->GetVertexStencilTablesWeights();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &sizes);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &offsets);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &indices);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &weights);
clSetKernelArg(kernel, 5, sizeof(int), &start);
clSetKernelArg(kernel, 6, sizeof(int), &end);
clSetKernelArg(kernel, 7, sizeof(int), &_currentBindState.vertexDesc.offset);
clSetKernelArg(kernel, 8, sizeof(int), &ncvs);
errNum = clEnqueueNDRangeKernel(
_clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
if (errNum!=CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyStencilTableKernel (%d) ", errNum);
}
}
if (context->HasVaryingStencilTables()) {
int start = 0;
int end = context->GetNumStencilsInVaryingStencilTables();
globalWorkSize = (size_t)(end - start);
KernelBundle const * bundle = getKernel(_currentBindState.varyingDesc);
cl_kernel kernel = bundle->GetStencilsKernel();
cl_mem sizes = context->GetVaryingStencilTablesSizes(),
offsets = context->GetVaryingStencilTablesOffsets(),
indices = context->GetVaryingStencilTablesIndices(),
weights = context->GetVaryingStencilTablesWeights();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.varyingBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &sizes);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &offsets);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &indices);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &weights);
clSetKernelArg(kernel, 5, sizeof(int), &start);
clSetKernelArg(kernel, 6, sizeof(int), &end);
clSetKernelArg(kernel, 7, sizeof(int), &_currentBindState.varyingDesc.offset);
clSetKernelArg(kernel, 8, sizeof(int), &ncvs);
errNum = clEnqueueNDRangeKernel(
_clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL);
if (errNum!=CL_SUCCESS) {
Far::Error(Far::FAR_RUNTIME_ERROR,
"ApplyStencilTableKernel (%d)", errNum);
}
}
}
// ----------------------------------------------------------------------------
// Returns the kernel bundle registered for the layout of 'desc', compiling
// and registering a new one on first use. The registry keeps ownership of
// the returned bundle.
CLComputeController::KernelBundle const *
CLComputeController::getKernel(VertexBufferDescriptor const &desc) {

    KernelRegistry::iterator found =
        std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(),
                     KernelBundle::Match(desc));

    if (found == _kernelRegistry.end()) {
        // first request for this layout: build a bundle and remember it
        KernelBundle * fresh = new KernelBundle();
        fresh->Compile(_clContext, desc);
        _kernelRegistry.push_back(fresh);
        return fresh;
    }

    return *found;
}
// ----------------------------------------------------------------------------
// Constructs a controller that stores the given OpenCL context and command
// queue; the context is used when compiling kernels and the queue when
// enqueueing and synchronizing them.
CLComputeController::CLComputeController(
cl_context clContext, cl_command_queue queue) :
_clContext(clContext), _clQueue(queue) {
}
// Destroys every kernel bundle held in the registry.
CLComputeController::~CLComputeController() {

    KernelRegistry::iterator cur = _kernelRegistry.begin();
    while (cur != _kernelRegistry.end()) {
        delete *cur;
        ++cur;
    }
}
// ----------------------------------------------------------------------------
void
CLComputeController::Synchronize() {
clFinish(_clQueue);
}
// -----------------------------------------------------------------------------
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv