OpenSubdiv/opensubdiv/osd/cudaEvaluator.cpp
David G Yu fe38ad8cda Added missing Osd Evaluator methods for deriv eval
Now that Far::LimitStencilTable and Far::PatchTable
support evaluation of 1st and 2nd derivatives the
Osd Evaluator API for evaluating stencils and patches
has been updated to match.
2017-01-27 17:14:18 -08:00

386 lines
14 KiB
C++

//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/cudaEvaluator.h"
#include <cuda_runtime.h>
#include <vector>
#include "../far/stencilTable.h"
#include "../osd/types.h"
extern "C" {
void CudaEvalStencils(const float *src,
float *dst,
int length,
int srcStride,
int dstStride,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end);
void CudaEvalPatches(
const float *src, float *dst,
int length, int srcStride, int dstStride,
int numPatchCoords,
const void *patchCoords,
const void *patchArrays,
const int *patchIndices,
const void *patchParams);
void CudaEvalPatchesWithDerivatives(
const float *src, float *dst,
float *du, float *dv,
float *duu, float *duv, float *dvv,
int length, int srcStride, int dstStride,
int duStride, int dvStride,
int duuStride, int duvStride, int dvvStride,
int numPatchCoords,
const void *patchCoords,
const void *patchArrays,
const int *patchIndices,
const void *patchParams);
}
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
template <class T> void *
createCudaBuffer(std::vector<T> const & src) {
if (src.empty()) {
return NULL;
}
void * devicePtr = 0;
size_t size = src.size()*sizeof(T);
cudaError_t err = cudaMalloc(&devicePtr, size);
if (err != cudaSuccess) {
return devicePtr;
}
err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
cudaFree(devicePtr);
return 0;
}
return devicePtr;
}
// ----------------------------------------------------------------------------
CudaStencilTable::CudaStencilTable(Far::StencilTable const *stencilTable) {
_numStencils = stencilTable->GetNumStencils();
if (_numStencils > 0) {
_sizes = createCudaBuffer(stencilTable->GetSizes());
_offsets = createCudaBuffer(stencilTable->GetOffsets());
_indices = createCudaBuffer(stencilTable->GetControlIndices());
_weights = createCudaBuffer(stencilTable->GetWeights());
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
} else {
_sizes = _offsets = _indices = _weights = NULL;
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
}
}
CudaStencilTable::CudaStencilTable(Far::LimitStencilTable const *limitStencilTable) {
_numStencils = limitStencilTable->GetNumStencils();
if (_numStencils > 0) {
_sizes = createCudaBuffer(limitStencilTable->GetSizes());
_offsets = createCudaBuffer(limitStencilTable->GetOffsets());
_indices = createCudaBuffer(limitStencilTable->GetControlIndices());
_weights = createCudaBuffer(limitStencilTable->GetWeights());
_duWeights = createCudaBuffer(limitStencilTable->GetDuWeights());
_dvWeights = createCudaBuffer(limitStencilTable->GetDvWeights());
_duuWeights = createCudaBuffer(limitStencilTable->GetDuuWeights());
_duvWeights = createCudaBuffer(limitStencilTable->GetDuvWeights());
_dvvWeights = createCudaBuffer(limitStencilTable->GetDvvWeights());
} else {
_sizes = _offsets = _indices = _weights = NULL;
_duWeights = _dvWeights = NULL;
_duuWeights = _duvWeights = _dvvWeights = NULL;
}
}
CudaStencilTable::~CudaStencilTable() {
if (_sizes) cudaFree(_sizes);
if (_offsets) cudaFree(_offsets);
if (_indices) cudaFree(_indices);
if (_weights) cudaFree(_weights);
if (_duWeights) cudaFree(_duWeights);
if (_dvWeights) cudaFree(_dvWeights);
if (_duuWeights) cudaFree(_duuWeights);
if (_duvWeights) cudaFree(_duvWeights);
if (_dvvWeights) cudaFree(_dvvWeights);
}
// ---------------------------------------------------------------------------
/* static */
bool
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start,
int end) {
if (dst == NULL) return false;
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
sizes, offsets, indices, weights,
start, end);
return true;
}
/* static */
bool
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start,
int end) {
// PERFORMANCE: need to combine 3 launches together
if (dst) {
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
sizes, offsets, indices, weights,
start, end);
}
if (du) {
CudaEvalStencils(src + srcDesc.offset,
du + duDesc.offset,
srcDesc.length,
srcDesc.stride,
duDesc.stride,
sizes, offsets, indices, duWeights,
start, end);
}
if (dv) {
CudaEvalStencils(src + srcDesc.offset,
dv + dvDesc.offset,
srcDesc.length,
srcDesc.stride,
dvDesc.stride,
sizes, offsets, indices, dvWeights,
start, end);
}
return true;
}
/* static */
bool
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
const float * duuWeights,
const float * duvWeights,
const float * dvvWeights,
int start,
int end) {
// PERFORMANCE: need to combine 3 launches together
if (dst) {
CudaEvalStencils(src + srcDesc.offset,
dst + dstDesc.offset,
srcDesc.length,
srcDesc.stride,
dstDesc.stride,
sizes, offsets, indices, weights,
start, end);
}
if (du) {
CudaEvalStencils(src + srcDesc.offset,
du + duDesc.offset,
srcDesc.length,
srcDesc.stride,
duDesc.stride,
sizes, offsets, indices, duWeights,
start, end);
}
if (dv) {
CudaEvalStencils(src + srcDesc.offset,
dv + dvDesc.offset,
srcDesc.length,
srcDesc.stride,
dvDesc.stride,
sizes, offsets, indices, dvWeights,
start, end);
}
if (duu) {
CudaEvalStencils(src + srcDesc.offset,
duu + duuDesc.offset,
srcDesc.length,
srcDesc.stride,
duuDesc.stride,
sizes, offsets, indices, duuWeights,
start, end);
}
if (duv) {
CudaEvalStencils(src + srcDesc.offset,
duv + duvDesc.offset,
srcDesc.length,
srcDesc.stride,
duvDesc.stride,
sizes, offsets, indices, duvWeights,
start, end);
}
if (dvv) {
CudaEvalStencils(src + srcDesc.offset,
dvv + dvvDesc.offset,
srcDesc.length,
srcDesc.stride,
dvvDesc.stride,
sizes, offsets, indices, dvvWeights,
start, end);
}
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(const float *src,
BufferDescriptor const &srcDesc,
float *dst,
BufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
CudaEvalPatches(src, dst,
srcDesc.length, srcDesc.stride, dstDesc.stride,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
CudaEvalPatchesWithDerivatives(
src, dst, du, dv, NULL, NULL, NULL,
srcDesc.length, srcDesc.stride, dstDesc.stride,
duDesc.stride, dvDesc.stride, 0, 0, 0,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
bool
CudaEvaluator::EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
float *duu, BufferDescriptor const &duuDesc,
float *duv, BufferDescriptor const &duvDesc,
float *dvv, BufferDescriptor const &dvvDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndices,
const PatchParam *patchParams) {
if (src) src += srcDesc.offset;
if (dst) dst += dstDesc.offset;
if (du) du += duDesc.offset;
if (dv) dv += dvDesc.offset;
if (duu) duu += duuDesc.offset;
if (duv) duv += duvDesc.offset;
if (dvv) dvv += dvvDesc.offset;
CudaEvalPatchesWithDerivatives(
src, dst, du, dv, duu, duv, dvv,
srcDesc.length, srcDesc.stride, dstDesc.stride,
duDesc.stride, dvDesc.stride,
duuDesc.stride, duvDesc.stride, dvvDesc.stride,
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
return true;
}
/* static */
void
CudaEvaluator::Synchronize(void * /*deviceContext*/) {
cudaThreadSynchronize();
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv