mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-12-13 20:40:09 +00:00
fe38ad8cda
Now that Far::LimitStencilTable and Far::PatchTable support evaluation of 1st and 2nd derivatives the Osd Evaluator API for evaluating stencils and patches has been updated to match.
386 lines
14 KiB
C++
386 lines
14 KiB
C++
//
|
|
// Copyright 2015 Pixar
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
|
// with the following modification; you may not use this file except in
|
|
// compliance with the Apache License and the following modification to it:
|
|
// Section 6. Trademarks. is deleted and replaced with:
|
|
//
|
|
// 6. Trademarks. This License does not grant permission to use the trade
|
|
// names, trademarks, service marks, or product names of the Licensor
|
|
// and its affiliates, except as required to comply with Section 4(c) of
|
|
// the License and to reproduce the content of the NOTICE file.
|
|
//
|
|
// You may obtain a copy of the Apache License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the Apache License with the above modification is
|
|
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the Apache License for the specific
|
|
// language governing permissions and limitations under the Apache License.
|
|
//
|
|
|
|
#include "../osd/cudaEvaluator.h"
|
|
|
|
#include <cuda_runtime.h>
|
|
#include <vector>
|
|
|
|
#include "../far/stencilTable.h"
|
|
#include "../osd/types.h"
|
|
|
|
extern "C" {
|
|
void CudaEvalStencils(const float *src,
|
|
float *dst,
|
|
int length,
|
|
int srcStride,
|
|
int dstStride,
|
|
const int * sizes,
|
|
const int * offsets,
|
|
const int * indices,
|
|
const float * weights,
|
|
int start,
|
|
int end);
|
|
|
|
void CudaEvalPatches(
|
|
const float *src, float *dst,
|
|
int length, int srcStride, int dstStride,
|
|
int numPatchCoords,
|
|
const void *patchCoords,
|
|
const void *patchArrays,
|
|
const int *patchIndices,
|
|
const void *patchParams);
|
|
|
|
void CudaEvalPatchesWithDerivatives(
|
|
const float *src, float *dst,
|
|
float *du, float *dv,
|
|
float *duu, float *duv, float *dvv,
|
|
int length, int srcStride, int dstStride,
|
|
int duStride, int dvStride,
|
|
int duuStride, int duvStride, int dvvStride,
|
|
int numPatchCoords,
|
|
const void *patchCoords,
|
|
const void *patchArrays,
|
|
const int *patchIndices,
|
|
const void *patchParams);
|
|
|
|
}
|
|
|
|
namespace OpenSubdiv {
|
|
namespace OPENSUBDIV_VERSION {
|
|
|
|
namespace Osd {
|
|
|
|
template <class T> void *
|
|
createCudaBuffer(std::vector<T> const & src) {
|
|
if (src.empty()) {
|
|
return NULL;
|
|
}
|
|
|
|
void * devicePtr = 0;
|
|
|
|
size_t size = src.size()*sizeof(T);
|
|
|
|
cudaError_t err = cudaMalloc(&devicePtr, size);
|
|
if (err != cudaSuccess) {
|
|
return devicePtr;
|
|
}
|
|
|
|
err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice);
|
|
if (err != cudaSuccess) {
|
|
cudaFree(devicePtr);
|
|
return 0;
|
|
}
|
|
return devicePtr;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
CudaStencilTable::CudaStencilTable(Far::StencilTable const *stencilTable) {
|
|
_numStencils = stencilTable->GetNumStencils();
|
|
if (_numStencils > 0) {
|
|
_sizes = createCudaBuffer(stencilTable->GetSizes());
|
|
_offsets = createCudaBuffer(stencilTable->GetOffsets());
|
|
_indices = createCudaBuffer(stencilTable->GetControlIndices());
|
|
_weights = createCudaBuffer(stencilTable->GetWeights());
|
|
_duWeights = _dvWeights = NULL;
|
|
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
|
} else {
|
|
_sizes = _offsets = _indices = _weights = NULL;
|
|
_duWeights = _dvWeights = NULL;
|
|
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
|
}
|
|
}
|
|
|
|
CudaStencilTable::CudaStencilTable(Far::LimitStencilTable const *limitStencilTable) {
|
|
_numStencils = limitStencilTable->GetNumStencils();
|
|
if (_numStencils > 0) {
|
|
_sizes = createCudaBuffer(limitStencilTable->GetSizes());
|
|
_offsets = createCudaBuffer(limitStencilTable->GetOffsets());
|
|
_indices = createCudaBuffer(limitStencilTable->GetControlIndices());
|
|
_weights = createCudaBuffer(limitStencilTable->GetWeights());
|
|
_duWeights = createCudaBuffer(limitStencilTable->GetDuWeights());
|
|
_dvWeights = createCudaBuffer(limitStencilTable->GetDvWeights());
|
|
_duuWeights = createCudaBuffer(limitStencilTable->GetDuuWeights());
|
|
_duvWeights = createCudaBuffer(limitStencilTable->GetDuvWeights());
|
|
_dvvWeights = createCudaBuffer(limitStencilTable->GetDvvWeights());
|
|
} else {
|
|
_sizes = _offsets = _indices = _weights = NULL;
|
|
_duWeights = _dvWeights = NULL;
|
|
_duuWeights = _duvWeights = _dvvWeights = NULL;
|
|
}
|
|
}
|
|
|
|
CudaStencilTable::~CudaStencilTable() {
|
|
if (_sizes) cudaFree(_sizes);
|
|
if (_offsets) cudaFree(_offsets);
|
|
if (_indices) cudaFree(_indices);
|
|
if (_weights) cudaFree(_weights);
|
|
if (_duWeights) cudaFree(_duWeights);
|
|
if (_dvWeights) cudaFree(_dvWeights);
|
|
if (_duuWeights) cudaFree(_duuWeights);
|
|
if (_duvWeights) cudaFree(_duvWeights);
|
|
if (_dvvWeights) cudaFree(_dvvWeights);
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/* static */
|
|
bool
|
|
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
|
float *dst, BufferDescriptor const &dstDesc,
|
|
const int * sizes,
|
|
const int * offsets,
|
|
const int * indices,
|
|
const float * weights,
|
|
int start,
|
|
int end) {
|
|
if (dst == NULL) return false;
|
|
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
dst + dstDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
dstDesc.stride,
|
|
sizes, offsets, indices, weights,
|
|
start, end);
|
|
return true;
|
|
}
|
|
|
|
/* static */
|
|
bool
|
|
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
|
float *dst, BufferDescriptor const &dstDesc,
|
|
float *du, BufferDescriptor const &duDesc,
|
|
float *dv, BufferDescriptor const &dvDesc,
|
|
const int * sizes,
|
|
const int * offsets,
|
|
const int * indices,
|
|
const float * weights,
|
|
const float * duWeights,
|
|
const float * dvWeights,
|
|
int start,
|
|
int end) {
|
|
// PERFORMANCE: need to combine 3 launches together
|
|
if (dst) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
dst + dstDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
dstDesc.stride,
|
|
sizes, offsets, indices, weights,
|
|
start, end);
|
|
}
|
|
if (du) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
du + duDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
duDesc.stride,
|
|
sizes, offsets, indices, duWeights,
|
|
start, end);
|
|
}
|
|
if (dv) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
dv + dvDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
dvDesc.stride,
|
|
sizes, offsets, indices, dvWeights,
|
|
start, end);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* static */
|
|
bool
|
|
CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
|
float *dst, BufferDescriptor const &dstDesc,
|
|
float *du, BufferDescriptor const &duDesc,
|
|
float *dv, BufferDescriptor const &dvDesc,
|
|
float *duu, BufferDescriptor const &duuDesc,
|
|
float *duv, BufferDescriptor const &duvDesc,
|
|
float *dvv, BufferDescriptor const &dvvDesc,
|
|
const int * sizes,
|
|
const int * offsets,
|
|
const int * indices,
|
|
const float * weights,
|
|
const float * duWeights,
|
|
const float * dvWeights,
|
|
const float * duuWeights,
|
|
const float * duvWeights,
|
|
const float * dvvWeights,
|
|
int start,
|
|
int end) {
|
|
// PERFORMANCE: need to combine 3 launches together
|
|
if (dst) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
dst + dstDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
dstDesc.stride,
|
|
sizes, offsets, indices, weights,
|
|
start, end);
|
|
}
|
|
if (du) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
du + duDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
duDesc.stride,
|
|
sizes, offsets, indices, duWeights,
|
|
start, end);
|
|
}
|
|
if (dv) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
dv + dvDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
dvDesc.stride,
|
|
sizes, offsets, indices, dvWeights,
|
|
start, end);
|
|
}
|
|
if (duu) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
duu + duuDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
duuDesc.stride,
|
|
sizes, offsets, indices, duuWeights,
|
|
start, end);
|
|
}
|
|
if (duv) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
duv + duvDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
duvDesc.stride,
|
|
sizes, offsets, indices, duvWeights,
|
|
start, end);
|
|
}
|
|
if (dvv) {
|
|
CudaEvalStencils(src + srcDesc.offset,
|
|
dvv + dvvDesc.offset,
|
|
srcDesc.length,
|
|
srcDesc.stride,
|
|
dvvDesc.stride,
|
|
sizes, offsets, indices, dvvWeights,
|
|
start, end);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* static */
|
|
bool
|
|
CudaEvaluator::EvalPatches(const float *src,
|
|
BufferDescriptor const &srcDesc,
|
|
float *dst,
|
|
BufferDescriptor const &dstDesc,
|
|
int numPatchCoords,
|
|
const PatchCoord *patchCoords,
|
|
const PatchArray *patchArrays,
|
|
const int *patchIndices,
|
|
const PatchParam *patchParams) {
|
|
if (src) src += srcDesc.offset;
|
|
if (dst) dst += dstDesc.offset;
|
|
|
|
CudaEvalPatches(src, dst,
|
|
srcDesc.length, srcDesc.stride, dstDesc.stride,
|
|
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
|
|
|
return true;
|
|
}
|
|
|
|
/* static */
|
|
bool
|
|
CudaEvaluator::EvalPatches(
|
|
const float *src, BufferDescriptor const &srcDesc,
|
|
float *dst, BufferDescriptor const &dstDesc,
|
|
float *du, BufferDescriptor const &duDesc,
|
|
float *dv, BufferDescriptor const &dvDesc,
|
|
int numPatchCoords,
|
|
const PatchCoord *patchCoords,
|
|
const PatchArray *patchArrays,
|
|
const int *patchIndices,
|
|
const PatchParam *patchParams) {
|
|
|
|
if (src) src += srcDesc.offset;
|
|
if (dst) dst += dstDesc.offset;
|
|
if (du) du += duDesc.offset;
|
|
if (dv) dv += dvDesc.offset;
|
|
|
|
CudaEvalPatchesWithDerivatives(
|
|
src, dst, du, dv, NULL, NULL, NULL,
|
|
srcDesc.length, srcDesc.stride, dstDesc.stride,
|
|
duDesc.stride, dvDesc.stride, 0, 0, 0,
|
|
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
|
return true;
|
|
}
|
|
|
|
/* static */
|
|
bool
|
|
CudaEvaluator::EvalPatches(
|
|
const float *src, BufferDescriptor const &srcDesc,
|
|
float *dst, BufferDescriptor const &dstDesc,
|
|
float *du, BufferDescriptor const &duDesc,
|
|
float *dv, BufferDescriptor const &dvDesc,
|
|
float *duu, BufferDescriptor const &duuDesc,
|
|
float *duv, BufferDescriptor const &duvDesc,
|
|
float *dvv, BufferDescriptor const &dvvDesc,
|
|
int numPatchCoords,
|
|
const PatchCoord *patchCoords,
|
|
const PatchArray *patchArrays,
|
|
const int *patchIndices,
|
|
const PatchParam *patchParams) {
|
|
|
|
if (src) src += srcDesc.offset;
|
|
if (dst) dst += dstDesc.offset;
|
|
if (du) du += duDesc.offset;
|
|
if (dv) dv += dvDesc.offset;
|
|
if (duu) duu += duuDesc.offset;
|
|
if (duv) duv += duvDesc.offset;
|
|
if (dvv) dvv += dvvDesc.offset;
|
|
|
|
CudaEvalPatchesWithDerivatives(
|
|
src, dst, du, dv, duu, duv, dvv,
|
|
srcDesc.length, srcDesc.stride, dstDesc.stride,
|
|
duDesc.stride, dvDesc.stride,
|
|
duuDesc.stride, duvDesc.stride, dvvDesc.stride,
|
|
numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams);
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
/* static */
|
|
void
|
|
CudaEvaluator::Synchronize(void * /*deviceContext*/) {
|
|
cudaThreadSynchronize();
|
|
}
|
|
|
|
} // end namespace Osd
|
|
|
|
} // end namespace OPENSUBDIV_VERSION
|
|
} // end namespace OpenSubdiv
|