// // Copyright 2015 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in // compliance with the Apache License and the following modification to it: // Section 6. Trademarks. is deleted and replaced with: // // 6. Trademarks. This License does not grant permission to use the trade // names, trademarks, service marks, or product names of the Licensor // and its affiliates, except as required to comply with Section 4(c) of // the License and to reproduce the content of the NOTICE file. // // You may obtain a copy of the Apache License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the Apache License with the above modification is // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the Apache License for the specific // language governing permissions and limitations under the Apache License. // #include "../osd/cudaEvaluator.h" #include #include #include "../far/stencilTable.h" #include "../osd/types.h" extern "C" { void CudaEvalStencils(const float *src, float *dst, int length, int srcStride, int dstStride, const int * sizes, const int * offsets, const int * indices, const float * weights, int start, int end); void CudaEvalPatches( const float *src, float *dst, int length, int srcStride, int dstStride, int numPatchCoords, const void *patchCoords, const void *patchArrays, const int *patchIndices, const void *patchParams); void CudaEvalPatchesWithDerivatives( const float *src, float *dst, float *du, float *dv, float *duu, float *duv, float *dvv, int length, int srcStride, int dstStride, int duStride, int dvStride, int duuStride, int duvStride, int dvvStride, int numPatchCoords, const void *patchCoords, const void *patchArrays, const int *patchIndices, const void *patchParams); } namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { template void * createCudaBuffer(std::vector const & src) { if (src.empty()) { return NULL; } void * devicePtr = 0; size_t size = src.size()*sizeof(T); cudaError_t err = cudaMalloc(&devicePtr, size); if (err != cudaSuccess) { return devicePtr; } err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice); if (err != cudaSuccess) { cudaFree(devicePtr); return 0; } return devicePtr; } // ---------------------------------------------------------------------------- CudaStencilTable::CudaStencilTable(Far::StencilTable const *stencilTable) { _numStencils = stencilTable->GetNumStencils(); if (_numStencils > 0) { _sizes = createCudaBuffer(stencilTable->GetSizes()); _offsets = createCudaBuffer(stencilTable->GetOffsets()); _indices = createCudaBuffer(stencilTable->GetControlIndices()); _weights = createCudaBuffer(stencilTable->GetWeights()); _duWeights = _dvWeights = NULL; _duuWeights = _duvWeights = _dvvWeights = NULL; } else { _sizes = _offsets = _indices = _weights = NULL; _duWeights = _dvWeights = NULL; _duuWeights = _duvWeights = _dvvWeights = NULL; } } CudaStencilTable::CudaStencilTable(Far::LimitStencilTable const *limitStencilTable) { _numStencils = limitStencilTable->GetNumStencils(); if (_numStencils > 0) { _sizes = createCudaBuffer(limitStencilTable->GetSizes()); _offsets = createCudaBuffer(limitStencilTable->GetOffsets()); _indices = createCudaBuffer(limitStencilTable->GetControlIndices()); _weights = createCudaBuffer(limitStencilTable->GetWeights()); _duWeights = createCudaBuffer(limitStencilTable->GetDuWeights()); _dvWeights = createCudaBuffer(limitStencilTable->GetDvWeights()); _duuWeights = createCudaBuffer(limitStencilTable->GetDuuWeights()); _duvWeights = createCudaBuffer(limitStencilTable->GetDuvWeights()); _dvvWeights = createCudaBuffer(limitStencilTable->GetDvvWeights()); } else { _sizes = _offsets = _indices = _weights = NULL; _duWeights = _dvWeights = NULL; _duuWeights = _duvWeights = _dvvWeights = NULL; } } CudaStencilTable::~CudaStencilTable() { if (_sizes) cudaFree(_sizes); if (_offsets) cudaFree(_offsets); if (_indices) cudaFree(_indices); if (_weights) cudaFree(_weights); if (_duWeights) cudaFree(_duWeights); if (_dvWeights) cudaFree(_dvWeights); if (_duuWeights) cudaFree(_duuWeights); if (_duvWeights) cudaFree(_duvWeights); if (_dvvWeights) cudaFree(_dvvWeights); } // --------------------------------------------------------------------------- /* static */ bool CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc, float *dst, BufferDescriptor const &dstDesc, const int * sizes, const int * offsets, const int * indices, const float * weights, int start, int end) { if (dst == NULL) return false; CudaEvalStencils(src + srcDesc.offset, dst + dstDesc.offset, srcDesc.length, srcDesc.stride, dstDesc.stride, sizes, offsets, indices, weights, start, end); return true; } /* static */ bool CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc, float *dst, BufferDescriptor const &dstDesc, float *du, BufferDescriptor const &duDesc, float *dv, BufferDescriptor const &dvDesc, const int * sizes, const int * offsets, const int * indices, const float * weights, const float * duWeights, const float * dvWeights, int start, int end) { // PERFORMANCE: need to combine 3 launches together if (dst) { CudaEvalStencils(src + srcDesc.offset, dst + dstDesc.offset, srcDesc.length, srcDesc.stride, dstDesc.stride, sizes, offsets, indices, weights, start, end); } if (du) { CudaEvalStencils(src + srcDesc.offset, du + duDesc.offset, srcDesc.length, srcDesc.stride, duDesc.stride, sizes, offsets, indices, duWeights, start, end); } if (dv) { CudaEvalStencils(src + srcDesc.offset, dv + dvDesc.offset, srcDesc.length, srcDesc.stride, dvDesc.stride, sizes, offsets, indices, dvWeights, start, end); } return true; } /* static */ bool CudaEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc, float *dst, BufferDescriptor const &dstDesc, float *du, BufferDescriptor const &duDesc, float *dv, BufferDescriptor const &dvDesc, float *duu, BufferDescriptor const &duuDesc, float *duv, BufferDescriptor const &duvDesc, float *dvv, BufferDescriptor const &dvvDesc, const int * sizes, const int * offsets, const int * indices, const float * weights, const float * duWeights, const float * dvWeights, const float * duuWeights, const float * duvWeights, const float * dvvWeights, int start, int end) { // PERFORMANCE: need to combine 3 launches together if (dst) { CudaEvalStencils(src + srcDesc.offset, dst + dstDesc.offset, srcDesc.length, srcDesc.stride, dstDesc.stride, sizes, offsets, indices, weights, start, end); } if (du) { CudaEvalStencils(src + srcDesc.offset, du + duDesc.offset, srcDesc.length, srcDesc.stride, duDesc.stride, sizes, offsets, indices, duWeights, start, end); } if (dv) { CudaEvalStencils(src + srcDesc.offset, dv + dvDesc.offset, srcDesc.length, srcDesc.stride, dvDesc.stride, sizes, offsets, indices, dvWeights, start, end); } if (duu) { CudaEvalStencils(src + srcDesc.offset, duu + duuDesc.offset, srcDesc.length, srcDesc.stride, duuDesc.stride, sizes, offsets, indices, duuWeights, start, end); } if (duv) { CudaEvalStencils(src + srcDesc.offset, duv + duvDesc.offset, srcDesc.length, srcDesc.stride, duvDesc.stride, sizes, offsets, indices, duvWeights, start, end); } if (dvv) { CudaEvalStencils(src + srcDesc.offset, dvv + dvvDesc.offset, srcDesc.length, srcDesc.stride, dvvDesc.stride, sizes, offsets, indices, dvvWeights, start, end); } return true; } /* static */ bool CudaEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc, float *dst, BufferDescriptor const &dstDesc, int numPatchCoords, const PatchCoord *patchCoords, const PatchArray *patchArrays, const int *patchIndices, const PatchParam *patchParams) { if (src) src += srcDesc.offset; if (dst) dst += dstDesc.offset; CudaEvalPatches(src, dst, srcDesc.length, srcDesc.stride, dstDesc.stride, numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams); return true; } /* static */ bool CudaEvaluator::EvalPatches( const float *src, BufferDescriptor const &srcDesc, float *dst, BufferDescriptor const &dstDesc, float *du, BufferDescriptor const &duDesc, float *dv, BufferDescriptor const &dvDesc, int numPatchCoords, const PatchCoord *patchCoords, const PatchArray *patchArrays, const int *patchIndices, const PatchParam *patchParams) { if (src) src += srcDesc.offset; if (dst) dst += dstDesc.offset; if (du) du += duDesc.offset; if (dv) dv += dvDesc.offset; CudaEvalPatchesWithDerivatives( src, dst, du, dv, NULL, NULL, NULL, srcDesc.length, srcDesc.stride, dstDesc.stride, duDesc.stride, dvDesc.stride, 0, 0, 0, numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams); return true; } /* static */ bool CudaEvaluator::EvalPatches( const float *src, BufferDescriptor const &srcDesc, float *dst, BufferDescriptor const &dstDesc, float *du, BufferDescriptor const &duDesc, float *dv, BufferDescriptor const &dvDesc, float *duu, BufferDescriptor const &duuDesc, float *duv, BufferDescriptor const &duvDesc, float *dvv, BufferDescriptor const &dvvDesc, int numPatchCoords, const PatchCoord *patchCoords, const PatchArray *patchArrays, const int *patchIndices, const PatchParam *patchParams) { if (src) src += srcDesc.offset; if (dst) dst += dstDesc.offset; if (du) du += duDesc.offset; if (dv) dv += dvDesc.offset; if (duu) duu += duuDesc.offset; if (duv) duv += duvDesc.offset; if (dvv) dvv += dvvDesc.offset; CudaEvalPatchesWithDerivatives( src, dst, du, dv, duu, duv, dvv, srcDesc.length, srcDesc.stride, dstDesc.stride, duDesc.stride, dvDesc.stride, duuDesc.stride, duvDesc.stride, dvvDesc.stride, numPatchCoords, patchCoords, patchArrays, patchIndices, patchParams); return true; } /* static */ void CudaEvaluator::Synchronize(void * /*deviceContext*/) { cudaThreadSynchronize(); } } // end namespace Osd } // end namespace OPENSUBDIV_VERSION } // end namespace OpenSubdiv