Add ISPC limit surface evaluation

This commit is contained in:
Sheng Fu 2015-07-20 14:12:11 -07:00
parent bd7b017c02
commit d3f8725e79
13 changed files with 1953 additions and 30 deletions

View File

@ -197,6 +197,8 @@ if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANGCC OR CMAKE_COMPILER_IS_IC
endif()
endforeach()
list(APPEND OSD_COMPILER_FLAGS -std=c++11)
endif()
elseif(MSVC)
@ -321,6 +323,9 @@ endif()
if(NOT NO_TBB)
find_package(TBB 4.0)
endif()
if(NOT NO_ISPC)
find_package(ISPC 1.6)
endif()
if (NOT NO_OPENGL)
find_package(OpenGL)
endif()
@ -539,6 +544,12 @@ if (NOT NO_MAYA)
endif()
endif()
if(ISPC_FOUND)
add_definitions(
-DOPENSUBDIV_HAS_ISPC
)
endif()
# Link examples & regressions dynamically against Osd
set( OSD_LINK_TARGET osd_dynamic_cpu osd_dynamic_gpu )

94
cmake/FindISPC.cmake Normal file
View File

@ -0,0 +1,94 @@
#
# Copyright 2013 Pixar
#
# Licensed under the Apache License, Version 2.0 (the "Apache License")
# with the following modification; you may not use this file except in
# compliance with the Apache License and the following modification to it:
# Section 6. Trademarks. is deleted and replaced with:
#
# 6. Trademarks. This License does not grant permission to use the trade
# names, trademarks, service marks, or product names of the Licensor
# and its affiliates, except as required to comply with Section 4(c) of
# the License and to reproduce the content of the NOTICE file.
#
# You may obtain a copy of the Apache License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License with the above modification is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the Apache License for the specific
# language governing permissions and limitations under the Apache License.
#
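# - Try to find Intel's ISPC compiler
#
# Input:
#   ISPC_LOCATION - Directory to search for the `ispc` executable
#
# Once done this will define
#
#   ISPC_FOUND   - System has ISPC
#   ISPC_DIR     - The ISPC directory (the one containing `ispc`)
#   ISPC_VERSION - Version string parsed from `ispc --version`
#
# It also provides the ispc_compile(<sources>...) macro defined below.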
# Obtain ISPC directory
# Locate the directory holding the `ispc` executable. Only the user-supplied
# ISPC_LOCATION is searched (default and system environment paths are
# excluded). Windows and macOS lookups are NOT IMPLEMENTED, so no search is
# performed on those platforms.
if (NOT WIN32 AND NOT APPLE)
    find_path(ISPC_DIR
        NAMES ispc
        PATHS ${ISPC_LOCATION}
        NO_DEFAULT_PATH NO_SYSTEM_ENVIRONMENT_PATH
        DOC "The directory where ISPC reside")
endif ()
# Ask the located compiler for its version so that find_package(ISPC <ver>)
# can enforce a minimum version through VERSION_VAR.
if (ISPC_DIR)
    execute_process(
        COMMAND "${ISPC_DIR}/ispc" --version
        OUTPUT_VARIABLE ISPC_VERSION)
    # Dots are escaped and each component may have several digits, so that
    # versions such as 1.10.0 are matched and unrelated text is not.
    # The input is quoted: an empty or multi-element output would otherwise
    # change the argument count of string(REGEX MATCH).
    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" ISPC_VERSION "${ISPC_VERSION}")
endif ()
# Standard find-module epilogue: ISPC_FOUND is set when ISPC_DIR was located
# and ISPC_VERSION satisfies the version passed to find_package().
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ISPC REQUIRED_VARS ISPC_DIR VERSION_VAR ISPC_VERSION)

# Keep the cache entry out of the non-advanced cache view.
mark_as_advanced(ISPC_DIR)
# ispc_compile(<src>...)
#
# Adds one custom build rule per ISPC source file (paths relative to
# CMAKE_CURRENT_SOURCE_DIR). For a source `foo.ispc` the rule produces
#   ${ISPC_TARGET_DIR}/foo.dev.o    - object file
#   ${ISPC_TARGET_DIR}/foo_ispc.h   - generated header (-h)
# and also passes ${ISPC_TARGET_DIR}/foo.dev.idep to ispc's -MMM option.
#
# This is intentionally a macro: it sets the following variables in the
# caller's scope:
#   ISPC_TARGET_DIR - directory receiving the generated files
#   ISPC_OBJECTS    - list of the generated object files
macro(ispc_compile)
    set(ISPC_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/osd_ispc_obj.dir)
    # ispc writes straight into this directory; create it up front rather than
    # relying on CMake having generated it for a target of the same name.
    file(MAKE_DIRECTORY ${ISPC_TARGET_DIR})
    set(ISPC_OBJECTS "")
    foreach(src ${ARGN})
        get_filename_component(fname ${src} NAME_WE)
        set(results "${ISPC_TARGET_DIR}/${fname}.dev.o")
        add_custom_command(
            OUTPUT ${results} ${ISPC_TARGET_DIR}/${fname}_ispc.h
            COMMAND ${ISPC_DIR}/ispc
                --pic
                -O1
                --wno-perf
                --woff
                -h ${ISPC_TARGET_DIR}/${fname}_ispc.h
                -MMM ${ISPC_TARGET_DIR}/${fname}.dev.idep
                -o ${results}
                ${CMAKE_CURRENT_SOURCE_DIR}/${src}
            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src}
            COMMENT "Compiling ISPC source ${src}"
            # VERBATIM makes argument escaping generator-independent; the stray
            # `\;` argument that used to follow the source file is dropped.
            VERBATIM
        )
        list(APPEND ISPC_OBJECTS ${results})
    endforeach()
endmacro()

View File

@ -34,6 +34,10 @@ GLFWmonitor* g_primary=0;
#include <osd/cpuGLVertexBuffer.h>
#include <osd/mesh.h>
#ifdef OPENSUBDIV_HAS_ISPC
#include <osd/ispcEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbEvaluator.h>
#endif
@ -104,7 +108,8 @@ enum KernelType { kCPU = 0,
kCUDA = 3,
kCL = 4,
kGLXFB = 5,
kGLCompute = 6 };
kGLCompute = 6,
kISPC = 7 };
enum EndCap { kEndCapBSplineBasis,
kEndCapGregoryBasis };
@ -169,10 +174,10 @@ float g_currentTime = 0;
Stopwatch g_fpsTimer;
//------------------------------------------------------------------------------
int g_nParticles = 65536;
int g_nParticles = 655360;
bool g_randomStart = true;//false;
bool g_animParticles = true;
bool g_animParticles = false;
GLuint g_samplesVAO=0;
@ -439,7 +444,9 @@ updateGeom() {
assert(g_particles);
float elapsed = g_currentTime - g_prevTime;
g_particles->Update(elapsed);
if(elapsed != 0.0f) {
g_particles->Update(elapsed);
}
g_prevTime = g_currentTime;
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
@ -464,7 +471,7 @@ updateGeom() {
}
s.Stop();
g_evalTime = float(s.GetElapsed());
}
@ -648,8 +655,20 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable,
&glComputeEvaluatorCache);
}
#endif
}
#if defined(OPENSUBDIV_HAS_ISPC) && defined(OPENSUBDIV_HAS_TBB)
else if(g_kernel == kISPC) {
g_evalOutput = new EvalOutput<Osd::CpuGLVertexBuffer,
Osd::CpuGLVertexBuffer,
Far::StencilTable,
Osd::CpuPatchTable,
Osd::IspcEvaluator>
(vertexStencils, varyingStencils,
nCoarseVertices, nverts, g_nParticles, g_patchTable);
}
#endif
// Create the 'uv particles' manager - this class manages the limit
// location samples (ptex face index, (s,t) and updates them between frames.
@ -875,7 +894,7 @@ display() {
}
if (g_endCap != kEndCapBSplineBasis &&
(g_kernel != kCPU && g_kernel != kOPENMP && g_kernel != kTBB)) {
(g_kernel != kCPU && g_kernel != kOPENMP && g_kernel != kTBB && g_kernel != kISPC)) {
static char msg[] =
"ERROR: This kernel only supports BSpline basis patches.";
g_hud.DrawString(g_width/4, g_height/4+20, 1, 0, 0, msg);
@ -1129,6 +1148,9 @@ initHUD() {
#ifdef OPENSUBDIV_HAS_TBB
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
#endif
#if defined(OPENSUBDIV_HAS_ISPC) && defined(OPENSUBDIV_HAS_TBB)
g_hud.AddPullDownButton(compute_pulldown, "ISPC", kISPC);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
#endif

View File

@ -32,17 +32,17 @@
#ifdef OPENSUBDIV_HAS_TBB
#include <tbb/parallel_for.h>
#include <tbb/atomic.h>
tbb::atomic<int> g_tbbCounter;
class TbbUpdateKernel {
public:
TbbUpdateKernel(float speed,
STParticles::Position *positions,
float *velocities,
std::vector<STParticles::FaceInfo> const &adjacency,
OpenSubdiv::Osd::PatchCoord *patchCoords,
PatchHandleMap *patchHandleMap,
OpenSubdiv::Far::PatchMap const *patchMap) :
_speed(speed), _positions(positions), _velocities(velocities),
_adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) {
_adjacency(adjacency), _patchHandleMap(patchHandleMap), _patchMap(patchMap) {
}
void operator () (tbb::blocked_range<int> const &r) const {
@ -76,9 +76,13 @@ public:
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
if (handle) {
int index = g_tbbCounter.fetch_and_add(1);
_patchCoords[index] =
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t);
PatchHandleMap::accessor a;
if( !_patchHandleMap->find(a, handle)) {
_patchHandleMap->insert(a, handle);
}
std::vector<float> &st = a->second;
st.push_back(p->s);
st.push_back(p->t);
}
}
}
@ -87,7 +91,7 @@ private:
STParticles::Position *_positions;
float *_velocities;
std::vector<STParticles::FaceInfo> const &_adjacency;
OpenSubdiv::Osd::PatchCoord *_patchCoords;
PatchHandleMap *_patchHandleMap;
OpenSubdiv::Far::PatchMap const *_patchMap;
};
#endif
@ -276,18 +280,36 @@ STParticles::Update(float deltaTime) {
if (deltaTime == 0) return;
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
_patchCoords.clear();
// XXX: this process should be parallelized.
#ifdef OPENSUBDIV_HAS_TBB
_patchCoords.resize((int)GetNumParticles());
_patchHandleMap.clear();
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
_adjacency, &_patchCoords[0], _patchMap);;
g_tbbCounter = 0;
_adjacency, &_patchHandleMap, _patchMap);;
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
tbb::parallel_for(range, kernel);
_patchCoords.resize(g_tbbCounter);
int nCoord = 0;
for(PatchHandleMap::iterator i = _patchHandleMap.begin();
i != _patchHandleMap.end();
i ++) {
nCoord += (i->second.size() / 2);
}
_patchCoords.resize(nCoord);
int index = 0;
for(PatchHandleMap::iterator i = _patchHandleMap.begin();
i != _patchHandleMap.end();
i ++) {
for(int j = 0; j < i->second.size(); j += 2) {
_patchCoords[index].handle = *(i->first);
_patchCoords[index].s = i->second[j];
_patchCoords[index].t = i->second[j+1];
index ++;
}
}
#else
Position * p = &_positions[0];
float * dp = &_velocities[0];
@ -323,7 +345,7 @@ STParticles::Update(float deltaTime) {
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t));
}
}
#endif
#endif
}
// Dump adjacency info

View File

@ -30,6 +30,11 @@
#include <osd/types.h>
#include <iostream>
#ifdef OPENSUBDIV_HAS_TBB
#include <tbb/concurrent_hash_map.h>
typedef tbb::concurrent_hash_map< OpenSubdiv::Far::PatchTable::PatchHandle const*, std::vector<float> > PatchHandleMap;
#endif
//
// In order to emphasize the dynamic nature of the EvalLimit API, where the
// locations can be arbitrarily updated before each evaluation, the glEvalLimit
@ -142,7 +147,7 @@ public:
return _velocities;
}
std::vector<OpenSubdiv::Osd::PatchCoord> GetPatchCoords() const {
std::vector<OpenSubdiv::Osd::PatchCoord> const &GetPatchCoords() const {
return _patchCoords;
}
@ -159,6 +164,10 @@ private:
std::vector<Position> _positions;
std::vector<float> _velocities;
#ifdef OPENSUBDIV_HAS_TBB
PatchHandleMap _patchHandleMap;
#endif
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;

View File

@ -147,9 +147,16 @@ if (NOT NO_LIB)
)
set_target_properties(osd_static_cpu PROPERTIES OUTPUT_NAME osdCPU CLEAN_DIRECT_OUTPUT 1)
target_link_libraries(osd_static_cpu
${PLATFORM_CPU_LIBRARIES}
)
# Link the ISPC kernel objects first (only when ISPC is available), followed
# by the platform CPU libraries. Successive target_link_libraries() calls
# append, so the resulting link order matches a single combined call.
if (ISPC_FOUND)
    target_link_libraries(osd_static_cpu osd_ispc_obj)
endif()
target_link_libraries(osd_static_cpu
    ${PLATFORM_CPU_LIBRARIES}
)
install( TARGETS osd_static_cpu DESTINATION "${CMAKE_LIBDIR_BASE}" )
@ -200,9 +207,16 @@ if (NOT NO_LIB)
)
endif()
target_link_libraries(osd_dynamic_cpu
${PLATFORM_CPU_LIBRARIES}
)
# Link the ISPC kernel objects first (only when ISPC is available), followed
# by the platform CPU libraries. Successive target_link_libraries() calls
# append, so the resulting link order matches a single combined call.
if (ISPC_FOUND)
    target_link_libraries(osd_dynamic_cpu osd_ispc_obj)
endif()
target_link_libraries(osd_dynamic_cpu
    ${PLATFORM_CPU_LIBRARIES}
)
install( TARGETS osd_dynamic_cpu LIBRARY DESTINATION "${CMAKE_LIBDIR_BASE}" )

View File

@ -116,6 +116,15 @@ struct PatchParam {
///
void Normalize( float & u, float & v ) const;
/// This function is the reverse operation of function Normalize()
/// The (u,v) pair is converted from patch sub-parametric space to control
/// face parametric space.
///
/// @param u u parameter
/// @param v v parameter
///
void Denormalize( float & u, float & v) const;
unsigned int field0:32;
unsigned int field1:32;
};
@ -161,6 +170,20 @@ PatchParam::Normalize( float & u, float & v ) const {
v = (v - pv) / frac;
}
inline void
PatchParam::Denormalize( float & u, float & v ) const {

    // Inverse of Normalize(): scale (u,v) by the patch's parametric fraction
    // and translate it by the patch origin.
    float const fraction = GetParamFraction();

    // origin of the patch
    float const originU = (float)GetU()*fraction;
    float const originV = (float)GetV()*fraction;

    // denormalize u,v coordinates
    u = u * fraction + originU;
    v = v * fraction + originV;
}
} // end namespace Far
} // end namespace OPENSUBDIV_VERSION

View File

@ -68,6 +68,12 @@ public:
Index arrayIndex, // Array index of the patch
patchIndex, // Absolute Index of the patch
vertIndex; // Relative offset to the first CV of the patch in array
bool isEqual(const PatchHandle &other) {
return other.arrayIndex == arrayIndex &&
other.patchIndex == patchIndex &&
other.vertIndex == vertIndex;
}
};
public:

View File

@ -26,6 +26,7 @@
#-------------------------------------------------------------------------------
# source & headers
set(CPU_SOURCE_FILES
cpuEvaluator.cpp
cpuKernel.cpp
@ -33,8 +34,12 @@ set(CPU_SOURCE_FILES
cpuVertexBuffer.cpp
)
set(GPU_SOURCE_FILES )
if( ISPC_FOUND)
list(APPEND CPU_SOURCE_FILES ispcEvaluator.cpp)
endif()
set(GPU_SOURCE_FILES )
set(ISPC_SOURCE_FILES )
set(INC_FILES )
set(PRIVATE_HEADER_FILES
@ -296,6 +301,17 @@ if( CUDA_FOUND )
endif()
endif()
if (ISPC_FOUND)
    list(APPEND ISPC_SOURCE_FILES ispcEvalLimitKernel.ispc)

    # Generate the ISPC build rules; ispc_compile() leaves the resulting
    # object files in ISPC_OBJECTS.
    ispc_compile(${ISPC_SOURCE_FILES})

    # Wrap the objects in a static library. Only object files are listed, so
    # the linker language is set explicitly.
    add_library(osd_ispc_obj STATIC ${ISPC_OBJECTS})
    set_target_properties(osd_ispc_obj PROPERTIES LINKER_LANGUAGE C)
endif()
list(APPEND DOXY_HEADER_FILES ${CUDA_PUBLIC_HEADERS})
#-------------------------------------------------------------------------------

View File

@ -0,0 +1,880 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#define MAX_CHANNEL 4
// Describes how elements are laid out inside a flat float buffer.
// The kernels in this file address element i at
//     base + offset + i * stride
// and derive the number of 3-float channels per element as length / 3.
struct BufferDescriptor {
int offset; // offset (in floats) to the first element's data
int length; // number of floats per element
int stride; // distance (in floats) from one element to the next
};
// Three-component float value. The kernels in this file load one Point per
// channel from three consecutive floats of a buffer element.
struct Point {
float x;
float y;
float z;
};
// Component-wise sum of two varying Points.
inline struct Point operator+(struct Point a, struct Point b) {
    // 'a' is a by-value copy, so it can be accumulated into directly.
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
    return a;
}
// Component-wise sum of two uniform Points.
inline uniform struct Point operator+(uniform struct Point a, uniform struct Point b) {
    // 'a' is a by-value copy, so it can be accumulated into directly.
    a.x += b.x;
    a.y += b.y;
    a.z += b.z;
    return a;
}
// Component-wise difference (a - b) of two varying Points.
inline struct Point operator-(struct Point a, struct Point b) {
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
    return a;
}
// Component-wise difference (a - b) of two uniform Points.
inline uniform struct Point operator-(uniform struct Point a, uniform struct Point b) {
    a.x -= b.x;
    a.y -= b.y;
    a.z -= b.z;
    return a;
}
// Scales every component of a varying Point by the scalar b.
inline struct Point operator*(struct Point a, float b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}
// Scales every component of a uniform Point by the uniform scalar b.
inline uniform struct Point operator*(uniform struct Point a, uniform float b) {
    a.x *= b;
    a.y *= b;
    a.z *= b;
    return a;
}
// Scalar-on-the-left form: scales every component of a varying Point by b.
inline struct Point operator*(float b, struct Point a) {
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    return a;
}
// Scalar-on-the-left form: scales every component of a uniform Point by b.
inline uniform struct Point operator*(uniform float b, uniform struct Point a) {
    a.x = b * a.x;
    a.y = b * a.y;
    a.z = b * a.z;
    return a;
}
// Divides every component of a varying Point by the scalar b.
inline struct Point operator/(struct Point a, float b) {
    a.x /= b;
    a.y /= b;
    a.z /= b;
    return a;
}
// Divides every component of a uniform Point by the uniform scalar b.
inline uniform struct Point operator/(uniform struct Point a, uniform float b) {
    a.x /= b;
    a.y /= b;
    a.z /= b;
    return a;
}
// Writes the cross product a x b into c.
//
// NOTE: c must not refer to the same Point as a or b. Each component of c is
// stored before the remaining components are computed, so an aliased input
// would be read after it was partially overwritten.
inline void cross(struct Point &a, struct Point &b, struct Point &c)
{
c.x = a.y*b.z - a.z*b.y;
c.y = a.z*b.x - a.x*b.z;
c.z = a.x*b.y - a.y*b.x;
}
// Returns true when bit 3 of the packed patch-param bit field is set.
inline uniform bool
nonQuadRoot(uniform unsigned int bitField)
{
    return ((bitField >> 3) & 0x1) != 0;
}
// Extracts the 10-bit U field stored in bits 22..31 of the bit field.
inline uniform unsigned int getU(uniform unsigned int bitField)
{
    uniform unsigned int shifted = bitField >> 22;
    return (uniform unsigned int)(shifted & 0x3ff);
}
// Extracts the 10-bit V field stored in bits 12..21 of the bit field.
inline uniform unsigned int getV(uniform unsigned int bitField)
{
    uniform unsigned int shifted = bitField >> 12;
    return (uniform unsigned int)(shifted & 0x3ff);
}
// Extracts the 4-bit boundary mask stored in bits 8..11 of the bit field.
inline uniform unsigned int getBoundary(uniform unsigned int bitField)
{
    uniform unsigned int shifted = bitField >> 8;
    return (uniform unsigned int)(shifted & 0xf);
}
// Extracts the 4-bit depth stored in the lowest bits (0..3) of the bit field.
inline uniform unsigned int getDepth(uniform unsigned int bitField)
{
    uniform unsigned int lowBits = bitField & 0xf;
    return (uniform unsigned int)lowBits;
}
// Returns 1 / 2^depth, or 1 / 2^(depth-1) when the non-quad-root bit is set.
inline uniform float
getParamFraction(uniform unsigned int bitField) {
    uniform unsigned int shift = getDepth(bitField);
    if (nonQuadRoot(bitField)) {
        // one level fewer for patches flagged as non-quad roots
        shift = shift - 1;
    }
    return 1.0f / (1 << shift);
}
// Adjusts the s/t weight quadruples in place according to the 4-bit boundary
// mask of the patch. For every set bit the outermost weight on that side is
// folded into its two neighbours (neighbour += 2*w, next neighbour -= w) and
// then zeroed. Bits 0 and 2 act on tWeights, bits 1 and 3 on sWeights.
inline void
adjustBoundaryWeights(uniform unsigned int bitField,
                      float sWeights[4],
                      float tWeights[4]) {
    uniform int edges = getBoundary(bitField);

    if ((edges & 1) != 0) {
        float outer = tWeights[0];
        tWeights[2] -= outer;
        tWeights[1] += 2*outer;
        tWeights[0] = 0;
    }
    if ((edges & 2) != 0) {
        float outer = sWeights[3];
        sWeights[1] -= outer;
        sWeights[2] += 2*outer;
        sWeights[3] = 0;
    }
    if ((edges & 4) != 0) {
        float outer = tWeights[3];
        tWeights[1] -= outer;
        tWeights[2] += 2*outer;
        tWeights[3] = 0;
    }
    if ((edges & 8) != 0) {
        float outer = sWeights[0];
        sWeights[2] -= outer;
        sWeights[1] += 2*outer;
        sWeights[0] = 0;
    }
}
// Computes the four uniform cubic B-Spline basis weights at parameter t and
// their first derivatives with respect to t.
//
//   t     - parameter value
//   point - receives the 4 basis weights
//   deriv - receives the 4 derivative weights
inline void
getBSplineWeights(float t, float point[4], float deriv[4]) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float const one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
point[3] = one6th * ( t3);
// Derivatives of the above four basis functions at t:
deriv[0] = -0.5f*t2 + t - 0.5f;
deriv[1] = 1.5f*t2 - 2.0f*t;
deriv[2] = -1.5f*t2 + t + 0.5f;
deriv[3] = 0.5f*t2;
}
// Computes the four cubic Bezier (Bernstein) basis weights at parameter t and
// their first derivatives with respect to t.
//
//   t     - parameter value
//   point - receives the 4 basis weights
//   deriv - receives the 4 derivative weights
inline void
getBezierWeights(float t, float point[4], float deriv[4]) {
// The four uniform cubic Bezier basis functions (in terms of t and its
// complement tC) evaluated at t:
float t2 = t*t;
float tC = 1.0f - t;
float tC2 = tC * tC;
point[0] = tC2 * tC;
point[1] = tC2 * t * 3.0f;
point[2] = t2 * tC * 3.0f;
point[3] = t2 * t;
// Derivatives of the above four basis functions at t:
deriv[0] = -3.0f * tC2;
deriv[1] = 9.0f * t2 - 12.0f * t + 3.0f;
deriv[2] = -9.0f * t2 + 6.0f * t;
deriv[3] = 3.0f * t2;
}
// Point-only variant of getBSplineWeights(): computes the four uniform cubic
// B-Spline basis weights at parameter t without any derivative weights.
//
//   t     - parameter value
//   point - receives the 4 basis weights
inline void
getBSplineWeightsNoDerivative(float t, float point[4]) {
// The four uniform cubic B-Spline basis functions evaluated at t:
float const one6th = 1.0f / 6.0f;
float t2 = t * t;
float t3 = t * t2;
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
point[3] = one6th * ( t3);
}
// Point-only variant of getBezierWeights(): computes the four cubic Bezier
// basis weights at parameter t without any derivative weights.
//
//   t     - parameter value
//   point - receives the 4 basis weights
inline void
getBezierWeightsNoDerivative(float t, float point[4]) {
// The four uniform cubic Bezier basis functions (in terms of t and its
// complement tC) evaluated at t:
float t2 = t*t;
float tC = 1.0f - t;
float tC2 = tC * tC;
point[0] = tC2 * tC;
point[1] = tC2 * t * 3.0f;
point[2] = t2 * tC * 3.0f;
point[3] = t2 * t;
}
// Evaluates one bilinear (4 control vertex) patch at nPoint locations and
// writes positions and both derivatives.
//
//   bitField      - packed patch param (not used by this kernel)
//   nPoint        - number of (u,v) samples
//   u, v          - sample coordinates, nPoint entries each
//   vertexIndices - indices of the 4 control vertices
//   inDesc/inQ    - layout and data of the source buffer; inDesc.length / 3
//                   gives the number of 3-float channels per vertex
//   outDesc/outQ  - layout and data of the position output (sample n is
//                   written at outQ + offset + n * stride)
//   duDesc/outDQU - layout and data of the d/du output
//   dvDesc/outDQV - layout and data of the d/dv output
//
// The derivatives written are constant over the patch: half the sum of the
// two opposite edge differences.
export void
evalBilinear(uniform unsigned int bitField,
             uniform int nPoint,
             uniform const float * uniform u,
             uniform const float * uniform v,
             uniform const int * uniform vertexIndices,
             uniform const BufferDescriptor &inDesc,
             uniform const float * uniform inQ,
             uniform const BufferDescriptor &outDesc,
             uniform float *uniform outQ,
             uniform const BufferDescriptor &duDesc,
             uniform float *uniform outDQU,
             uniform const BufferDescriptor &dvDesc,
             uniform float *uniform outDQV)
{
    uniform int nChannel = inDesc.length / 3;
    // The local arrays hold MAX_CHANNEL channels, so nChannel == MAX_CHANNEL
    // is still in range.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, stored channel-major: [channel][vertex].
    uniform Point controlVertices[MAX_CHANNEL*4];
    for(uniform int i=0; i<4; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;

        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 4 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    // Positions: bilinear blend of the four control vertices per sample.
    foreach( n = 0 ... nPoint) {
        float ou = 1.0f - u[n];
        float ov = 1.0f - v[n];
        float w[4] = { ov*ou, v[n]*ou, v[n]*u[n], ov*u[n] };

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            Point Q;
            Q.x = Q.y = Q.z = 0.0;

            for (uniform int i=0; i<4; ++i) {
                Q = Q + w[i] * controlVertices[c * 4 + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }
    }

    // Derivatives: computed once per channel, then replicated to every sample.
    uniform Point dU[MAX_CHANNEL], dV[MAX_CHANNEL];
    for(uniform int c=0; c<nChannel; c++) {
        dU[c] = 0.5 * (controlVertices[c * 4 + 3] - controlVertices[c * 4 + 0] +
                       controlVertices[c * 4 + 2] - controlVertices[c * 4 + 1] );
        dV[c] = 0.5 * (controlVertices[c * 4 + 1] - controlVertices[c * 4 + 0] +
                       controlVertices[c * 4 + 2] - controlVertices[c * 4 + 3] );
    }

    foreach( n = 0 ... nPoint) {
        float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
        float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            *pOutDQU ++ = dU[c].x, *pOutDQU ++ = dU[c].y, *pOutDQU ++ = dU[c].z;
            *pOutDQV ++ = dV[c].x, *pOutDQV ++ = dV[c].y, *pOutDQV ++ = dV[c].z;
        }
    }
}
// Evaluates one bilinear (4 control vertex) patch at nPoint locations and
// writes positions only.
//
//   bitField      - packed patch param (not used by this kernel)
//   nPoint        - number of (u,v) samples
//   u, v          - sample coordinates, nPoint entries each
//   vertexIndices - indices of the 4 control vertices
//   inDesc/inQ    - layout and data of the source buffer; inDesc.length / 3
//                   gives the number of 3-float channels per vertex
//   outDesc/outQ  - layout and data of the position output (sample n is
//                   written at outQ + offset + n * stride)
export void
evalBilinearNoDerivative(uniform unsigned int bitField,
                         uniform int nPoint,
                         uniform const float * uniform u,
                         uniform const float * uniform v,
                         uniform const int * uniform vertexIndices,
                         uniform const BufferDescriptor &inDesc,
                         uniform const float * uniform inQ,
                         uniform const BufferDescriptor &outDesc,
                         uniform float *uniform outQ)
{
    uniform int nChannel = inDesc.length / 3;
    // controlVertices holds MAX_CHANNEL channels, so nChannel == MAX_CHANNEL
    // is still in range.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, stored channel-major: [channel][vertex].
    uniform Point controlVertices[MAX_CHANNEL*4];
    for(uniform int i=0; i<4; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;

        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 4 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    // Positions: bilinear blend of the four control vertices per sample.
    foreach( n = 0 ... nPoint) {
        float ou = 1.0f - u[n];
        float ov = 1.0f - v[n];
        float w[4] = { ov*ou, v[n]*ou, v[n]*u[n], ov*u[n] };

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            Point Q;
            Q.x = Q.y = Q.z = 0.0;

            for (uniform int i=0; i<4; ++i) {
                Q = Q + w[i] * controlVertices[c * 4 + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }
    }
}
// Evaluates one bicubic B-Spline (16 control vertex) patch at nPoint
// locations and writes positions and both derivatives.
//
//   bitField      - packed patch param; supplies depth, U/V origin, the
//                   non-quad-root flag and the boundary mask
//   nPoint        - number of (u,v) samples
//   u, v          - sample coordinates, nPoint entries each; they are mapped
//                   into the patch as (u - pu) / frac before evaluation
//   vertexIndices - indices of the 16 control vertices
//   inDesc/inQ    - layout and data of the source buffer; inDesc.length / 3
//                   gives the number of 3-float channels per vertex
//   outDesc/outQ  - layout and data of the position output (sample n is
//                   written at outQ + offset + n * stride)
//   duDesc/outDQU - layout and data of the d/du output
//   dvDesc/outDQV - layout and data of the d/dv output
export void
evalBSpline(uniform unsigned int bitField,
            uniform int nPoint,
            uniform const float * uniform u,
            uniform const float * uniform v,
            uniform const int * uniform vertexIndices,
            uniform const BufferDescriptor &inDesc,
            uniform const float * uniform inQ,
            uniform const BufferDescriptor &outDesc,
            uniform float *uniform outQ,
            uniform const BufferDescriptor &duDesc,
            uniform float *uniform outDQU,
            uniform const BufferDescriptor &dvDesc,
            uniform float *uniform outDQV)
{
    uniform int nChannel = inDesc.length / 3;
    // controlVertices holds MAX_CHANNEL channels, so nChannel == MAX_CHANNEL
    // is still in range.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, stored channel-major: [channel][vertex].
    uniform Point controlVertices[MAX_CHANNEL*16];
    for(uniform int i=0; i<16; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;

        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    // Derivative weights are scaled by 2^depth.
    uniform float dScale = (uniform float)(1 << getDepth(bitField));

    uniform float frac = getParamFraction(bitField);

    // top left corner
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach( n = 0 ... nPoint) {
        // normalize u,v coordinates
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];
        getBSplineWeights(s, sWeights, dsWeights);
        getBSplineWeights(t, tWeights, dtWeights);

        // Boundary handling is applied to both point and derivative weights.
        adjustBoundaryWeights(bitField, sWeights, tWeights);
        adjustBoundaryWeights(bitField, dsWeights, dtWeights);

        // Tensor-product point weights, row (t) major.
        float weight[16];
        for (uniform int i = 0; i < 4; ++i) {
            for (uniform int j = 0; j < 4; ++j) {
                weight[4*i+j] = sWeights[j] * tWeights[i];
            }
        }

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16;

            Point Q;
            Q.x = Q.y = Q.z = 0.0;

            for (uniform int i=0; i<16; ++i) {
                Q = Q + weight[i] * controlVertices[offset + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }

        // Tensor-product derivative weights, scaled by dScale.
        float derivS[16], derivT[16];
        for (uniform int i = 0; i < 4; ++i) {
            for (uniform int j = 0; j < 4; ++j) {
                derivS[4*i+j] = dsWeights[j] * tWeights[i] * dScale;
                derivT[4*i+j] = sWeights[j] * dtWeights[i] * dScale;
            }
        }

        float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
        float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16;

            Point DQU, DQV;
            DQU.x = DQU.y = DQU.z = 0.0;
            DQV.x = DQV.y = DQV.z = 0.0;

            for (uniform int i=0; i<16; ++i) {
                DQU = DQU + derivS[i] * controlVertices[offset + i];
                DQV = DQV + derivT[i] * controlVertices[offset + i];
            }

            *pOutDQU ++ = DQU.x, *pOutDQU ++ = DQU.y, *pOutDQU ++ = DQU.z;
            *pOutDQV ++ = DQV.x, *pOutDQV ++ = DQV.y, *pOutDQV ++ = DQV.z;
        }
    }
}
// Evaluates one bicubic B-Spline (16 control vertex) patch at nPoint
// locations and writes positions only.
//
//   bitField      - packed patch param; supplies depth, U/V origin, the
//                   non-quad-root flag and the boundary mask
//   nPoint        - number of (u,v) samples
//   u, v          - sample coordinates, nPoint entries each; they are mapped
//                   into the patch as (u - pu) / frac before evaluation
//   vertexIndices - indices of the 16 control vertices
//   inDesc/inQ    - layout and data of the source buffer; inDesc.length / 3
//                   gives the number of 3-float channels per vertex
//   outDesc/outQ  - layout and data of the position output (sample n is
//                   written at outQ + offset + n * stride)
export void
evalBSplineNoDerivative(uniform unsigned int bitField,
                        uniform int nPoint,
                        uniform const float * uniform u,
                        uniform const float * uniform v,
                        uniform const int * uniform vertexIndices,
                        uniform const BufferDescriptor &inDesc,
                        uniform const float * uniform inQ,
                        uniform const BufferDescriptor &outDesc,
                        uniform float *uniform outQ)
{
    uniform int nChannel = inDesc.length / 3;
    // controlVertices holds MAX_CHANNEL channels, so nChannel == MAX_CHANNEL
    // is still in range.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, stored channel-major: [channel][vertex].
    uniform Point controlVertices[MAX_CHANNEL*16];
    for(uniform int i=0; i<16; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;

        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    uniform float frac = getParamFraction(bitField);

    // top left corner
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach( n = 0 ... nPoint) {
        // normalize u,v coordinates
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float sWeights[4], tWeights[4];
        getBSplineWeightsNoDerivative(s, sWeights);
        getBSplineWeightsNoDerivative(t, tWeights);

        adjustBoundaryWeights(bitField, sWeights, tWeights);

        // Tensor-product point weights, row (t) major.
        float weight[16];
        for (uniform int i = 0; i < 4; ++i) {
            for (uniform int j = 0; j < 4; ++j) {
                weight[4*i+j] = sWeights[j] * tWeights[i];
            }
        }

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16;

            Point Q;
            Q.x = Q.y = Q.z = 0.0;

            for (uniform int i=0; i<16; ++i) {
                Q = Q + weight[i] * controlVertices[offset + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }
    }
}
// Computes the 20 Gregory-basis weights at (s,t) together with the weights
// for the two first derivatives.
//
//   bitField - packed patch param; only its depth is used (derivative scale)
//   s, t     - parametric location within the patch
//   point    - receives the 20 point weights
//   deriv1   - receives the 20 d/ds weights (scaled by 2^depth)
//   deriv2   - receives the 20 d/dt weights (scaled by 2^depth)
void getGregoryWeights(uniform unsigned int bitField,
                       float s, float t, float point[20], float deriv1[20], float deriv2[20]) {
    //
    //  P3         e3-      e2+         P2
    //     15------17-------11--------10
    //     |        |        |        |
    //     |        |        |        |
    //     |        | f3-    | f2+    |
    //     |       19       13        |
    // e3+ 16-----18           14-----12 e2-
    //     |     f3+          f2-     |
    //     |                          |
    //     |                          |
    //     |      f0-         f1+     |
    // e0- 2------4            8------6 e1+
    //     |        3        9        |
    //     |        | f0+    | f1-    |
    //     |        |        |        |
    //     |        |        |        |
    //     O--------1--------7--------5
    //  P0         e0+      e1-         P1
    //

    //  Indices of boundary and interior points and their corresponding Bezier points
    //  (this can be reduced with more direct indexing and unrolling of loops):
    //
    static uniform int const boundaryGregory[12] = { 0, 1, 7, 5, 2, 6, 16, 12, 15, 17, 11, 10 };
    static uniform int const boundaryBezSCol[12] = { 0, 1, 2, 3, 0, 3, 0, 3, 0, 1, 2, 3 };
    static uniform int const boundaryBezTRow[12] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 };

    static uniform int const interiorGregory[8] = { 3, 4, 8, 9, 13, 14, 18, 19 };
    static uniform int const interiorBezSCol[8] = { 1, 1, 2, 2, 2, 2, 1, 1 };
    static uniform int const interiorBezTRow[8] = { 1, 1, 1, 1, 2, 2, 2, 2 };

    //
    //  Bezier basis functions are denoted with B while the rational multipliers for the
    //  interior points will be denoted G -- so we have B(s), B(t) and G(s,t):
    //
    //  Directional Bezier basis functions B at s and t:
    float Bs[4], Bds[4];
    float Bt[4], Bdt[4];

    getBezierWeights(s, Bs, Bds);
    getBezierWeights(t, Bt, Bdt);

    //  Rational multipliers G at s and t:
    float sC = 1.0f - s;
    float tC = 1.0f - t;

    //  Use <= here to avoid compiler warnings -- the sums should always be non-negative:
    float df0 = s + t; df0 = (df0 <= 0.0f) ? 1.0f : (1.0f / df0);
    float df1 = sC + t; df1 = (df1 <= 0.0f) ? 1.0f : (1.0f / df1);
    float df2 = sC + tC; df2 = (df2 <= 0.0f) ? 1.0f : (1.0f / df2);
    float df3 = s + tC; df3 = (df3 <= 0.0f) ? 1.0f : (1.0f / df3);

    float G[8] = { s*df0, t*df0, t*df1, sC*df1, sC*df2, tC*df2, tC*df3, s*df3 };

    //  Combined weights for boundary and interior points:
    for (uniform int i = 0; i < 12; ++i) {
        point[boundaryGregory[i]] = Bs[boundaryBezSCol[i]] * Bt[boundaryBezTRow[i]];
    }
    for (uniform int i = 0; i < 8; ++i) {
        point[interiorGregory[i]] = Bs[interiorBezSCol[i]] * Bt[interiorBezTRow[i]] * G[i];
    }

    //
    //  For derivatives, the basis functions for the interior points are rational and ideally
    //  require appropriate differentiation, i.e. product rule for the combination of B and G
    //  and the quotient rule for the rational G itself. As initially proposed by Loop et al
    //  though, the approximation using the 16 Bezier points arising from the G(s,t) has
    //  proved adequate (and is what the GPU shaders use) so we continue to use that here.
    //
    //  An implementation of the true derivatives is provided for future reference -- it is
    //  unclear if the approximations will hold up under surface analysis involving higher
    //  order differentiation.
    //
    //  Remember to include derivative scaling in all assignments below:
    uniform float dScale = (uniform float)(1 << getDepth(bitField));

    //  Combined weights for boundary points -- simple (scaled) tensor products:
    for (uniform int i = 0; i < 12; ++i) {
        uniform int iDst = boundaryGregory[i];
        uniform int tRow = boundaryBezTRow[i];
        uniform int sCol = boundaryBezSCol[i];

        deriv1[iDst] = Bds[sCol] * Bt[tRow] * dScale;
        deriv2[iDst] = Bdt[tRow] * Bs[sCol] * dScale;
    }

#define _USE_BEZIER_PSEUDO_DERIVATIVES
#ifdef _USE_BEZIER_PSEUDO_DERIVATIVES
    //  Approximation to the true Gregory derivatives by differentiating the Bezier patch
    //  unique to the given (s,t), i.e. having F = (g^+ * f^+) + (g^- * f^-) as its four
    //  interior points:
    //
    //  Combined weights for interior points -- (scaled) tensor products with G+ or G-:
    for (uniform int i = 0; i < 8; ++i) {
        uniform int iDst = interiorGregory[i];
        uniform int tRow = interiorBezTRow[i];
        uniform int sCol = interiorBezSCol[i];

        deriv1[iDst] = Bds[sCol] * Bt[tRow] * G[i] * dScale;
        deriv2[iDst] = Bdt[tRow] * Bs[sCol] * G[i] * dScale;
    }
#else
    //  True Gregory derivatives using appropriate differentiation of composite functions:
    //
    //  Note that for G(s,t) = N(s,t) / D(s,t), all N' and D' are trivial constants (which
    //  simplifies things for higher order derivatives). And while each pair of functions
    //  G (i.e. the G+ and G- corresponding to points f+ and f-) must sum to 1 to ensure
    //  Bezier equivalence (when f+ = f-), the pairs of G' must similarly sum to 0. So we
    //  can potentially compute only one of the pair and negate the result for the other
    //  (and with 4 or 8 computations involving these constants, this is all very SIMD
    //  friendly...) but for now we treat all 8 independently for simplicity.
    //
    //float N[8] = { s, t, t, sC, sC, tC, tC, s };
    //  D is built from the per-lane (varying) df0..df3 values, so it must be
    //  varying as well; a uniform array cannot be initialised from them.
    float D[8] = { df0, df0, df1, df1, df2, df2, df3, df3 };

    static uniform float const Nds[8] = { 1.0f, 0.0f, 0.0f, -1.0f, -1.0f, 0.0f, 0.0f, 1.0f };
    static uniform float const Ndt[8] = { 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, -1.0f, -1.0f, 0.0f };

    static uniform float const Dds[8] = { 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f };
    static uniform float const Ddt[8] = { 1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f };

    //  Combined weights for interior points -- (scaled) combinations of B, B', G and G':
    for (uniform int i = 0; i < 8; ++i) {
        uniform int iDst = interiorGregory[i];
        uniform int tRow = interiorBezTRow[i];
        uniform int sCol = interiorBezSCol[i];

        //  Quotient rule for G' (re-expressed in terms of G to simplify (and D = 1/D)):
        float Gds = (Nds[i] - Dds[i] * G[i]) * D[i];
        float Gdt = (Ndt[i] - Ddt[i] * G[i]) * D[i];

        //  Product rule combining B and B' with G and G' (and scaled):
        deriv1[iDst] = (Bds[sCol] * G[i] + Bs[sCol] * Gds) * Bt[tRow] * dScale;
        deriv2[iDst] = (Bdt[tRow] * G[i] + Bt[tRow] * Gdt) * Bs[sCol] * dScale;
    }
#endif
}
void getGregoryWeightsNoDerivative(uniform unsigned int bitField, float s, float t, float point[20]) {
    //
    // Fills point[0..19] with the Gregory patch weights at (s,t); no derivative
    // weights are produced.  'bitField' is accepted but not read in this function.
    //
    // Layout of the 20 Gregory points:
    //
    //   P3         e3-      e2+         P2
    //      15------17-------11--------10
    //      |        |        |        |
    //      |        |        |        |
    //      |        | f3-    | f2+    |
    //      |       19       13        |
    //  e3+ 16-----18          14-----12 e2-
    //      |     f3+          f2-     |
    //      |                          |
    //      |                          |
    //      |      f0-         f1+     |
    //  e0- 2------4            8------6 e1+
    //      |        3        9        |
    //      |        | f0+    | f1-    |
    //      |        |        |        |
    //      |        |        |        |
    //      O--------1--------7--------5
    //   P0         e0+      e1-         P1
    //
    // Lookup tables: for each boundary / interior Gregory point, its index in
    // point[] and the column (s) and row (t) of the Bezier basis it is built from.
    //
    static uniform int const boundaryGregory[12] = { 0, 1, 7, 5, 2, 6, 16, 12, 15, 17, 11, 10 };
    static uniform int const boundaryBezSCol[12] = { 0, 1, 2, 3, 0, 3, 0, 3, 0, 1, 2, 3 };
    static uniform int const boundaryBezTRow[12] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 };

    static uniform int const interiorGregory[8] = { 3, 4, 8, 9, 13, 14, 18, 19 };
    static uniform int const interiorBezSCol[8] = { 1, 1, 2, 2, 2, 2, 1, 1 };
    static uniform int const interiorBezTRow[8] = { 1, 1, 1, 1, 2, 2, 2, 2 };

    // Bezier basis B evaluated separately along s and along t.
    float Bs[4];
    float Bt[4];
    getBezierWeightsNoDerivative(s, Bs);
    getBezierWeightsNoDerivative(t, Bt);

    // Complements of the parametric coordinates.
    float sC = 1.0f - s;
    float tC = 1.0f - t;

    // Denominators of the rational multipliers G(s,t), one per corner.  The sums
    // should never be negative; '<=' is used (rather than '==') to keep the
    // compiler quiet, and a zero sum falls back to a factor of 1.
    float sum0 = s  + t;
    float sum1 = sC + t;
    float sum2 = sC + tC;
    float sum3 = s  + tC;

    float df0 = (sum0 <= 0.0f) ? 1.0f : (1.0f / sum0);
    float df1 = (sum1 <= 0.0f) ? 1.0f : (1.0f / sum1);
    float df2 = (sum2 <= 0.0f) ? 1.0f : (1.0f / sum2);
    float df3 = (sum3 <= 0.0f) ? 1.0f : (1.0f / sum3);

    // Rational multipliers for the eight interior (face) points.
    float G[8] = { s*df0, t*df0,  t*df1, sC*df1,  sC*df2, tC*df2,  tC*df3, s*df3 };

    // Boundary points: plain tensor-product Bezier weights.
    for (uniform int k = 0; k < 12; ++k) {
        uniform int dst = boundaryGregory[k];
        uniform int col = boundaryBezSCol[k];
        uniform int row = boundaryBezTRow[k];
        point[dst] = Bs[col] * Bt[row];
    }

    // Interior points: Bezier weight scaled by the matching rational multiplier.
    for (uniform int k = 0; k < 8; ++k) {
        uniform int dst = interiorGregory[k];
        uniform int col = interiorBezSCol[k];
        uniform int row = interiorBezTRow[k];
        point[dst] = Bs[col] * Bt[row] * G[k];
    }
}
export void
evalGregory(uniform unsigned int bitField,
            uniform int nPoint,
            uniform float u[],
            uniform float v[],
            uniform const unsigned int vertexIndices[],
            uniform const BufferDescriptor &inDesc,
            uniform const float inQ[],
            uniform const BufferDescriptor &outDesc,
            uniform float outQ[],
            uniform const BufferDescriptor &duDesc,
            uniform float outDQU[],
            uniform const BufferDescriptor &dvDesc,
            uniform float outDQV[])
{
    //
    // Evaluates nPoint locations (u[n], v[n]) on a single Gregory-basis patch and
    // writes, for every location, the limit value to outQ and the two derivative
    // values to outDQU / outDQV.
    //
    //   vertexIndices  the 20 control-vertex indices of the patch
    //   inDesc / inQ   source buffer; vertex k starts at inQ + offset + k * stride
    //   outDesc, duDesc, dvDesc
    //                  destination layouts; result n starts at
    //                  base + offset + n * stride
    //
    // The primvar is processed as inDesc.length / 3 channels of three floats each.
    //
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the control vertices once, channel-major: point i of channel c is
    // stored at controlVertices[c * 20 + i].
    uniform Point controlVertices[MAX_CHANNEL*20];
    for(uniform int i=0; i<20; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    // NOTE(review): getParamFraction / getU / getV are defined elsewhere in this
    // file; presumably they decode the patch's sub-domain from bitField -- confirm.
    uniform float frac = getParamFraction(bitField);

    // top left corner
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach( n = 0 ... nPoint) {
        // normalize u,v coordinates
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float point[20], deriv1[20], deriv2[20];
        getGregoryWeights(bitField, s, t, point, deriv1, deriv2);

        // Limit value.  A Gregory patch has 20 control points per channel, so
        // both the channel base and the summation bound must be 20 (matching the
        // gather above and the derivative loop below), not the 16 of a B-spline
        // patch.
        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20;
            Point Q;
            Q.x = Q.y = Q.z = 0.0;
            for (uniform int i=0; i<20; ++i) {
                Q = Q + point[i] * controlVertices[offset + i];
            }
            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }

        // Derivative values, accumulated with the derivative weights.
        float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
        float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20;
            Point DQU, DQV;
            DQU.x = DQU.y = DQU.z = 0.0;
            DQV.x = DQV.y = DQV.z = 0.0;
            for (uniform int i=0; i<20; ++i) {
                DQU = DQU + deriv1[i] * controlVertices[offset + i];
                DQV = DQV + deriv2[i] * controlVertices[offset + i];
            }
            *pOutDQU ++ = DQU.x, *pOutDQU ++ = DQU.y, *pOutDQU ++ = DQU.z;
            *pOutDQV ++ = DQV.x, *pOutDQV ++ = DQV.y, *pOutDQV ++ = DQV.z;
        }
    }
}
export void
evalGregoryNoDerivative(uniform unsigned int bitField,
                        uniform int nPoint,
                        uniform float u[],
                        uniform float v[],
                        uniform const unsigned int vertexIndices[],
                        uniform const BufferDescriptor &inDesc,
                        uniform const float inQ[],
                        uniform const BufferDescriptor &outDesc,
                        uniform float outQ[]
                        )
{
    //
    // Position-only evaluation of nPoint locations (u[n], v[n]) on one
    // Gregory-basis patch.  Result n is written starting at
    // outQ + outDesc.offset + n * outDesc.stride, three floats per channel.
    //
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Local copy of the 20 control points of every channel; point i of channel c
    // is stored at index c * 20 + i.
    uniform Point controlVertices[MAX_CHANNEL*20];
    for (uniform int i = 0; i < 20; i++) {
        uniform unsigned int cvIndex = vertexIndices[i];
        uniform const float * uniform pSrc = inQ + inDesc.offset + cvIndex * inDesc.stride;
        for (uniform int c = 0; c < nChannel; c++) {
            uniform int slot = c * 20 + i;
            controlVertices[slot].x = pSrc[0];
            controlVertices[slot].y = pSrc[1];
            controlVertices[slot].z = pSrc[2];
            pSrc += 3;
        }
    }

    uniform float frac = getParamFraction(bitField);

    // Origin of the patch's sub-domain ("top left corner" in the original notes).
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach (n = 0 ... nPoint) {
        // Map (u,v) into the patch's local parameter space.
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float point[20];
        getGregoryWeightsNoDerivative(bitField, s, t, point);

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for (uniform int c = 0; c < nChannel; c++) {
            uniform int base = c * 20;

            // Weighted sum of this channel's 20 control points.
            Point Q;
            Q.x = 0.0;
            Q.y = 0.0;
            Q.z = 0.0;
            for (uniform int i = 0; i < 20; ++i) {
                Q = Q + point[i] * controlVertices[base + i];
            }

            *pOutQ ++ = Q.x;
            *pOutQ ++ = Q.y;
            *pOutQ ++ = Q.z;
        }
    }
}

View File

@ -0,0 +1,55 @@
//
// ispcEvalLimitKernel.isph
// (Header automatically generated by the ispc compiler.)
// DO NOT EDIT THIS FILE.
//
#ifndef ISPC_ISPCEVALLIMITKERNEL_ISPH
#define ISPC_ISPCEVALLIMITKERNEL_ISPH
#include <stdint.h>
#ifdef __cplusplus
namespace ispc { /* namespace */
#endif // __cplusplus
#ifndef __ISPC_STRUCT_BufferDescriptor__
#define __ISPC_STRUCT_BufferDescriptor__
// Layout of one primvar inside a flat float buffer, as read by the exported
// kernels declared below.  The Gregory kernels address element k at
// (base + offset + k * stride) and treat (length / 3) as the number of 3-float
// channels.  NOTE(review): this header is generated by the ispc compiler, so a
// hand-written comment here is lost on regeneration.
struct BufferDescriptor {
int32_t offset; // first float of the data, relative to the buffer base
int32_t length; // floats per element; the kernels use length / 3 channels
int32_t stride; // floats between the starts of consecutive elements
};
#endif
///////////////////////////////////////////////////////////////////////////
// Functions exported from ispc code
///////////////////////////////////////////////////////////////////////////
#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)
extern "C" {
#endif // __cplusplus
extern void evalBSpline(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
extern void evalBilinear(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
extern void evalGregory(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
extern void evalBSplineNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
extern void evalBilinearNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
extern void evalGregoryNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
extern void getSIMDWidth(int32_t &simdWidth);
#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)
} /* end extern C */
#endif // __cplusplus
#ifdef __cplusplus
} /* namespace */
#endif // __cplusplus
#endif // ISPC_ISPCEVALLIMITKERNEL_ISPH

View File

@ -0,0 +1,289 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "ispcEvaluator.h"
#include "cpuKernel.h"
#include "../far/patchBasis.h"
#include "ispcEvalLimitKernel.isph"
#include <tbb/parallel_for.h>
#include <cstdlib>
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
#define grain_size 512
/* static */
bool
IspcEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
                            float *dst, BufferDescriptor const &dstDesc,
                            const int * sizes,
                            const int * offsets,
                            const int * indices,
                            const float * weights,
                            int start, int end) {

    // An empty [start, end) range is trivially successful.
    if (!(start < end)) {
        return true;
    }

    // Source and destination must carry the same number of floats per element.
    bool const lengthsMatch = (srcDesc.length == dstDesc.length);
    if (lengthsMatch) {
        // XXX: we can probably expand cpuKernel.cpp to here.
        // Stencil evaluation is delegated to the CPU kernel.
        CpuEvalStencils(src, srcDesc, dst, dstDesc,
                        sizes, offsets, indices, weights, start, end);
    }
    return lengthsMatch;
}
/* static */
bool
IspcEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
                            float *dst,       BufferDescriptor const &dstDesc,
                            float *du,        BufferDescriptor const &duDesc,
                            float *dv,        BufferDescriptor const &dvDesc,
                            const int * sizes,
                            const int * offsets,
                            const int * indices,
                            const float * weights,
                            const float * duWeights,
                            const float * dvWeights,
                            int start, int end) {

    // An empty [start, end) range is trivially successful.
    if (!(start < end)) {
        return true;
    }

    // All three destinations must match the source's per-element length.
    bool const lengthsMatch = (srcDesc.length == dstDesc.length) &&
                              (srcDesc.length == duDesc.length)  &&
                              (srcDesc.length == dvDesc.length);
    if (lengthsMatch) {
        // Stencil evaluation (values and both derivatives) is delegated to the
        // CPU kernel.
        CpuEvalStencils(src, srcDesc,
                        dst, dstDesc,
                        du,  duDesc,
                        dv,  dvDesc,
                        sizes, offsets, indices,
                        weights, duWeights, dvWeights,
                        start, end);
    }
    return lengthsMatch;
}
/// Non-owning cursor over a strided buffer of T.
///
/// '_p' points at the current element, '_length' is the number of T values the
/// element holds and '_stride' is the distance (in T) between consecutive
/// elements.  A null '_p' is tolerated by every mutating operation (Clear,
/// AddWithWeight, operator++), which then do nothing.
template <typename T>
struct BufferAdapter {

    BufferAdapter(T *p, int length, int stride) :
        _p(p), _length(length), _stride(stride) { }

    /// Zeroes the current element; no-op when the adapter wraps a null pointer
    /// (previously this dereferenced '_p' unconditionally).
    void Clear() {
        if (_p) {
            for (int i = 0; i < _length; ++i) _p[i] = 0;
        }
    }

    /// Adds src[i] * w to each of the '_length' values of the current element;
    /// no-op when the adapter wraps a null pointer.
    void AddWithWeight(T const *src, float w) {
        if (_p) {
            for (int i = 0; i < _length; ++i) {
                _p[i] += src[i] * w;
            }
        }
    }

    /// Address of the element 'index' strides after the current one.
    const T *operator[] (int index) const {
        return _p + _stride * index;
    }

    /// Advances to the next element; a null adapter stays null.
    BufferAdapter<T> & operator ++() {
        if (_p) {
            _p += _stride;
        }
        return *this;
    }

    T *_p;
    int _length;
    int _stride;
};
/* static */
bool
IspcEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
                           float *dst, BufferDescriptor const &dstDesc,
                           int numPatchCoords,
                           const PatchCoord *patchCoords,
                           const PatchArray *patchArrays,
                           const int *patchIndexBuffer,
                           const PatchParam *patchParamBuffer) {

    // Evaluates limit values (no derivatives) at 'numPatchCoords' locations.
    // Returns false when source and destination element lengths differ, and
    // true otherwise.

    if (srcDesc.length != dstDesc.length) return false;

    // Copy BufferDescriptor to ispc version
    // Since memory alignment in ISPC may be different from C++,
    // we use the assignment for each field instead of the assignment for
    // the whole struct
    ispc::BufferDescriptor ispcSrcDesc;
    ispcSrcDesc.offset = srcDesc.offset;
    ispcSrcDesc.length = srcDesc.length;
    ispcSrcDesc.stride = srcDesc.stride;

    tbb::blocked_range<int> range = tbb::blocked_range<int>(0, numPatchCoords, grain_size);
    tbb::parallel_for(range, [&](const tbb::blocked_range<int> &r)
    {
        // Signed index: matches blocked_range<int> and avoids the non-standard
        // 'uint' type and signed/unsigned comparisons.
        int i = r.begin();

        // Destination descriptor for the group starting at coordinate i.  The
        // kernels add 'offset' to the base pointer themselves, so the offset is
        // dstDesc.offset plus i strides -- counted exactly once.
        ispc::BufferDescriptor ispcDstDesc;
        ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
        ispcDstDesc.length = dstDesc.length;
        ispcDstDesc.stride = dstDesc.stride;

        while (i < r.end()) {
            // the patch coordinates are sorted by patch handle
            // the following code searches the coordinates that
            // belong to the same patch so that they can be evaluated
            // with ISPC
            int nCoord = 1;
            Far::PatchTable::PatchHandle handle = patchCoords[i].handle;
            while(i + nCoord < r.end() &&
                  handle.isEqual(patchCoords[i + nCoord].handle) )
                nCoord ++;

            PatchArray const &array = patchArrays[handle.arrayIndex];
            int patchType = array.GetPatchType();
            Far::PatchParam const & param = patchParamBuffer[handle.patchIndex];
            unsigned int bitField = param.field1;

            const int *cvs = &patchIndexBuffer[array.indexBase + handle.vertIndex];

            // NOTE(review): variable-length arrays and __declspec(align) are
            // compiler extensions; kept as in the original.
            __declspec( align(64) ) float u[nCoord];
            __declspec( align(64) ) float v[nCoord];

            for(int n=0; n<nCoord; n++) {
                u[n] = patchCoords[i + n].s;
                v[n] = patchCoords[i + n].t;
            }

            if (patchType == Far::PatchDescriptor::REGULAR) {
                ispc::evalBSplineNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
                                              ispcDstDesc, dst);
            } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
                ispc::evalGregoryNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
                                              ispcDstDesc, dst);
            } else if (patchType == Far::PatchDescriptor::QUADS) {
                ispc::evalBilinearNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
                                               ispcDstDesc, dst);
            } else {
                assert(0);
            }

            // Advance to the next group and re-aim the destination at it.
            i += nCoord;
            ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
        }
    });

    return true;
}
/* static */
bool
IspcEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
                           float *dst, BufferDescriptor const &dstDesc,
                           float *du, BufferDescriptor const &duDesc,
                           float *dv, BufferDescriptor const &dvDesc,
                           int numPatchCoords,
                           const PatchCoord *patchCoords,
                           const PatchArray *patchArrays,
                           const int *patchIndexBuffer,
                           const PatchParam *patchParamBuffer) {

    // Evaluates limit values and both derivatives at 'numPatchCoords'
    // locations.  Returns false when source and destination element lengths
    // differ, and true otherwise.
    //
    // NOTE(review): unlike EvalStencils, duDesc.length / dvDesc.length are not
    // validated here; the Gregory kernel sizes all three outputs from the
    // source length -- consider whether a mismatch should be rejected.

    if (srcDesc.length != dstDesc.length) return false;

    // Copy BufferDescriptor to ispc version
    // Since memory alignment in ISPC may be different from C++,
    // we use the assignment for each field instead of the assignment for
    // the whole struct
    ispc::BufferDescriptor ispcSrcDesc;
    ispcSrcDesc.offset = srcDesc.offset;
    ispcSrcDesc.length = srcDesc.length;
    ispcSrcDesc.stride = srcDesc.stride;

    tbb::blocked_range<int> range = tbb::blocked_range<int>(0, numPatchCoords, grain_size);
    tbb::parallel_for(range, [&](const tbb::blocked_range<int> &r)
    {
        // Signed index: matches blocked_range<int> and avoids the non-standard
        // 'uint' type and signed/unsigned comparisons.
        int i = r.begin();

        // Output descriptors for the group starting at coordinate i.  The
        // kernels add 'offset' to the base pointer themselves, so each offset is
        // the caller's offset plus i strides -- counted exactly once.
        ispc::BufferDescriptor ispcDstDesc, ispcDuDesc, ispcDvDesc;
        ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
        ispcDstDesc.length = dstDesc.length;
        ispcDstDesc.stride = dstDesc.stride;

        ispcDuDesc.offset  = duDesc.offset  + i * duDesc.stride;
        ispcDuDesc.length  = duDesc.length;
        ispcDuDesc.stride  = duDesc.stride;

        ispcDvDesc.offset  = dvDesc.offset  + i * dvDesc.stride;
        ispcDvDesc.length  = dvDesc.length;
        ispcDvDesc.stride  = dvDesc.stride;

        while (i < r.end()) {
            // the patch coordinates are sorted by patch handle
            // the following code searches the coordinates that
            // belong to the same patch so that they can be evaluated
            // with ISPC
            int nCoord = 1;
            Far::PatchTable::PatchHandle handle = patchCoords[i].handle;
            while(i + nCoord < r.end() &&
                  handle.isEqual(patchCoords[i + nCoord].handle) )
                nCoord ++;

            PatchArray const &array = patchArrays[handle.arrayIndex];
            int patchType = array.GetPatchType();
            Far::PatchParam const & param = patchParamBuffer[handle.patchIndex];
            unsigned int bitField = param.field1;

            const int *cvs = &patchIndexBuffer[array.indexBase + handle.vertIndex];

            // NOTE(review): variable-length arrays and __declspec(align) are
            // compiler extensions; kept as in the original.
            __declspec( align(64) ) float u[nCoord];
            __declspec( align(64) ) float v[nCoord];

            for(int n=0; n<nCoord; n++) {
                u[n] = patchCoords[i + n].s;
                v[n] = patchCoords[i + n].t;
            }

            if (patchType == Far::PatchDescriptor::REGULAR) {
                ispc::evalBSpline(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
                                  ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
            } else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
                ispc::evalGregory(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
                                  ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
            } else if (patchType == Far::PatchDescriptor::QUADS) {
                ispc::evalBilinear(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
                                   ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
            } else {
                assert(0);
            }

            // Advance to the next group and re-aim all three outputs at it.
            i += nCoord;
            ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
            ispcDuDesc.offset  = duDesc.offset  + i * duDesc.stride;
            ispcDvDesc.offset  = dvDesc.offset  + i * dvDesc.stride;
        }
    });

    return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv

View File

@ -0,0 +1,482 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
#define OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
#include "../version.h"
#include <cstddef>
#include <vector>
#include "../osd/bufferDescriptor.h"
#include "../osd/types.h"
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
class IspcEvaluator {
public:
/// ----------------------------------------------------------------------
///
/// Stencil evaluations with StencilTable
///
/// ----------------------------------------------------------------------
/// \brief Generic static eval stencils function. This function has the same
/// signature as the other device kernels so that it can be called
/// in the same way from the OsdMesh template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the cpu kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
    SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
    DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
    STENCIL_TABLE const *stencilTable,
    const IspcEvaluator *instance = NULL,
    void * deviceContext = NULL) {

    // Neither argument is needed by this evaluator.
    (void)instance;
    (void)deviceContext;

    // An empty stencil table is reported as a failure.
    if (stencilTable->GetNumStencils() == 0) {
        return false;
    }

    // Unpack the buffers and the table, then forward to the raw-pointer overload
    // over the full stencil range.
    const float *src     = srcBuffer->BindCpuBuffer();
    float       *dst     = dstBuffer->BindCpuBuffer();
    const int   *sizes   = &stencilTable->GetSizes()[0];
    const int   *offsets = &stencilTable->GetOffsets()[0];
    const int   *indices = &stencilTable->GetControlIndices()[0];
    const float *weights = &stencilTable->GetWeights()[0];

    return EvalStencils(src, srcDesc,
                        dst, dstDesc,
                        sizes, offsets, indices, weights,
                        /*start = */ 0,
                        /*end   = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function which takes raw CPU pointers for
/// input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
/// to apply for the range [start, end)
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
int start, int end);
/// \brief Generic static eval stencils function with derivatives.
/// This function has a same signature as other device kernels
/// have so that it can be called in the same way from OsdMesh
/// template interface.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dvBuffer Output V-derivative buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param stencilTable Far::StencilTable or equivalent
///
/// @param instance not used in the cpu kernel
/// (declared as a typed pointer to prevent
/// undesirable template resolution)
///
/// @param deviceContext not used in the cpu kernel
///
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
static bool EvalStencils(
    SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
    DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
    DST_BUFFER *duBuffer,  BufferDescriptor const &duDesc,
    DST_BUFFER *dvBuffer,  BufferDescriptor const &dvDesc,
    STENCIL_TABLE const *stencilTable,
    const IspcEvaluator *instance = NULL,
    void * deviceContext = NULL) {

    (void)instance;  // unused
    (void)deviceContext;  // unused

    // Same guard as the overload without derivatives: an empty table is
    // reported as a failure, and element [0] of the table's arrays is never
    // touched when there is nothing to evaluate.
    if (stencilTable->GetNumStencils() == 0)
        return false;

    // Forward to the raw-pointer overload over the full stencil range.
    return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
                        dstBuffer->BindCpuBuffer(), dstDesc,
                        duBuffer->BindCpuBuffer(),  duDesc,
                        dvBuffer->BindCpuBuffer(),  dvDesc,
                        &stencilTable->GetSizes()[0],
                        &stencilTable->GetOffsets()[0],
                        &stencilTable->GetControlIndices()[0],
                        &stencilTable->GetWeights()[0],
                        &stencilTable->GetDuWeights()[0],
                        &stencilTable->GetDvWeights()[0],
                        /*start = */ 0,
                        /*end   = */ stencilTable->GetNumStencils());
}
/// \brief Static eval stencils function with derivatives, which takes
/// raw CPU pointers for input and output.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the output buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the output buffer
///
/// @param sizes pointer to the sizes buffer of the stencil table
///
/// @param offsets pointer to the offsets buffer of the stencil table
///
/// @param indices pointer to the indices buffer of the stencil table
///
/// @param weights pointer to the weights buffer of the stencil table
///
/// @param duWeights pointer to the du-weights buffer of the stencil table
///
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
///
/// @param start start index of stencil table
///
/// @param end end index of stencil table
///
static bool EvalStencils(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
const int * sizes,
const int * offsets,
const int * indices,
const float * weights,
const float * duWeights,
const float * dvWeights,
int start, int end);
/// ----------------------------------------------------------------------
///
/// Limit evaluations with PatchTable
///
/// ----------------------------------------------------------------------
/// \brief Generic limit eval function. This function has a same
/// signature as other device kernels have so that it can be called
/// in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
IspcEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Generic limit eval function with derivatives. This function has
/// a same signature as other device kernels have so that it can be
/// called in the same way.
///
/// @param srcBuffer Input primvar buffer.
/// must have BindCpuBuffer() method returning a
/// const float pointer for read
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dstBuffer Output primvar buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param duBuffer Output U-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param duDesc vertex buffer descriptor for the duBuffer
///
/// @param dvBuffer Output V-derivatives buffer
/// must have BindCpuBuffer() method returning a
/// float pointer for write
///
/// @param dvDesc vertex buffer descriptor for the dvBuffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchTable CpuPatchTable or equivalent
/// XXX: currently Far::PatchTable can't be used
/// due to interface mismatch
///
/// @param instance not used in the cpu evaluator
///
/// @param deviceContext not used in the cpu evaluator
///
template <typename SRC_BUFFER, typename DST_BUFFER,
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
static bool EvalPatches(
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
PATCHCOORD_BUFFER *patchCoords,
PATCH_TABLE *patchTable,
IspcEvaluator const *instance = NULL,
void * deviceContext = NULL) {
(void)instance; // unused
(void)deviceContext; // unused
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
// ideally all buffer classes should have templated by datatype
// so that downcast isn't needed there.
// (e.g. Osd::CpuBuffer<PatchCoord> )
//
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
dstBuffer->BindCpuBuffer(), dstDesc,
duBuffer->BindCpuBuffer(), duDesc,
dvBuffer->BindCpuBuffer(), dvDesc,
numPatchCoords,
(const PatchCoord*)patchCoords->BindCpuBuffer(),
patchTable->GetPatchArrayBuffer(),
patchTable->GetPatchIndexBuffer(),
patchTable->GetPatchParamBuffer());
}
/// \brief Static limit eval function. It takes an array of PatchCoord
/// and evaluate limit values on given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
int numPatchCoords,
const PatchCoord *patchCoords,
const PatchArray *patchArrays,
const int *patchIndexBuffer,
const PatchParam *patchParamBuffer);
/// \brief Static limit eval function with derivatives. It takes an array of
/// PatchCoord and evaluates limit values and derivatives on the
/// given PatchTable.
///
/// @param src Input primvar pointer. An offset of srcDesc
/// will be applied internally (i.e. the pointer
/// should not include the offset)
///
/// @param srcDesc vertex buffer descriptor for the input buffer
///
/// @param dst Output primvar pointer. An offset of dstDesc
/// will be applied internally.
///
/// @param dstDesc vertex buffer descriptor for the output buffer
///
/// @param du Output U-derivatives pointer. An offset of
/// duDesc will be applied internally.
///
/// @param duDesc vertex buffer descriptor for the du buffer
///
/// @param dv Output V-derivatives pointer. An offset of
/// dvDesc will be applied internally.
///
/// @param dvDesc vertex buffer descriptor for the dv buffer
///
/// @param numPatchCoords number of patchCoords.
///
/// @param patchCoords array of locations to be evaluated.
///
/// @param patchArrays an array of Osd::PatchArray struct
/// indexed by PatchCoord::arrayIndex
///
/// @param patchIndexBuffer an array of patch indices
/// indexed by PatchCoord::vertIndex
///
/// @param patchParamBuffer an array of Osd::PatchParam struct
/// indexed by PatchCoord::patchIndex
///
static bool EvalPatches(
const float *src, BufferDescriptor const &srcDesc,
float *dst, BufferDescriptor const &dstDesc,
float *du, BufferDescriptor const &duDesc,
float *dv, BufferDescriptor const &dvDesc,
int numPatchCoords,
PatchCoord const *patchCoords,
PatchArray const *patchArrays,
const int *patchIndexBuffer,
PatchParam const *patchParamBuffer);
/// ----------------------------------------------------------------------
///
/// Other methods
///
/// ----------------------------------------------------------------------
/// \brief synchronize all asynchronous computation invoked on this device.
static void Synchronize(void * /* deviceContext = NULL */) {
    // Intentionally empty: this function performs no work.
}
};
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;
} // end namespace OpenSubdiv
#endif  // OPENSUBDIV3_OSD_ISPC_EVALUATOR_H