mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-11-09 22:00:06 +00:00
Add ISPC limit surface evaluation
This commit is contained in:
parent
bd7b017c02
commit
d3f8725e79
@ -197,6 +197,8 @@ if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANGCC OR CMAKE_COMPILER_IS_IC
|
||||
endif()
|
||||
|
||||
endforeach()
|
||||
|
||||
list(APPEND OSD_COMPILER_FLAGS -std=c++11)
|
||||
endif()
|
||||
|
||||
elseif(MSVC)
|
||||
@ -321,6 +323,9 @@ endif()
|
||||
if(NOT NO_TBB)
|
||||
find_package(TBB 4.0)
|
||||
endif()
|
||||
# ISPC support is optional: detection is skipped when NO_ISPC is set.
# Version 1.6 is the minimum requested from the find module.
if (NOT NO_ISPC)
    find_package(ISPC 1.6)
endif ()
|
||||
if (NOT NO_OPENGL)
|
||||
find_package(OpenGL)
|
||||
endif()
|
||||
@ -539,6 +544,12 @@ if (NOT NO_MAYA)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Let the C/C++ sources know that ISPC was found at configure time.
if (ISPC_FOUND)
    add_definitions(-DOPENSUBDIV_HAS_ISPC)
endif ()
|
||||
|
||||
# Link examples & regressions dynamically against Osd
|
||||
set( OSD_LINK_TARGET osd_dynamic_cpu osd_dynamic_gpu )
|
||||
|
||||
|
94
cmake/FindISPC.cmake
Normal file
94
cmake/FindISPC.cmake
Normal file
@ -0,0 +1,94 @@
|
||||
#
|
||||
# Copyright 2013 Pixar
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
# with the following modification; you may not use this file except in
|
||||
# compliance with the Apache License and the following modification to it:
|
||||
# Section 6. Trademarks. is deleted and replaced with:
|
||||
#
|
||||
# 6. Trademarks. This License does not grant permission to use the trade
|
||||
# names, trademarks, service marks, or product names of the Licensor
|
||||
# and its affiliates, except as required to comply with Section 4(c) of
|
||||
# the License and to reproduce the content of the NOTICE file.
|
||||
#
|
||||
# You may obtain a copy of the Apache License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the Apache License with the above modification is
|
||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the Apache License for the specific
|
||||
# language governing permissions and limitations under the Apache License.
|
||||
#
|
||||
|
||||
# - Try to find Intel's ISPC
|
||||
# Once done this will define
|
||||
#
|
||||
# ISPC_FOUND - System has ISPC
|
||||
# ISPC_DIR - The ISPC directory
|
||||
|
||||
# Obtain ISPC directory
|
||||
# Windows and Apple lookups are not implemented; on every other platform the
# `ispc` file is looked for under ISPC_LOCATION only (default and system
# environment search paths are explicitly disabled).
if (NOT WIN32 AND NOT APPLE)
    find_path(ISPC_DIR
        NAMES ispc
        PATHS ${ISPC_LOCATION}
        NO_DEFAULT_PATH NO_SYSTEM_ENVIRONMENT_PATH
        DOC "The directory where ISPC reside")
endif ()
|
||||
|
||||
# Ask the compiler for its version and keep only the dotted number.
#  - dots are escaped and each component accepts several digits, so a
#    version such as 1.10.0 is parsed correctly;
#  - the input is quoted so an empty output (e.g. the binary failed to run)
#    yields an empty ISPC_VERSION instead of a string() argument error.
if (ISPC_DIR)
    execute_process(
        COMMAND "${ISPC_DIR}/ispc" --version
        OUTPUT_VARIABLE ISPC_VERSION
        OUTPUT_STRIP_TRAILING_WHITESPACE)
    string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" ISPC_VERSION "${ISPC_VERSION}")
endif ()
|
||||
|
||||
include(FindPackageHandleStandardArgs)

# ISPC is reported as found when ISPC_DIR is set; ISPC_VERSION is handed over
# as the version variable for the version requested by find_package().
find_package_handle_standard_args(ISPC
    REQUIRED_VARS ISPC_DIR
    VERSION_VAR   ISPC_VERSION)

mark_as_advanced(ISPC_DIR)
|
||||
|
||||
# ispc_compile(<source>...)
#
# Registers one custom build rule per ISPC source file (paths relative to the
# current source directory).  Each rule produces, inside ISPC_TARGET_DIR:
#   <name>.dev.o     the compiled object
#   <name>_ispc.h    the generated C/C++ header
#   <name>.dev.idep  the #include dependency list written by ispc (-MMM)
#
# Being a macro, it sets these variables in the caller's scope:
#   ISPC_TARGET_DIR  directory receiving the generated files
#   ISPC_OBJECTS     list of the object files, one per source
macro(ispc_compile)

    set(ISPC_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/osd_ispc_obj.dir)

    # The rules below write directly into this directory; create it up front
    # instead of relying on a target of the same name having been generated.
    file(MAKE_DIRECTORY "${ISPC_TARGET_DIR}")

    set(ISPC_OBJECTS "")

    foreach(src ${ARGN})

        get_filename_component(fname ${src} NAME_WE)

        set(results "${ISPC_TARGET_DIR}/${fname}.dev.o")

        # VERBATIM keeps argument escaping identical across generators; the
        # stray ';' argument formerly appended to the command line is gone.
        add_custom_command(
            OUTPUT "${results}" "${ISPC_TARGET_DIR}/${fname}_ispc.h"
            COMMAND "${ISPC_DIR}/ispc"
                --pic
                -O1
                --wno-perf
                --woff
                -h "${ISPC_TARGET_DIR}/${fname}_ispc.h"
                -MMM "${ISPC_TARGET_DIR}/${fname}.dev.idep"
                -o "${results}"
                "${CMAKE_CURRENT_SOURCE_DIR}/${src}"
            DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${src}"
            VERBATIM
        )

        list(APPEND ISPC_OBJECTS "${results}")

    endforeach()

endmacro()
|
||||
|
@ -34,6 +34,10 @@ GLFWmonitor* g_primary=0;
|
||||
#include <osd/cpuGLVertexBuffer.h>
|
||||
#include <osd/mesh.h>
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_ISPC
|
||||
#include <osd/ispcEvaluator.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <osd/tbbEvaluator.h>
|
||||
#endif
|
||||
@ -104,7 +108,8 @@ enum KernelType { kCPU = 0,
|
||||
kCUDA = 3,
|
||||
kCL = 4,
|
||||
kGLXFB = 5,
|
||||
kGLCompute = 6 };
|
||||
kGLCompute = 6,
|
||||
kISPC = 7 };
|
||||
|
||||
enum EndCap { kEndCapBSplineBasis,
|
||||
kEndCapGregoryBasis };
|
||||
@ -169,10 +174,10 @@ float g_currentTime = 0;
|
||||
Stopwatch g_fpsTimer;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
int g_nParticles = 65536;
|
||||
int g_nParticles = 655360;
|
||||
|
||||
bool g_randomStart = true;//false;
|
||||
bool g_animParticles = true;
|
||||
bool g_animParticles = false;
|
||||
|
||||
GLuint g_samplesVAO=0;
|
||||
|
||||
@ -439,7 +444,9 @@ updateGeom() {
|
||||
assert(g_particles);
|
||||
|
||||
float elapsed = g_currentTime - g_prevTime;
|
||||
g_particles->Update(elapsed);
|
||||
if(elapsed != 0.0f) {
|
||||
g_particles->Update(elapsed);
|
||||
}
|
||||
g_prevTime = g_currentTime;
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
|
||||
@ -464,7 +471,7 @@ updateGeom() {
|
||||
}
|
||||
|
||||
s.Stop();
|
||||
|
||||
|
||||
g_evalTime = float(s.GetElapsed());
|
||||
}
|
||||
|
||||
@ -648,8 +655,20 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||
&glComputeEvaluatorCache);
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#if defined(OPENSUBDIV_HAS_ISPC) && defined(OPENSUBDIV_HAS_TBB)
|
||||
else if(g_kernel == kISPC) {
|
||||
g_evalOutput = new EvalOutput<Osd::CpuGLVertexBuffer,
|
||||
Osd::CpuGLVertexBuffer,
|
||||
Far::StencilTable,
|
||||
Osd::CpuPatchTable,
|
||||
Osd::IspcEvaluator>
|
||||
(vertexStencils, varyingStencils,
|
||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Create the 'uv particles' manager - this class manages the limit
|
||||
// location samples (ptex face index, (s,t) and updates them between frames.
|
||||
@ -875,7 +894,7 @@ display() {
|
||||
}
|
||||
|
||||
if (g_endCap != kEndCapBSplineBasis &&
|
||||
(g_kernel != kCPU && g_kernel != kOPENMP && g_kernel != kTBB)) {
|
||||
(g_kernel != kCPU && g_kernel != kOPENMP && g_kernel != kTBB && g_kernel != kISPC)) {
|
||||
static char msg[] =
|
||||
"ERROR: This kernel only supports BSpline basis patches.";
|
||||
g_hud.DrawString(g_width/4, g_height/4+20, 1, 0, 0, msg);
|
||||
@ -1129,6 +1148,9 @@ initHUD() {
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
|
||||
#endif
|
||||
#if defined(OPENSUBDIV_HAS_ISPC) && defined(OPENSUBDIV_HAS_TBB)
|
||||
g_hud.AddPullDownButton(compute_pulldown, "ISPC", kISPC);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
||||
#endif
|
||||
|
@ -32,17 +32,17 @@
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <tbb/atomic.h>
|
||||
tbb::atomic<int> g_tbbCounter;
|
||||
|
||||
class TbbUpdateKernel {
|
||||
public:
|
||||
TbbUpdateKernel(float speed,
|
||||
STParticles::Position *positions,
|
||||
float *velocities,
|
||||
std::vector<STParticles::FaceInfo> const &adjacency,
|
||||
OpenSubdiv::Osd::PatchCoord *patchCoords,
|
||||
PatchHandleMap *patchHandleMap,
|
||||
OpenSubdiv::Far::PatchMap const *patchMap) :
|
||||
_speed(speed), _positions(positions), _velocities(velocities),
|
||||
_adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) {
|
||||
_adjacency(adjacency), _patchHandleMap(patchHandleMap), _patchMap(patchMap) {
|
||||
}
|
||||
|
||||
void operator () (tbb::blocked_range<int> const &r) const {
|
||||
@ -76,9 +76,13 @@ public:
|
||||
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
|
||||
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
|
||||
if (handle) {
|
||||
int index = g_tbbCounter.fetch_and_add(1);
|
||||
_patchCoords[index] =
|
||||
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t);
|
||||
PatchHandleMap::accessor a;
|
||||
if( !_patchHandleMap->find(a, handle)) {
|
||||
_patchHandleMap->insert(a, handle);
|
||||
}
|
||||
std::vector<float> &st = a->second;
|
||||
st.push_back(p->s);
|
||||
st.push_back(p->t);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -87,7 +91,7 @@ private:
|
||||
STParticles::Position *_positions;
|
||||
float *_velocities;
|
||||
std::vector<STParticles::FaceInfo> const &_adjacency;
|
||||
OpenSubdiv::Osd::PatchCoord *_patchCoords;
|
||||
PatchHandleMap *_patchHandleMap;
|
||||
OpenSubdiv::Far::PatchMap const *_patchMap;
|
||||
};
|
||||
#endif
|
||||
@ -276,18 +280,36 @@ STParticles::Update(float deltaTime) {
|
||||
if (deltaTime == 0) return;
|
||||
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
|
||||
|
||||
_patchCoords.clear();
|
||||
|
||||
// XXX: this process should be parallelized.
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
|
||||
_patchCoords.resize((int)GetNumParticles());
|
||||
_patchHandleMap.clear();
|
||||
|
||||
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
|
||||
_adjacency, &_patchCoords[0], _patchMap);;
|
||||
g_tbbCounter = 0;
|
||||
_adjacency, &_patchHandleMap, _patchMap);;
|
||||
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
|
||||
tbb::parallel_for(range, kernel);
|
||||
_patchCoords.resize(g_tbbCounter);
|
||||
|
||||
|
||||
int nCoord = 0;
|
||||
for(PatchHandleMap::iterator i = _patchHandleMap.begin();
|
||||
i != _patchHandleMap.end();
|
||||
i ++) {
|
||||
nCoord += (i->second.size() / 2);
|
||||
}
|
||||
|
||||
_patchCoords.resize(nCoord);
|
||||
|
||||
int index = 0;
|
||||
for(PatchHandleMap::iterator i = _patchHandleMap.begin();
|
||||
i != _patchHandleMap.end();
|
||||
i ++) {
|
||||
for(int j = 0; j < i->second.size(); j += 2) {
|
||||
_patchCoords[index].handle = *(i->first);
|
||||
_patchCoords[index].s = i->second[j];
|
||||
_patchCoords[index].t = i->second[j+1];
|
||||
index ++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
Position * p = &_positions[0];
|
||||
float * dp = &_velocities[0];
|
||||
@ -323,7 +345,7 @@ STParticles::Update(float deltaTime) {
|
||||
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
// Dump adjacency info
|
||||
|
@ -30,6 +30,11 @@
|
||||
#include <osd/types.h>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <tbb/concurrent_hash_map.h>
|
||||
typedef tbb::concurrent_hash_map< OpenSubdiv::Far::PatchTable::PatchHandle const*, std::vector<float> > PatchHandleMap;
|
||||
#endif
|
||||
|
||||
//
|
||||
// In order to emphasize the dynamic nature of the EvalLimit API, where the
|
||||
// locations can be arbitrarily updated before each evaluation, the glEvalLimit
|
||||
@ -142,7 +147,7 @@ public:
|
||||
return _velocities;
|
||||
}
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> GetPatchCoords() const {
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> const &GetPatchCoords() const {
|
||||
return _patchCoords;
|
||||
}
|
||||
|
||||
@ -159,6 +164,10 @@ private:
|
||||
std::vector<Position> _positions;
|
||||
|
||||
std::vector<float> _velocities;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
PatchHandleMap _patchHandleMap;
|
||||
#endif
|
||||
|
||||
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;
|
||||
|
||||
|
@ -147,9 +147,16 @@ if (NOT NO_LIB)
|
||||
)
|
||||
set_target_properties(osd_static_cpu PROPERTIES OUTPUT_NAME osdCPU CLEAN_DIRECT_OUTPUT 1)
|
||||
|
||||
target_link_libraries(osd_static_cpu
|
||||
${PLATFORM_CPU_LIBRARIES}
|
||||
)
|
||||
# The ISPC kernel archive, when built, is listed ahead of the platform
# libraries; otherwise only the platform libraries are linked.
if (NOT ISPC_FOUND)
    target_link_libraries(osd_static_cpu ${PLATFORM_CPU_LIBRARIES})
else ()
    target_link_libraries(osd_static_cpu osd_ispc_obj ${PLATFORM_CPU_LIBRARIES})
endif ()
|
||||
|
||||
install( TARGETS osd_static_cpu DESTINATION "${CMAKE_LIBDIR_BASE}" )
|
||||
|
||||
@ -200,9 +207,16 @@ if (NOT NO_LIB)
|
||||
)
|
||||
endif()
|
||||
|
||||
target_link_libraries(osd_dynamic_cpu
|
||||
${PLATFORM_CPU_LIBRARIES}
|
||||
)
|
||||
# The ISPC kernel archive, when built, is listed ahead of the platform
# libraries; otherwise only the platform libraries are linked.
if (NOT ISPC_FOUND)
    target_link_libraries(osd_dynamic_cpu ${PLATFORM_CPU_LIBRARIES})
else ()
    target_link_libraries(osd_dynamic_cpu osd_ispc_obj ${PLATFORM_CPU_LIBRARIES})
endif ()
|
||||
|
||||
install( TARGETS osd_dynamic_cpu LIBRARY DESTINATION "${CMAKE_LIBDIR_BASE}" )
|
||||
|
||||
|
@ -116,6 +116,15 @@ struct PatchParam {
|
||||
///
|
||||
void Normalize( float & u, float & v ) const;
|
||||
|
||||
/// This function is the reverse operation of function Normalize()
|
||||
/// The (u,v) pair is converted from patch sub-parametric space to control
|
||||
/// face parametric space.
|
||||
///
|
||||
/// @param u u parameter
|
||||
/// @param v v parameter
|
||||
///
|
||||
void Denormalize( float & u, float & v) const;
|
||||
|
||||
unsigned int field0:32;
|
||||
unsigned int field1:32;
|
||||
};
|
||||
@ -161,6 +170,20 @@ PatchParam::Normalize( float & u, float & v ) const {
|
||||
v = (v - pv) / frac;
|
||||
}
|
||||
|
||||
inline void
|
||||
PatchParam::Denormalize( float & u, float & v ) const {
|
||||
|
||||
float frac = GetParamFraction();
|
||||
|
||||
// top left corner
|
||||
float pu = (float)GetU()*frac;
|
||||
float pv = (float)GetV()*frac;
|
||||
|
||||
// normalize u,v coordinates
|
||||
u = u * frac + pu;
|
||||
v = v * frac + pv;
|
||||
}
|
||||
|
||||
} // end namespace Far
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
|
@ -68,6 +68,12 @@ public:
|
||||
Index arrayIndex, // Array index of the patch
|
||||
patchIndex, // Absolute Index of the patch
|
||||
vertIndex; // Relative offset to the first CV of the patch in array
|
||||
|
||||
bool isEqual(const PatchHandle &other) {
|
||||
return other.arrayIndex == arrayIndex &&
|
||||
other.patchIndex == patchIndex &&
|
||||
other.vertIndex == vertIndex;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# source & headers
|
||||
|
||||
set(CPU_SOURCE_FILES
|
||||
cpuEvaluator.cpp
|
||||
cpuKernel.cpp
|
||||
@ -33,8 +34,12 @@ set(CPU_SOURCE_FILES
|
||||
cpuVertexBuffer.cpp
|
||||
)
|
||||
|
||||
set(GPU_SOURCE_FILES )
|
||||
# The ISPC evaluator is compiled into the CPU library only when ISPC is found.
if (ISPC_FOUND)
    list(APPEND CPU_SOURCE_FILES
        ispcEvaluator.cpp
    )
endif ()
|
||||
|
||||
set(GPU_SOURCE_FILES )
|
||||
set(ISPC_SOURCE_FILES )
|
||||
set(INC_FILES )
|
||||
|
||||
set(PRIVATE_HEADER_FILES
|
||||
@ -296,6 +301,17 @@ if( CUDA_FOUND )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (ISPC_FOUND)
    list(APPEND ISPC_SOURCE_FILES ispcEvalLimitKernel.ispc)

    # Compile the ISPC sources to objects (ispc_compile fills ISPC_OBJECTS)
    # and bundle them into a static library.  The archive only contains
    # pre-built objects, hence the explicit linker language.
    ispc_compile(${ISPC_SOURCE_FILES})
    add_library(osd_ispc_obj STATIC ${ISPC_OBJECTS})
    set_target_properties(osd_ispc_obj PROPERTIES LINKER_LANGUAGE C)
endif ()
|
||||
|
||||
list(APPEND DOXY_HEADER_FILES ${CUDA_PUBLIC_HEADERS})
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
|
880
opensubdiv/osd/ispcEvalLimitKernel.ispc
Normal file
880
opensubdiv/osd/ispcEvalLimitKernel.ispc
Normal file
@ -0,0 +1,880 @@
|
||||
//
|
||||
// Copyright 2013 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
|
||||
#define MAX_CHANNEL 4
|
||||
|
||||
// Describes how element data is laid out inside a float buffer:
//   offset - offset to desired element data
//   length - number or length of the data
//   stride - stride to the next element
struct BufferDescriptor {
    int offset, length, stride;
};
|
||||
|
||||
// Three-component float tuple used for all per-channel arithmetic below.
struct Point {
    float x, y, z;
};
|
||||
|
||||
// Component-wise sum of two varying points.
inline struct Point operator+(struct Point a, struct Point b) {
    struct Point sum = { a.x + b.x, a.y + b.y, a.z + b.z };
    return sum;
}
|
||||
|
||||
// Component-wise sum of two uniform points.
inline uniform struct Point operator+(uniform struct Point a, uniform struct Point b) {
    uniform struct Point sum = { a.x + b.x, a.y + b.y, a.z + b.z };
    return sum;
}
|
||||
|
||||
// Component-wise difference (a - b) of two varying points.
inline struct Point operator-(struct Point a, struct Point b) {
    struct Point diff = { a.x - b.x, a.y - b.y, a.z - b.z };
    return diff;
}
|
||||
|
||||
// Component-wise difference (a - b) of two uniform points.
inline uniform struct Point operator-(uniform struct Point a, uniform struct Point b) {
    uniform struct Point diff = { a.x - b.x, a.y - b.y, a.z - b.z };
    return diff;
}
|
||||
|
||||
// Scales a varying point by a varying scalar (point on the left).
inline struct Point operator*(struct Point a, float b) {
    struct Point scaled = { a.x * b, a.y * b, a.z * b };
    return scaled;
}
|
||||
|
||||
// Scales a uniform point by a uniform scalar (point on the left).
inline uniform struct Point operator*(uniform struct Point a, uniform float b) {
    uniform struct Point scaled = { a.x * b, a.y * b, a.z * b };
    return scaled;
}
|
||||
|
||||
// Scales a varying point by a varying scalar (scalar on the left).
inline struct Point operator*(float b, struct Point a) {
    struct Point scaled = { b * a.x, b * a.y, b * a.z };
    return scaled;
}
|
||||
|
||||
// Scales a uniform point by a uniform scalar (scalar on the left).
inline uniform struct Point operator*(uniform float b, uniform struct Point a) {
    uniform struct Point scaled = { b * a.x, b * a.y, b * a.z };
    return scaled;
}
|
||||
|
||||
// Divides every component of a varying point by a varying scalar.
inline struct Point operator/(struct Point a, float b) {
    struct Point quotient = { a.x / b, a.y / b, a.z / b };
    return quotient;
}
|
||||
|
||||
// Divides every component of a uniform point by a uniform scalar.
inline uniform struct Point operator/(uniform struct Point a, uniform float b) {
    uniform struct Point quotient = { a.x / b, a.y / b, a.z / b };
    return quotient;
}
|
||||
|
||||
// Writes the cross product a x b into c.
//
// The components of c are stored one after another while a and b are still
// being read, so c is expected to be a different object than a and b.
inline void cross(struct Point &a, struct Point &b, struct Point &c)
{
    c.x = (a.y * b.z) - (a.z * b.y);
    c.y = (a.z * b.x) - (a.x * b.z);
    c.z = (a.x * b.y) - (a.y * b.x);
}
|
||||
|
||||
// Returns the flag stored in bit 3 of the packed patch-param bit field.
//
// NOTE(review): getDepth() in this file reads bits 0-3 (mask 0xf), so bit 3
// is shared between the depth value and this flag. Confirm the bit position
// against the C++ PatchParam layout.
inline uniform bool
nonQuadRoot(uniform unsigned int bitField)
{
    uniform unsigned int flag = (bitField >> 3) & 0x1;
    return flag;
}
|
||||
|
||||
// Extracts the 10-bit field stored in bits 22..31 of the packed bit field.
inline uniform unsigned int getU(uniform unsigned int bitField)
{
    uniform unsigned int field = (bitField >> 22) & 0x3ff;
    return field;
}
|
||||
|
||||
// Extracts the 10-bit field stored in bits 12..21 of the packed bit field.
inline uniform unsigned int getV(uniform unsigned int bitField)
{
    uniform unsigned int field = (bitField >> 12) & 0x3ff;
    return field;
}
|
||||
|
||||
// Extracts the 4-bit boundary mask stored in bits 8..11 of the bit field.
inline uniform unsigned int getBoundary(uniform unsigned int bitField)
{
    uniform unsigned int mask = (bitField >> 8) & 0xf;
    return mask;
}
|
||||
|
||||
// Extracts the 4-bit depth stored in the lowest bits of the bit field.
inline uniform unsigned int getDepth(uniform unsigned int bitField)
{
    uniform unsigned int depth = bitField & 0xf;
    return depth;
}
|
||||
|
||||
// Returns 1 / 2^depth, or 1 / 2^(depth-1) when the non-quad-root flag is set.
inline uniform float
getParamFraction(uniform unsigned int bitField){
    uniform unsigned int shift = nonQuadRoot(bitField)
                               ? getDepth(bitField) - 1
                               : getDepth(bitField);
    return 1.0f / (1 << shift);
}
|
||||
|
||||
// Folds the weight of the missing outer control row/column back onto the
// two neighbouring weights for every boundary bit set in the bit field:
//   bit 1 -> tWeights[0] folded into [1],[2]    bit 4 -> tWeights[3] into [2],[1]
//   bit 2 -> sWeights[3] folded into [2],[1]    bit 8 -> sWeights[0] into [1],[2]
// The s and t arrays are independent, so each is handled in its own group;
// within a group the original order (low bit first) is kept.
inline void
adjustBoundaryWeights(uniform unsigned int bitField,
                      float sWeights[4],
                      float tWeights[4]) {

    uniform int edges = getBoundary(bitField);

    // t direction
    if (edges & 1) {
        tWeights[2] -= tWeights[0];
        tWeights[1] += 2*tWeights[0];
        tWeights[0] = 0;
    }
    if (edges & 4) {
        tWeights[1] -= tWeights[3];
        tWeights[2] += 2*tWeights[3];
        tWeights[3] = 0;
    }

    // s direction
    if (edges & 2) {
        sWeights[1] -= sWeights[3];
        sWeights[2] += 2*sWeights[3];
        sWeights[3] = 0;
    }
    if (edges & 8) {
        sWeights[2] -= sWeights[0];
        sWeights[1] += 2*sWeights[0];
        sWeights[0] = 0;
    }
}
|
||||
|
||||
// Evaluates the four uniform cubic B-Spline basis functions at t into
// point[], and their first derivatives at t into deriv[].
inline void
getBSplineWeights(float t, float point[4], float deriv[4]) {
    float const sixth = 1.0f / 6.0f;

    float tSq = t * t;
    float tCu = t * tSq;

    // basis values
    point[0] = sixth * (1.0f - 3.0f*(t - tSq) - tCu);
    point[1] = sixth * (4.0f - 6.0f*tSq + 3.0f*tCu);
    point[2] = sixth * (1.0f + 3.0f*(t + tSq - tCu));
    point[3] = sixth * (tCu);

    // first derivatives
    deriv[0] = -0.5f*tSq + t - 0.5f;
    deriv[1] =  1.5f*tSq - 2.0f*t;
    deriv[2] = -1.5f*tSq + t + 0.5f;
    deriv[3] =  0.5f*tSq;
}
|
||||
|
||||
// Evaluates the four cubic Bezier basis functions at t into point[]
// (expressed with t and its complement 1 - t), and their first derivatives
// at t into deriv[].
inline void
getBezierWeights(float t, float point[4], float deriv[4]) {
    float tSq   = t*t;
    float comp  = 1.0f - t;
    float compSq = comp * comp;

    // basis values
    point[0] = compSq * comp;
    point[1] = compSq * t * 3.0f;
    point[2] = tSq * comp * 3.0f;
    point[3] = tSq * t;

    // first derivatives
    deriv[0] = -3.0f * compSq;
    deriv[1] =  9.0f * tSq - 12.0f * t + 3.0f;
    deriv[2] = -9.0f * tSq +  6.0f * t;
    deriv[3] =  3.0f * tSq;
}
|
||||
|
||||
// Evaluates the four uniform cubic B-Spline basis functions at t into
// point[]; no derivatives are produced.
inline void
getBSplineWeightsNoDerivative(float t, float point[4]) {
    float const sixth = 1.0f / 6.0f;

    float tSq = t * t;
    float tCu = t * tSq;

    point[0] = sixth * (1.0f - 3.0f*(t - tSq) - tCu);
    point[1] = sixth * (4.0f - 6.0f*tSq + 3.0f*tCu);
    point[2] = sixth * (1.0f + 3.0f*(t + tSq - tCu));
    point[3] = sixth * (tCu);
}
|
||||
|
||||
// Evaluates the four cubic Bezier basis functions at t into point[]
// (expressed with t and its complement 1 - t); no derivatives are produced.
inline void
getBezierWeightsNoDerivative(float t, float point[4]) {
    float tSq    = t*t;
    float comp   = 1.0f - t;
    float compSq = comp * comp;

    point[0] = compSq * comp;
    point[1] = compSq * t * 3.0f;
    point[2] = tSq * comp * 3.0f;
    point[3] = tSq * t;
}
|
||||
|
||||
// Evaluates nPoint samples of one bilinear (4 control vertex) patch, writing
// positions and both first derivatives.
//
//   bitField       packed patch parameters (not read by this kernel)
//   nPoint         number of (u,v) samples
//   u, v           sample coordinates, nPoint entries each
//   vertexIndices  the 4 control vertex indices of the patch
//   inDesc/inQ     source buffer; offset and stride are in floats, and
//                  length / 3 gives the number of 3-float channels
//   outDesc/outQ   destination for the interpolated values
//   duDesc/outDQU  destination for the u derivative
//   dvDesc/outDQV  destination for the v derivative
//
// At most MAX_CHANNEL channels are supported.
export void
evalBilinear(uniform unsigned int bitField,
             uniform int nPoint,
             uniform const float * uniform u,
             uniform const float * uniform v,
             uniform const int * uniform vertexIndices,
             uniform const BufferDescriptor &inDesc,
             uniform const float * uniform inQ,
             uniform const BufferDescriptor &outDesc,
             uniform float *uniform outQ,
             uniform const BufferDescriptor &duDesc,
             uniform float *uniform outDQU,
             uniform const BufferDescriptor &dvDesc,
             uniform float *uniform outDQV)
{
    uniform int nChannel = inDesc.length / 3;
    // The scratch arrays below hold exactly MAX_CHANNEL channels, so
    // nChannel == MAX_CHANNEL is still in bounds.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, channel-major: [channel * 4 + vertex].
    uniform Point controlVertices[MAX_CHANNEL*4];
    for(uniform int i=0; i<4; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 4 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    foreach( n = 0 ... nPoint) {
        float ou = 1.0f - u[n];
        float ov = 1.0f - v[n];
        float w[4] = { ov*ou, v[n]*ou, v[n]*u[n], ov*u[n] };

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            Point Q;
            Q.x = Q.y = Q.z = 0.0;
            for (uniform int i=0; i<4; ++i) {
                Q = Q + w[i] * controlVertices[c * 4 + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }
    }

    // The derivatives do not depend on (u,v): compute them once per channel
    // and replicate them for every sample.
    uniform Point dU[MAX_CHANNEL], dV[MAX_CHANNEL];
    for(uniform int c=0; c<nChannel; c++) {
        dU[c] = 0.5 * (controlVertices[c * 4 + 3] - controlVertices[c * 4 + 0] +
                       controlVertices[c * 4 + 2] - controlVertices[c * 4 + 1] );

        dV[c] = 0.5 * (controlVertices[c * 4 + 1] - controlVertices[c * 4 + 0] +
                       controlVertices[c * 4 + 2] - controlVertices[c * 4 + 3] );
    }

    foreach( n = 0 ... nPoint) {
        float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
        float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            *pOutDQU ++ = dU[c].x, *pOutDQU ++ = dU[c].y, *pOutDQU ++ = dU[c].z;
            *pOutDQV ++ = dV[c].x, *pOutDQV ++ = dV[c].y, *pOutDQV ++ = dV[c].z;
        }
    }
}
|
||||
|
||||
// Evaluates nPoint samples of one bilinear (4 control vertex) patch, writing
// positions only.
//
//   bitField       packed patch parameters (not read by this kernel)
//   nPoint         number of (u,v) samples
//   u, v           sample coordinates, nPoint entries each
//   vertexIndices  the 4 control vertex indices of the patch
//   inDesc/inQ     source buffer; offset and stride are in floats, and
//                  length / 3 gives the number of 3-float channels
//   outDesc/outQ   destination for the interpolated values
//
// At most MAX_CHANNEL channels are supported.
export void
evalBilinearNoDerivative(uniform unsigned int bitField,
                         uniform int nPoint,
                         uniform const float * uniform u,
                         uniform const float * uniform v,
                         uniform const int * uniform vertexIndices,
                         uniform const BufferDescriptor &inDesc,
                         uniform const float * uniform inQ,
                         uniform const BufferDescriptor &outDesc,
                         uniform float *uniform outQ)
{
    uniform int nChannel = inDesc.length / 3;
    // The scratch array below holds exactly MAX_CHANNEL channels, so
    // nChannel == MAX_CHANNEL is still in bounds.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, channel-major: [channel * 4 + vertex].
    uniform Point controlVertices[MAX_CHANNEL*4];
    for(uniform int i=0; i<4; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 4 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    foreach( n = 0 ... nPoint) {
        float ou = 1.0f - u[n];
        float ov = 1.0f - v[n];
        float w[4] = { ov*ou, v[n]*ou, v[n]*u[n], ov*u[n] };

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            Point Q;
            Q.x = Q.y = Q.z = 0.0;
            for (uniform int i=0; i<4; ++i) {
                Q = Q + w[i] * controlVertices[c * 4 + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }
    }
}
|
||||
|
||||
// Evaluates nPoint samples of one bicubic B-Spline (16 control vertex) patch,
// writing positions and both first derivatives.
//
//   bitField       packed patch parameters (depth, corner u/v, boundary mask)
//   nPoint         number of (u,v) samples
//   u, v           sample coordinates, nPoint entries each; they are mapped
//                  into the patch's own parametric range before evaluation
//   vertexIndices  the 16 control vertex indices of the patch
//   inDesc/inQ     source buffer; offset and stride are in floats, and
//                  length / 3 gives the number of 3-float channels
//   outDesc/outQ   destination for the interpolated values
//   duDesc/outDQU  destination for the u derivative
//   dvDesc/outDQV  destination for the v derivative
//
// At most MAX_CHANNEL channels are supported.
export void
evalBSpline(uniform unsigned int bitField,
            uniform int nPoint,
            uniform const float * uniform u,
            uniform const float * uniform v,
            uniform const int * uniform vertexIndices,
            uniform const BufferDescriptor &inDesc,
            uniform const float * uniform inQ,
            uniform const BufferDescriptor &outDesc,
            uniform float *uniform outQ,
            uniform const BufferDescriptor &duDesc,
            uniform float *uniform outDQU,
            uniform const BufferDescriptor &dvDesc,
            uniform float *uniform outDQV)
{
    uniform int nChannel = inDesc.length / 3;
    // The scratch array below holds exactly MAX_CHANNEL channels, so
    // nChannel == MAX_CHANNEL is still in bounds.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, channel-major: [channel * 16 + vertex].
    uniform Point controlVertices[MAX_CHANNEL*16];
    for(uniform int i=0; i<16; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    // Derivatives are scaled by 2^depth.
    uniform float dScale = (uniform float)(1 << getDepth(bitField));

    uniform float frac = getParamFraction(bitField);

    // top left corner
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach( n = 0 ... nPoint) {
        // normalize u,v coordinates
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4];

        getBSplineWeights(s, sWeights, dsWeights);
        getBSplineWeights(t, tWeights, dtWeights);

        // The same boundary folding is applied to values and derivatives.
        adjustBoundaryWeights(bitField, sWeights, tWeights);
        adjustBoundaryWeights(bitField, dsWeights, dtWeights);

        // Tensor product of the two 1D bases, row i (t) by column j (s).
        float weight[16];
        for (uniform int i = 0; i < 4; ++i) {
            for (uniform int j = 0; j < 4; ++j) {
                weight[4*i+j] = sWeights[j] * tWeights[i];
            }
        }

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16;
            Point Q;
            Q.x = Q.y = Q.z = 0.0;
            for (uniform int i=0; i<16; ++i) {
                Q = Q + weight[i] * controlVertices[offset + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }

        float derivS[16], derivT[16];
        for (uniform int i = 0; i < 4; ++i) {
            for (uniform int j = 0; j < 4; ++j) {
                derivS[4*i+j] = dsWeights[j] * tWeights[i] * dScale;
                derivT[4*i+j] = sWeights[j] * dtWeights[i] * dScale;
            }
        }

        float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
        float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16;
            Point DQU, DQV;
            DQU.x = DQU.y = DQU.z = 0.0;
            DQV.x = DQV.y = DQV.z = 0.0;
            for (uniform int i=0; i<16; ++i) {
                DQU = DQU + derivS[i] * controlVertices[offset + i];
                DQV = DQV + derivT[i] * controlVertices[offset + i];
            }

            *pOutDQU ++ = DQU.x, *pOutDQU ++ = DQU.y, *pOutDQU ++ = DQU.z;
            *pOutDQV ++ = DQV.x, *pOutDQV ++ = DQV.y, *pOutDQV ++ = DQV.z;
        }
    }
}
|
||||
|
||||
// Evaluates nPoint samples of one bicubic B-Spline (16 control vertex) patch,
// writing positions only.
//
//   bitField       packed patch parameters (depth, corner u/v, boundary mask)
//   nPoint         number of (u,v) samples
//   u, v           sample coordinates, nPoint entries each; they are mapped
//                  into the patch's own parametric range before evaluation
//   vertexIndices  the 16 control vertex indices of the patch
//   inDesc/inQ     source buffer; offset and stride are in floats, and
//                  length / 3 gives the number of 3-float channels
//   outDesc/outQ   destination for the interpolated values
//
// At most MAX_CHANNEL channels are supported.
export void
evalBSplineNoDerivative(uniform unsigned int bitField,
                        uniform int nPoint,
                        uniform const float * uniform u,
                        uniform const float * uniform v,
                        uniform const int * uniform vertexIndices,
                        uniform const BufferDescriptor &inDesc,
                        uniform const float * uniform inQ,
                        uniform const BufferDescriptor &outDesc,
                        uniform float *uniform outQ)
{
    uniform int nChannel = inDesc.length / 3;
    // The scratch array below holds exactly MAX_CHANNEL channels, so
    // nChannel == MAX_CHANNEL is still in bounds.
    assert(nChannel <= MAX_CHANNEL);

    // Gather the control vertices, channel-major: [channel * 16 + vertex].
    uniform Point controlVertices[MAX_CHANNEL*16];
    for(uniform int i=0; i<16; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    uniform float frac = getParamFraction(bitField);

    // top left corner
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach( n = 0 ... nPoint) {
        // normalize u,v coordinates
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float sWeights[4], tWeights[4];

        getBSplineWeightsNoDerivative(s, sWeights);
        getBSplineWeightsNoDerivative(t, tWeights);

        adjustBoundaryWeights(bitField, sWeights, tWeights);

        // Tensor product of the two 1D bases, row i (t) by column j (s).
        float weight[16];
        for (uniform int i = 0; i < 4; ++i) {
            for (uniform int j = 0; j < 4; ++j) {
                weight[4*i+j] = sWeights[j] * tWeights[i];
            }
        }

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 16;
            Point Q;
            Q.x = Q.y = Q.z = 0.0;
            for (uniform int i=0; i<16; ++i) {
                Q = Q + weight[i] * controlVertices[offset + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }
    }
}
|
||||
|
||||
// Computes the 20 Gregory basis weights at (s,t) together with the weights
// for the first derivatives.
//
//   bitField  packed patch parameterization; only getDepth(bitField) is read
//             here, to scale the derivative weights by (1 << depth)
//   s, t      patch-local coordinates of the sample
//   point     receives the 20 position weights
//   deriv1    receives the 20 weights built from the s-derivative of the basis
//   deriv2    receives the 20 weights built from the t-derivative of the basis
void getGregoryWeights(uniform unsigned int bitField,
                       float s, float t, float point[20], float deriv1[20], float deriv2[20]) {
    //
    //  P3         e3-      e2+         P2
    //     15------17-------11--------10
    //     |        |        |        |
    //     |        |        |        |
    //     |        | f3-    | f2+    |
    //     |       19       13        |
    // e3+ 16-----18           14-----12 e2-
    //     |     f3+          f2-     |
    //     |                          |
    //     |                          |
    //     |      f0-         f1+     |
    // e0- 2------4            8------6 e1+
    //     |        3        9        |
    //     |        | f0+    | f1-    |
    //     |        |        |        |
    //     |        |        |        |
    //     O--------1--------7--------5
    //  P0         e0+      e1-         P1
    //

    //  Indices of boundary and interior points and their corresponding Bezier points
    //  (this can be reduced with more direct indexing and unrolling of loops):
    //
    static uniform int const boundaryGregory[12] = { 0, 1, 7, 5, 2, 6, 16, 12, 15, 17, 11, 10 };
    static uniform int const boundaryBezSCol[12] = { 0, 1, 2, 3, 0, 3, 0, 3, 0, 1, 2, 3 };
    static uniform int const boundaryBezTRow[12] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 };

    static uniform int const interiorGregory[8] = { 3, 4, 8, 9, 13, 14, 18, 19 };
    static uniform int const interiorBezSCol[8] = { 1, 1, 2, 2, 2, 2, 1, 1 };
    static uniform int const interiorBezTRow[8] = { 1, 1, 1, 1, 2, 2, 2, 2 };

    //
    //  Bezier basis functions are denoted with B while the rational multipliers for the
    //  interior points will be denoted G -- so we have B(s), B(t) and G(s,t):
    //
    //  Directional Bezier basis functions B at s and t:
    float Bs[4], Bds[4];
    float Bt[4], Bdt[4];

    getBezierWeights(s, Bs, Bds);
    getBezierWeights(t, Bt, Bdt);

    //  Rational multipliers G at s and t:
    float sC = 1.0f - s;
    float tC = 1.0f - t;

    //  Use <= here to avoid compiler warnings -- the sums should always be non-negative.
    //  Each dfN ends up holding the reciprocal of its sum (or 1 when the sum is not
    //  positive, which avoids a division by zero):
    float df0 = s  + t;   df0 = (df0 <= 0.0f) ? 1.0f : (1.0f / df0);
    float df1 = sC + t;   df1 = (df1 <= 0.0f) ? 1.0f : (1.0f / df1);
    float df2 = sC + tC;  df2 = (df2 <= 0.0f) ? 1.0f : (1.0f / df2);
    float df3 = s  + tC;  df3 = (df3 <= 0.0f) ? 1.0f : (1.0f / df3);

    float G[8] = { s*df0, t*df0,  t*df1, sC*df1,  sC*df2, tC*df2,  tC*df3, s*df3 };

    //  Combined weights for boundary and interior points:
    for (uniform int i = 0; i < 12; ++i) {
        point[boundaryGregory[i]] = Bs[boundaryBezSCol[i]] * Bt[boundaryBezTRow[i]];
    }
    for (uniform int i = 0; i < 8; ++i) {
        point[interiorGregory[i]] = Bs[interiorBezSCol[i]] * Bt[interiorBezTRow[i]] * G[i];
    }

    //
    //  For derivatives, the basis functions for the interior points are rational and ideally
    //  require appropriate differentiation, i.e. product rule for the combination of B and G
    //  and the quotient rule for the rational G itself.  As initially proposed by Loop et al
    //  though, the approximation using the 16 Bezier points arising from the G(s,t) has
    //  proved adequate (and is what the GPU shaders use) so we continue to use that here.
    //
    //  An implementation of the true derivatives is provided for future reference -- it is
    //  unclear if the approximations will hold up under surface analysis involving higher
    //  order differentiation.
    //

    //  Remember to include derivative scaling in all assignments below:
    uniform float dScale = (uniform float)(1 << getDepth(bitField));

    //  Combined weights for boundary points -- simple (scaled) tensor products:
    for (uniform int i = 0; i < 12; ++i) {
        uniform int iDst = boundaryGregory[i];
        uniform int tRow = boundaryBezTRow[i];
        uniform int sCol = boundaryBezSCol[i];

        deriv1[iDst] = Bds[sCol] * Bt[tRow] * dScale;
        deriv2[iDst] = Bdt[tRow] * Bs[sCol] * dScale;
    }

#define _USE_BEZIER_PSEUDO_DERIVATIVES
#ifdef _USE_BEZIER_PSEUDO_DERIVATIVES
    //  Approximation to the true Gregory derivatives by differentiating the Bezier patch
    //  unique to the given (s,t), i.e. having F = (g^+ * f^+) + (g^- * f^-) as its four
    //  interior points:
    //
    //  Combined weights for interior points -- (scaled) tensor products with G+ or G-:
    for (uniform int i = 0; i < 8; ++i) {
        uniform int iDst = interiorGregory[i];
        uniform int tRow = interiorBezTRow[i];
        uniform int sCol = interiorBezSCol[i];

        deriv1[iDst] = Bds[sCol] * Bt[tRow] * G[i] * dScale;
        deriv2[iDst] = Bdt[tRow] * Bs[sCol] * G[i] * dScale;
    }
#else
    //  True Gregory derivatives using appropriate differentiation of composite functions:
    //
    //  Note that for G(s,t) = N(s,t) / D(s,t), all N' and D' are trivial constants (which
    //  simplifies things for higher order derivatives).  And while each pair of functions
    //  G (i.e. the G+ and G- corresponding to points f+ and f-) must sum to 1 to ensure
    //  Bezier equivalence (when f+ = f-), the pairs of G' must similarly sum to 0.  So we
    //  can potentially compute only one of the pair and negate the result for the other
    //  (and with 4 or 8 computations involving these constants, this is all very SIMD
    //  friendly...) but for now we treat all 8 independently for simplicity.
    //
    //float N[8] = {   s,   t,    t,  sC,   sC,  tC,   tC,   s };

    //  D is built from the per-sample (varying) reciprocals df0..df3, so it must be a
    //  varying array -- a uniform array cannot be initialized from varying values:
    float D[8] = { df0, df0,  df1, df1,  df2, df2,  df3, df3 };

    static uniform float const Nds[8] = { 1.0f, 0.0f,  0.0f, -1.0f, -1.0f,  0.0f,  0.0f,  1.0f };
    static uniform float const Ndt[8] = { 0.0f, 1.0f,  1.0f,  0.0f,  0.0f, -1.0f, -1.0f,  0.0f };

    static uniform float const Dds[8] = { 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f,  1.0f,  1.0f };
    static uniform float const Ddt[8] = { 1.0f, 1.0f,  1.0f,  1.0f, -1.0f, -1.0f, -1.0f, -1.0f };

    //  Combined weights for interior points -- (scaled) combinations of B, B', G and G':
    for (uniform int i = 0; i < 8; ++i) {
        uniform int iDst = interiorGregory[i];
        uniform int tRow = interiorBezTRow[i];
        uniform int sCol = interiorBezSCol[i];

        //  Quotient rule for G' (re-expressed in terms of G to simplify (and D = 1/D)):
        float Gds = (Nds[i] - Dds[i] * G[i]) * D[i];
        float Gdt = (Ndt[i] - Ddt[i] * G[i]) * D[i];

        //  Product rule combining B and B' with G and G' (and scaled):
        deriv1[iDst] = (Bds[sCol] * G[i] + Bs[sCol] * Gds) * Bt[tRow] * dScale;
        deriv2[iDst] = (Bdt[tRow] * G[i] + Bt[tRow] * Gdt) * Bs[sCol] * dScale;
    }
#endif
}
|
||||
|
||||
// Computes the 20 Gregory basis weights (positions only) at (s,t) and stores
// them in point[]. bitField is not read by this function.
//
// The 12 boundary points take a plain tensor product of the cubic Bezier
// weights; the 8 interior (face) points additionally carry a rational
// multiplier G(s,t).
void getGregoryWeightsNoDerivative(uniform unsigned int bitField, float s, float t, float point[20]) {
    // For each boundary point: its Gregory index and the Bezier column (s) / row (t)
    // whose weights it uses.
    static uniform int const edgeDst[12]  = { 0, 1, 7, 5, 2, 6, 16, 12, 15, 17, 11, 10 };
    static uniform int const edgeSCol[12] = { 0, 1, 2, 3, 0, 3, 0, 3, 0, 1, 2, 3 };
    static uniform int const edgeTRow[12] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 };

    // Same tables for the interior points.
    static uniform int const faceDst[8]  = { 3, 4, 8, 9, 13, 14, 18, 19 };
    static uniform int const faceSCol[8] = { 1, 1, 2, 2, 2, 2, 1, 1 };
    static uniform int const faceTRow[8] = { 1, 1, 1, 1, 2, 2, 2, 2 };

    // Directional Bezier basis functions at s and t.
    float Bs[4];
    float Bt[4];
    getBezierWeightsNoDerivative(s, Bs);
    getBezierWeightsNoDerivative(t, Bt);

    // Complements of the parametric coordinates.
    float sC = 1.0f - s;
    float tC = 1.0f - t;

    // Denominators of the rational multipliers, one per patch corner ...
    float sum0 = s  + t;
    float sum1 = sC + t;
    float sum2 = sC + tC;
    float sum3 = s  + tC;

    // ... and their reciprocals. A non-positive sum yields 1 instead, which avoids
    // dividing by zero (<= rather than == keeps the compiler quiet; the sums are
    // expected to be non-negative).
    float inv0 = (sum0 <= 0.0f) ? 1.0f : (1.0f / sum0);
    float inv1 = (sum1 <= 0.0f) ? 1.0f : (1.0f / sum1);
    float inv2 = (sum2 <= 0.0f) ? 1.0f : (1.0f / sum2);
    float inv3 = (sum3 <= 0.0f) ? 1.0f : (1.0f / sum3);

    float G[8] = { s*inv0, t*inv0,  t*inv1, sC*inv1,  sC*inv2, tC*inv2,  tC*inv3, s*inv3 };

    // Boundary points: tensor product only.
    for (uniform int k = 0; k < 12; ++k) {
        uniform int col = edgeSCol[k];
        uniform int row = edgeTRow[k];
        point[edgeDst[k]] = Bs[col] * Bt[row];
    }

    // Interior points: tensor product scaled by the rational multiplier.
    for (uniform int k = 0; k < 8; ++k) {
        uniform int col = faceSCol[k];
        uniform int row = faceTRow[k];
        point[faceDst[k]] = Bs[col] * Bt[row] * G[k];
    }
}
|
||||
|
||||
// Evaluates limit positions and first derivatives on a single Gregory-basis
// patch (20 control points) for nPoint (u,v) samples.
//
//   bitField       packed patch parameterization; read through
//                  getParamFraction(), getU(), getV() and getGregoryWeights()
//   nPoint         number of samples in u[] and v[]
//   u, v           sample coordinates
//   vertexIndices  the 20 control vertex indices of the patch
//   inDesc, inQ    layout and storage of the source primvar data
//   outDesc, outQ  layout and storage of the evaluated positions
//   duDesc, outDQU layout and storage of the results weighted by deriv1
//   dvDesc, outDQV layout and storage of the results weighted by deriv2
//
// For each output buffer, sample n is written starting at
// buffer + desc.offset + n * desc.stride.
export void
evalGregory(uniform unsigned int bitField,
            uniform int nPoint,
            uniform float u[],
            uniform float v[],
            uniform const unsigned int vertexIndices[],
            uniform const BufferDescriptor &inDesc,
            uniform const float inQ[],
            uniform const BufferDescriptor &outDesc,
            uniform float outQ[],
            uniform const BufferDescriptor &duDesc,
            uniform float outDQU[],
            uniform const BufferDescriptor &dvDesc,
            uniform float outDQV[])
{
    // The primvar data is processed as groups of three floats ("channels").
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the 20 control points once, channel-major:
    // controlVertices[c * 20 + i].
    uniform Point controlVertices[MAX_CHANNEL*20];
    for(uniform int i=0; i<20; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    uniform float frac = getParamFraction(bitField);

    // top left corner
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach( n = 0 ... nPoint) {
        // normalize u,v coordinates
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float point[20], deriv1[20], deriv2[20];
        getGregoryWeights(bitField, s, t, point, deriv1, deriv2);

        // Positions. All 20 weights must be applied and the per-channel base
        // must match the 20-wide layout used when gathering above.
        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20;
            Point Q;
            Q.x = Q.y = Q.z = 0.0;
            for (uniform int i=0; i<20; ++i) {
                Q = Q + point[i] * controlVertices[offset + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }

        // Derivatives.
        float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
        float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20;
            Point DQU, DQV;
            DQU.x = DQU.y = DQU.z = 0.0;
            DQV.x = DQV.y = DQV.z = 0.0;
            for (uniform int i=0; i<20; ++i) {
                DQU = DQU + deriv1[i] * controlVertices[offset + i];
                DQV = DQV + deriv2[i] * controlVertices[offset + i];
            }

            *pOutDQU ++ = DQU.x, *pOutDQU ++ = DQU.y, *pOutDQU ++ = DQU.z;
            *pOutDQV ++ = DQV.x, *pOutDQV ++ = DQV.y, *pOutDQV ++ = DQV.z;
        }
    }
}
|
||||
|
||||
// Evaluates limit positions (no derivatives) on a single Gregory-basis patch
// (20 control points) for nPoint (u,v) samples.
//
//   bitField       packed patch parameterization; read through
//                  getParamFraction(), getU(), getV() and
//                  getGregoryWeightsNoDerivative()
//   nPoint         number of samples in u[] and v[]
//   u, v           sample coordinates
//   vertexIndices  the 20 control vertex indices of the patch
//   inDesc, inQ    layout and storage of the source primvar data
//   outDesc, outQ  layout and storage of the results; sample n is written
//                  starting at outQ + outDesc.offset + n * outDesc.stride
export void
evalGregoryNoDerivative(uniform unsigned int bitField,
                        uniform int nPoint,
                        uniform float u[],
                        uniform float v[],
                        uniform const unsigned int vertexIndices[],
                        uniform const BufferDescriptor &inDesc,
                        uniform const float inQ[],
                        uniform const BufferDescriptor &outDesc,
                        uniform float outQ[]
                        )
{
    // The primvar data is processed as groups of three floats ("channels").
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the 20 control points once, channel-major: cvs[c * 20 + k].
    uniform Point cvs[MAX_CHANNEL*20];
    for (uniform int k = 0; k < 20; ++k) {
        uniform unsigned int vertexId = vertexIndices[k];
        uniform const float * uniform pSrc = inQ + inDesc.offset + vertexId * inDesc.stride;
        for (uniform int c = 0; c < nChannel; ++c) {
            uniform int slot = c * 20 + k;
            cvs[slot].x = pSrc[3*c + 0];
            cvs[slot].y = pSrc[3*c + 1];
            cvs[slot].z = pSrc[3*c + 2];
        }
    }

    uniform float frac = getParamFraction(bitField);

    // top left corner of the patch in parametric space
    uniform float uOrigin = (uniform float)getU(bitField)*frac;
    uniform float vOrigin = (uniform float)getV(bitField)*frac;

    foreach (n = 0 ... nPoint) {
        // normalize u,v coordinates to the patch
        float s = (u[n] - uOrigin) / frac;
        float t = (v[n] - vOrigin) / frac;

        float basis[20];
        getGregoryWeightsNoDerivative(bitField, s, t, basis);

        // Three consecutive floats are written per channel.
        float *pDst = outQ + outDesc.offset + n * outDesc.stride;
        for (uniform int c = 0; c < nChannel; ++c) {
            uniform int base = c * 20;

            Point Q;
            Q.x = 0.0;
            Q.y = 0.0;
            Q.z = 0.0;
            for (uniform int k = 0; k < 20; ++k) {
                Q = Q + basis[k] * cvs[base + k];
            }

            pDst[3*c + 0] = Q.x;
            pDst[3*c + 1] = Q.y;
            pDst[3*c + 2] = Q.z;
        }
    }
}
|
||||
|
55
opensubdiv/osd/ispcEvalLimitKernel.isph
Normal file
55
opensubdiv/osd/ispcEvalLimitKernel.isph
Normal file
@ -0,0 +1,55 @@
|
||||
//
|
||||
// ispcEvalLimitKernel.isph
|
||||
// (Header automatically generated by the ispc compiler.)
|
||||
// DO NOT EDIT THIS FILE.
|
||||
//
|
||||
|
||||
#ifndef ISPC_ISPCEVALLIMITKERNEL_ISPH
|
||||
#define ISPC_ISPCEVALLIMITKERNEL_ISPH
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace ispc { /* namespace */
|
||||
#endif // __cplusplus
|
||||
#ifndef __ISPC_STRUCT_BufferDescriptor__
|
||||
#define __ISPC_STRUCT_BufferDescriptor__
|
||||
struct BufferDescriptor {
|
||||
int32_t offset;
|
||||
int32_t length;
|
||||
int32_t stride;
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Functions exported from ispc code
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
extern void evalBSpline(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
|
||||
|
||||
extern void evalBilinear(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
|
||||
|
||||
extern void evalGregory(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
|
||||
|
||||
extern void evalBSplineNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
|
||||
|
||||
extern void evalBilinearNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
|
||||
|
||||
extern void evalGregoryNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
|
||||
|
||||
extern void getSIMDWidth(int32_t &simdWidth);
|
||||
#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)
|
||||
} /* end extern C */
|
||||
#endif // __cplusplus
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* namespace */
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // ISPC_ISPCEVALLIMITKERNEL_ISPH
|
289
opensubdiv/osd/ispcEvaluator.cpp
Normal file
289
opensubdiv/osd/ispcEvaluator.cpp
Normal file
@ -0,0 +1,289 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "ispcEvaluator.h"
|
||||
#include "cpuKernel.h"
|
||||
#include "../far/patchBasis.h"
|
||||
#include "ispcEvalLimitKernel.isph"
|
||||
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
#define grain_size 512
|
||||
|
||||
/* static */
|
||||
bool
|
||||
IspcEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end) {
|
||||
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
// XXX: we can probably expand cpuKernel.cpp to here.
|
||||
CpuEvalStencils(src, srcDesc, dst, dstDesc,
|
||||
sizes, offsets, indices, weights, start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
IspcEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end) {
|
||||
if (end <= start) return true;
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
if (srcDesc.length != duDesc.length) return false;
|
||||
if (srcDesc.length != dvDesc.length) return false;
|
||||
|
||||
CpuEvalStencils(src, srcDesc,
|
||||
dst, dstDesc,
|
||||
du, duDesc,
|
||||
dv, dvDesc,
|
||||
sizes, offsets, indices,
|
||||
weights, duWeights, dvWeights,
|
||||
start, end);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Lightweight cursor over a strided buffer of T.
//
// _p points at the current element group, _length is the number of T values
// in one group and _stride is the distance (in T) between consecutive groups.
// A null _p is tolerated: every mutating operation then becomes a no-op.
template <typename T>
struct BufferAdapter {
    BufferAdapter(T *p, int length, int stride) :
        _p(p), _length(length), _stride(stride) { }

    // Zeroes the _length values of the current group (no-op on a null buffer,
    // matching AddWithWeight() and operator++()).
    void Clear() {
        if (_p) {
            for (int i = 0; i < _length; ++i) _p[i] = 0;
        }
    }

    // Accumulates src[i] * w into the current group.
    void AddWithWeight(T const *src, float w) {
        if (_p) {
            for (int i = 0; i < _length; ++i) {
                _p[i] += src[i] * w;
            }
        }
    }

    // Returns the address of the group `index` strides past the current one.
    const T *operator[] (int index) const {
        return _p + _stride * index;
    }

    // Advances the cursor by one stride.
    BufferAdapter<T> & operator ++() {
        if (_p) {
            _p += _stride;
        }
        return *this;
    }

    T *_p;
    int _length;
    int _stride;
};
|
||||
|
||||
/* static */
|
||||
bool
|
||||
IspcEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
// Copy BufferDescriptor to ispc version
|
||||
// Since memory alignment in ISPC may be different from C++,
|
||||
// we use the assignment for each field instead of the assignment for
|
||||
// the whole struct
|
||||
ispc::BufferDescriptor ispcSrcDesc;
|
||||
ispcSrcDesc.offset = srcDesc.offset;
|
||||
ispcSrcDesc.length = srcDesc.length;
|
||||
ispcSrcDesc.stride = srcDesc.stride;
|
||||
|
||||
tbb::blocked_range<int> range = tbb::blocked_range<int>(0, numPatchCoords, grain_size);
|
||||
tbb::parallel_for(range, [&](const tbb::blocked_range<int> &r)
|
||||
{
|
||||
uint i = r.begin();
|
||||
|
||||
ispc::BufferDescriptor ispcDstDesc, ispcDuDesc, ispcDvDesc;
|
||||
ispcDstDesc.offset = dstDesc.offset + dstDesc.offset + i * dstDesc.stride;
|
||||
ispcDstDesc.length = dstDesc.length;
|
||||
ispcDstDesc.stride = dstDesc.stride;
|
||||
|
||||
while (i < r.end()) {
|
||||
// the patch coordinates are sorted by patch handle
|
||||
// the following code searches the coordinates that
|
||||
// belongs to the same patch so that they can be evalauated
|
||||
// with ISPC
|
||||
int nCoord = 1;
|
||||
Far::PatchTable::PatchHandle handle = patchCoords[i].handle;
|
||||
while(i + nCoord < r.end() &&
|
||||
handle.isEqual(patchCoords[i + nCoord].handle) )
|
||||
nCoord ++;
|
||||
|
||||
PatchArray const &array = patchArrays[handle.arrayIndex];
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam const & param = patchParamBuffer[handle.patchIndex];
|
||||
|
||||
unsigned int bitField = param.field1;
|
||||
|
||||
const int *cvs = &patchIndexBuffer[array.indexBase + handle.vertIndex];
|
||||
|
||||
__declspec( align(64) ) float u[nCoord];
|
||||
__declspec( align(64) ) float v[nCoord];
|
||||
|
||||
for(int n=0; n<nCoord; n++) {
|
||||
u[n] = patchCoords[i + n].s;
|
||||
v[n] = patchCoords[i + n].t;
|
||||
}
|
||||
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
ispc::evalBSplineNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
||||
ispcDstDesc, dst);
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
ispc::evalGregoryNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
||||
ispcDstDesc, dst);
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
ispc::evalBilinearNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
||||
ispcDstDesc, dst);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
i += nCoord;
|
||||
ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
|
||||
}
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */
|
||||
bool
|
||||
IspcEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer) {
|
||||
if (srcDesc.length != dstDesc.length) return false;
|
||||
|
||||
// Copy BufferDescriptor to ispc version
|
||||
// Since memory alignment in ISPC may be different from C++,
|
||||
// we use the assignment for each field instead of the assignment for
|
||||
// the whole struct
|
||||
ispc::BufferDescriptor ispcSrcDesc;
|
||||
ispcSrcDesc.offset = srcDesc.offset;
|
||||
ispcSrcDesc.length = srcDesc.length;
|
||||
ispcSrcDesc.stride = srcDesc.stride;
|
||||
|
||||
tbb::blocked_range<int> range = tbb::blocked_range<int>(0, numPatchCoords, grain_size);
|
||||
tbb::parallel_for(range, [&](const tbb::blocked_range<int> &r)
|
||||
{
|
||||
uint i = r.begin();
|
||||
|
||||
ispc::BufferDescriptor ispcDstDesc, ispcDuDesc, ispcDvDesc;
|
||||
ispcDstDesc.offset = dstDesc.offset + dstDesc.offset + i * dstDesc.stride;
|
||||
ispcDstDesc.length = dstDesc.length;
|
||||
ispcDstDesc.stride = dstDesc.stride;
|
||||
|
||||
ispcDuDesc.offset = duDesc.offset + i * duDesc.stride;
|
||||
ispcDuDesc.length = duDesc.length;
|
||||
ispcDuDesc.stride = duDesc.stride;
|
||||
|
||||
ispcDvDesc.offset = dvDesc.offset + i * dvDesc.stride;
|
||||
ispcDvDesc.length = dvDesc.length;
|
||||
ispcDvDesc.stride = dvDesc.stride;
|
||||
while (i < r.end()) {
|
||||
// the patch coordinates are sorted by patch handle
|
||||
// the following code searches the coordinates that
|
||||
// belongs to the same patch so that they can be evalauated
|
||||
// with ISPC
|
||||
int nCoord = 1;
|
||||
Far::PatchTable::PatchHandle handle = patchCoords[i].handle;
|
||||
while(i + nCoord < r.end() &&
|
||||
handle.isEqual(patchCoords[i + nCoord].handle) )
|
||||
nCoord ++;
|
||||
|
||||
PatchArray const &array = patchArrays[handle.arrayIndex];
|
||||
int patchType = array.GetPatchType();
|
||||
Far::PatchParam const & param = patchParamBuffer[handle.patchIndex];
|
||||
|
||||
unsigned int bitField = param.field1;
|
||||
|
||||
const int *cvs = &patchIndexBuffer[array.indexBase + handle.vertIndex];
|
||||
|
||||
__declspec( align(64) ) float u[nCoord];
|
||||
__declspec( align(64) ) float v[nCoord];
|
||||
|
||||
for(int n=0; n<nCoord; n++) {
|
||||
u[n] = patchCoords[i + n].s;
|
||||
v[n] = patchCoords[i + n].t;
|
||||
}
|
||||
|
||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
||||
ispc::evalBSpline(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
||||
ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
|
||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
||||
ispc::evalGregory(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
||||
ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
|
||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
||||
ispc::evalBilinear(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
||||
ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
i += nCoord;
|
||||
ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
|
||||
ispcDuDesc.offset = duDesc.offset + i * duDesc.stride;
|
||||
ispcDvDesc.offset = dvDesc.offset + i * dvDesc.stride;
|
||||
}
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
} // end namespace OpenSubdiv
|
482
opensubdiv/osd/ispcEvaluator.h
Normal file
482
opensubdiv/osd/ispcEvaluator.h
Normal file
@ -0,0 +1,482 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
|
||||
#define OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
|
||||
|
||||
#include "../version.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "../osd/bufferDescriptor.h"
|
||||
#include "../osd/types.h"
|
||||
|
||||
namespace OpenSubdiv {
|
||||
namespace OPENSUBDIV_VERSION {
|
||||
|
||||
namespace Osd {
|
||||
|
||||
class IspcEvaluator {
|
||||
public:
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Stencil evaluations with StencilTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic static eval stencils function. This function has the same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way from OsdMesh template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const IspcEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
if (stencilTable->GetNumStencils() == 0)
|
||||
return false;
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function which takes raw CPU pointers for
|
||||
/// input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
/// to apply for the range [start, end)
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
int start, int end);
|
||||
|
||||
/// \brief Generic static eval stencils function with derivatives.
|
||||
/// This function has a same signature as other device kernels
|
||||
/// have so that it can be called in the same way from OsdMesh
|
||||
/// template interface.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivative buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param stencilTable Far::StencilTable or equivalent
|
||||
///
|
||||
/// @param instance not used in the cpu kernel
|
||||
/// (declared as a typed pointer to prevent
|
||||
/// undesirable template resolution)
|
||||
///
|
||||
/// @param deviceContext not used in the cpu kernel
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
||||
static bool EvalStencils(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
STENCIL_TABLE const *stencilTable,
|
||||
const IspcEvaluator *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
&stencilTable->GetSizes()[0],
|
||||
&stencilTable->GetOffsets()[0],
|
||||
&stencilTable->GetControlIndices()[0],
|
||||
&stencilTable->GetWeights()[0],
|
||||
&stencilTable->GetDuWeights()[0],
|
||||
&stencilTable->GetDvWeights()[0],
|
||||
/*start = */ 0,
|
||||
/*end = */ stencilTable->GetNumStencils());
|
||||
}
|
||||
|
||||
/// \brief Static eval stencils function with derivatives, which takes
|
||||
/// raw CPU pointers for input and output.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
||||
///
|
||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
||||
///
|
||||
/// @param indices pointer to the indices buffer of the stencil table
|
||||
///
|
||||
/// @param weights pointer to the weights buffer of the stencil table
|
||||
///
|
||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
||||
///
|
||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
||||
///
|
||||
/// @param start start index of stencil table
|
||||
///
|
||||
/// @param end end index of stencil table
|
||||
///
|
||||
static bool EvalStencils(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
const int * sizes,
|
||||
const int * offsets,
|
||||
const int * indices,
|
||||
const float * weights,
|
||||
const float * duWeights,
|
||||
const float * dvWeights,
|
||||
int start, int end);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Limit evaluations with PatchTable
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Generic limit eval function. This function has the same
|
||||
/// signature as other device kernels have so that it can be called
|
||||
/// in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
IspcEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Generic limit eval function with derivatives. This function has
|
||||
/// the same signature as other device kernels have so that it can be
|
||||
/// called in the same way.
|
||||
///
|
||||
/// @param srcBuffer Input primvar buffer.
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// const float pointer for read
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dstBuffer Output primvar buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param duBuffer Output U-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
||||
///
|
||||
/// @param dvBuffer Output V-derivatives buffer
|
||||
/// must have BindCpuBuffer() method returning a
|
||||
/// float pointer for write
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchTable CpuPatchTable or equivalent
|
||||
/// XXX: currently Far::PatchTable can't be used
|
||||
/// due to interface mismatch
|
||||
///
|
||||
/// @param instance not used in the cpu evaluator
|
||||
///
|
||||
/// @param deviceContext not used in the cpu evaluator
|
||||
///
|
||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
||||
static bool EvalPatches(
|
||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PATCHCOORD_BUFFER *patchCoords,
|
||||
PATCH_TABLE *patchTable,
|
||||
IspcEvaluator const *instance = NULL,
|
||||
void * deviceContext = NULL) {
|
||||
(void)instance; // unused
|
||||
(void)deviceContext; // unused
|
||||
|
||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
||||
// ideally all buffer classes should have templated by datatype
|
||||
// so that downcast isn't needed there.
|
||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
||||
//
|
||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
||||
duBuffer->BindCpuBuffer(), duDesc,
|
||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
||||
numPatchCoords,
|
||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
||||
patchTable->GetPatchArrayBuffer(),
|
||||
patchTable->GetPatchIndexBuffer(),
|
||||
patchTable->GetPatchParamBuffer());
|
||||
}
|
||||
|
||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
||||
/// and evaluates limit values on the given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
int numPatchCoords,
|
||||
const PatchCoord *patchCoords,
|
||||
const PatchArray *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
const PatchParam *patchParamBuffer);
|
||||
|
||||
/// \brief Static limit eval function with derivatives. It takes an array of
/// PatchCoord and evaluates limit values and their U/V derivatives on
/// the given PatchTable.
|
||||
///
|
||||
/// @param src Input primvar pointer. An offset of srcDesc
|
||||
/// will be applied internally (i.e. the pointer
|
||||
/// should not include the offset)
|
||||
///
|
||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
||||
///
|
||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
||||
/// will be applied internally.
|
||||
///
|
||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
||||
///
|
||||
/// @param du Output U-derivatives pointer. An offset of
|
||||
/// duDesc will be applied internally.
|
||||
///
|
||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
||||
///
|
||||
/// @param dv Output V-derivatives pointer. An offset of
|
||||
/// dvDesc will be applied internally.
|
||||
///
|
||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
||||
///
|
||||
/// @param numPatchCoords number of patchCoords.
|
||||
///
|
||||
/// @param patchCoords array of locations to be evaluated.
|
||||
///
|
||||
/// @param patchArrays an array of Osd::PatchArray struct
|
||||
/// indexed by PatchCoord::arrayIndex
|
||||
///
|
||||
/// @param patchIndexBuffer an array of patch indices
|
||||
/// indexed by PatchCoord::vertIndex
|
||||
///
|
||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
||||
/// indexed by PatchCoord::patchIndex
|
||||
///
|
||||
static bool EvalPatches(
|
||||
const float *src, BufferDescriptor const &srcDesc,
|
||||
float *dst, BufferDescriptor const &dstDesc,
|
||||
float *du, BufferDescriptor const &duDesc,
|
||||
float *dv, BufferDescriptor const &dvDesc,
|
||||
int numPatchCoords,
|
||||
PatchCoord const *patchCoords,
|
||||
PatchArray const *patchArrays,
|
||||
const int *patchIndexBuffer,
|
||||
PatchParam const *patchParamBuffer);
|
||||
|
||||
/// ----------------------------------------------------------------------
|
||||
///
|
||||
/// Other methods
|
||||
///
|
||||
/// ----------------------------------------------------------------------
|
||||
|
||||
/// \brief Synchronize all asynchronous computation invoked on this device.
///        The body is empty: the call performs no work and ignores its
///        argument.
static void Synchronize(void * /*deviceContext = NULL*/) {
    // Intentionally empty.
}
|
||||
};
|
||||
|
||||
|
||||
} // end namespace Osd
|
||||
|
||||
} // end namespace OPENSUBDIV_VERSION
|
||||
using namespace OPENSUBDIV_VERSION;
|
||||
|
||||
} // end namespace OpenSubdiv
|
||||
|
||||
|
||||
#endif  // OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
|
Loading…
Reference in New Issue
Block a user