mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2025-01-03 13:41:06 +00:00
Revert "Add ISPC limit surface evaluation"
This commit is contained in:
parent
b006dc328e
commit
8a8771c97d
@ -197,8 +197,6 @@ if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANGCC OR CMAKE_COMPILER_IS_IC
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
list(APPEND OSD_COMPILER_FLAGS -std=c++11)
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
elseif(MSVC)
|
elseif(MSVC)
|
||||||
@ -323,9 +321,6 @@ endif()
|
|||||||
if(NOT NO_TBB)
|
if(NOT NO_TBB)
|
||||||
find_package(TBB 4.0)
|
find_package(TBB 4.0)
|
||||||
endif()
|
endif()
|
||||||
if(NOT NO_ISPC)
|
|
||||||
find_package(ISPC 1.6)
|
|
||||||
endif()
|
|
||||||
if (NOT NO_OPENGL)
|
if (NOT NO_OPENGL)
|
||||||
find_package(OpenGL)
|
find_package(OpenGL)
|
||||||
endif()
|
endif()
|
||||||
@ -544,12 +539,6 @@ if (NOT NO_MAYA)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(ISPC_FOUND)
|
|
||||||
add_definitions(
|
|
||||||
-DOPENSUBDIV_HAS_ISPC
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Link examples & regressions dynamically against Osd
|
# Link examples & regressions dynamically against Osd
|
||||||
set( OSD_LINK_TARGET osd_dynamic_cpu osd_dynamic_gpu )
|
set( OSD_LINK_TARGET osd_dynamic_cpu osd_dynamic_gpu )
|
||||||
|
|
||||||
|
@ -1,94 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright 2013 Pixar
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "Apache License")
|
|
||||||
# with the following modification; you may not use this file except in
|
|
||||||
# compliance with the Apache License and the following modification to it:
|
|
||||||
# Section 6. Trademarks. is deleted and replaced with:
|
|
||||||
#
|
|
||||||
# 6. Trademarks. This License does not grant permission to use the trade
|
|
||||||
# names, trademarks, service marks, or product names of the Licensor
|
|
||||||
# and its affiliates, except as required to comply with Section 4(c) of
|
|
||||||
# the License and to reproduce the content of the NOTICE file.
|
|
||||||
#
|
|
||||||
# You may obtain a copy of the Apache License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the Apache License with the above modification is
|
|
||||||
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
# KIND, either express or implied. See the Apache License for the specific
|
|
||||||
# language governing permissions and limitations under the Apache License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# - Try to find Intel's ISPC
|
|
||||||
# Once done this will define
|
|
||||||
#
|
|
||||||
# ISPC_FOUND - System has ISPC
|
|
||||||
# ISPC_DIR - The ISPC directory
|
|
||||||
|
|
||||||
# Obtain ISPC directory
|
|
||||||
if (WIN32)
|
|
||||||
#NOT IMPLEMENTED
|
|
||||||
elseif (APPLE)
|
|
||||||
#NOT IMPLEMENTED
|
|
||||||
else ()
|
|
||||||
find_path(ISPC_DIR
|
|
||||||
NAMES
|
|
||||||
ispc
|
|
||||||
PATHS
|
|
||||||
${ISPC_LOCATION}
|
|
||||||
NO_DEFAULT_PATH NO_SYSTEM_ENVIRONMENT_PATH
|
|
||||||
DOC "The directory where ISPC reside")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (ISPC_DIR)
|
|
||||||
execute_process(COMMAND ${ISPC_DIR}/ispc --version OUTPUT_VARIABLE ISPC_VERSION)
|
|
||||||
string(REGEX MATCH "[0-9].[0-9].[0-9]" ISPC_VERSION ${ISPC_VERSION})
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
include(FindPackageHandleStandardArgs)
|
|
||||||
|
|
||||||
find_package_handle_standard_args(ISPC
|
|
||||||
REQUIRED_VARS
|
|
||||||
ISPC_DIR
|
|
||||||
VERSION_VAR
|
|
||||||
ISPC_VERSION
|
|
||||||
)
|
|
||||||
|
|
||||||
mark_as_advanced( ISPC_DIR )
|
|
||||||
|
|
||||||
MACRO (ispc_compile)
|
|
||||||
|
|
||||||
SET(ISPC_TARGET_DIR ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/osd_ispc_obj.dir)
|
|
||||||
|
|
||||||
SET(ISPC_OBJECTS "")
|
|
||||||
|
|
||||||
FOREACH(src ${ARGN})
|
|
||||||
|
|
||||||
GET_FILENAME_COMPONENT(fname ${src} NAME_WE)
|
|
||||||
|
|
||||||
SET(results "${ISPC_TARGET_DIR}/${fname}.dev.o")
|
|
||||||
|
|
||||||
ADD_CUSTOM_COMMAND(
|
|
||||||
OUTPUT ${results} ${ISPC_TARGET_DIR}/${fname}_ispc.h
|
|
||||||
COMMAND ${ISPC_DIR}/ispc
|
|
||||||
--pic
|
|
||||||
-O1
|
|
||||||
--wno-perf
|
|
||||||
--woff
|
|
||||||
-h ${ISPC_TARGET_DIR}/${fname}_ispc.h
|
|
||||||
-MMM ${ISPC_TARGET_DIR}/${fname}.dev.idep
|
|
||||||
-o ${ISPC_TARGET_DIR}/${fname}.dev.o
|
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/${src}
|
|
||||||
\;
|
|
||||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src}
|
|
||||||
)
|
|
||||||
|
|
||||||
SET(ISPC_OBJECTS ${ISPC_OBJECTS} ${results})
|
|
||||||
|
|
||||||
ENDFOREACH()
|
|
||||||
|
|
||||||
ENDMACRO()
|
|
||||||
|
|
@ -34,10 +34,6 @@ GLFWmonitor* g_primary=0;
|
|||||||
#include <osd/cpuGLVertexBuffer.h>
|
#include <osd/cpuGLVertexBuffer.h>
|
||||||
#include <osd/mesh.h>
|
#include <osd/mesh.h>
|
||||||
|
|
||||||
#ifdef OPENSUBDIV_HAS_ISPC
|
|
||||||
#include <osd/ispcEvaluator.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef OPENSUBDIV_HAS_TBB
|
#ifdef OPENSUBDIV_HAS_TBB
|
||||||
#include <osd/tbbEvaluator.h>
|
#include <osd/tbbEvaluator.h>
|
||||||
#endif
|
#endif
|
||||||
@ -108,8 +104,7 @@ enum KernelType { kCPU = 0,
|
|||||||
kCUDA = 3,
|
kCUDA = 3,
|
||||||
kCL = 4,
|
kCL = 4,
|
||||||
kGLXFB = 5,
|
kGLXFB = 5,
|
||||||
kGLCompute = 6,
|
kGLCompute = 6 };
|
||||||
kISPC = 7 };
|
|
||||||
|
|
||||||
enum EndCap { kEndCapBSplineBasis,
|
enum EndCap { kEndCapBSplineBasis,
|
||||||
kEndCapGregoryBasis };
|
kEndCapGregoryBasis };
|
||||||
@ -174,10 +169,10 @@ float g_currentTime = 0;
|
|||||||
Stopwatch g_fpsTimer;
|
Stopwatch g_fpsTimer;
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
int g_nParticles = 655360;
|
int g_nParticles = 65536;
|
||||||
|
|
||||||
bool g_randomStart = true;//false;
|
bool g_randomStart = true;//false;
|
||||||
bool g_animParticles = false;
|
bool g_animParticles = true;
|
||||||
|
|
||||||
GLuint g_samplesVAO=0;
|
GLuint g_samplesVAO=0;
|
||||||
|
|
||||||
@ -444,9 +439,7 @@ updateGeom() {
|
|||||||
assert(g_particles);
|
assert(g_particles);
|
||||||
|
|
||||||
float elapsed = g_currentTime - g_prevTime;
|
float elapsed = g_currentTime - g_prevTime;
|
||||||
if(elapsed != 0.0f) {
|
g_particles->Update(elapsed);
|
||||||
g_particles->Update(elapsed);
|
|
||||||
}
|
|
||||||
g_prevTime = g_currentTime;
|
g_prevTime = g_currentTime;
|
||||||
|
|
||||||
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
|
std::vector<OpenSubdiv::Osd::PatchCoord> const &patchCoords
|
||||||
@ -655,20 +648,8 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) {
|
|||||||
(vertexStencils, varyingStencils,
|
(vertexStencils, varyingStencils,
|
||||||
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
nCoarseVertices, nverts, g_nParticles, g_patchTable,
|
||||||
&glComputeEvaluatorCache);
|
&glComputeEvaluatorCache);
|
||||||
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
#if defined(OPENSUBDIV_HAS_ISPC) && defined(OPENSUBDIV_HAS_TBB)
|
|
||||||
else if(g_kernel == kISPC) {
|
|
||||||
g_evalOutput = new EvalOutput<Osd::CpuGLVertexBuffer,
|
|
||||||
Osd::CpuGLVertexBuffer,
|
|
||||||
Far::StencilTable,
|
|
||||||
Osd::CpuPatchTable,
|
|
||||||
Osd::IspcEvaluator>
|
|
||||||
(vertexStencils, varyingStencils,
|
|
||||||
nCoarseVertices, nverts, g_nParticles, g_patchTable);
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
// Create the 'uv particles' manager - this class manages the limit
|
// Create the 'uv particles' manager - this class manages the limit
|
||||||
// location samples (ptex face index, (s,t) and updates them between frames.
|
// location samples (ptex face index, (s,t) and updates them between frames.
|
||||||
@ -894,7 +875,7 @@ display() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (g_endCap != kEndCapBSplineBasis &&
|
if (g_endCap != kEndCapBSplineBasis &&
|
||||||
(g_kernel != kCPU && g_kernel != kOPENMP && g_kernel != kTBB && g_kernel != kISPC)) {
|
(g_kernel != kCPU && g_kernel != kOPENMP && g_kernel != kTBB)) {
|
||||||
static char msg[] =
|
static char msg[] =
|
||||||
"ERROR: This kernel only supports BSpline basis patches.";
|
"ERROR: This kernel only supports BSpline basis patches.";
|
||||||
g_hud.DrawString(g_width/4, g_height/4+20, 1, 0, 0, msg);
|
g_hud.DrawString(g_width/4, g_height/4+20, 1, 0, 0, msg);
|
||||||
@ -1148,9 +1129,6 @@ initHUD() {
|
|||||||
#ifdef OPENSUBDIV_HAS_TBB
|
#ifdef OPENSUBDIV_HAS_TBB
|
||||||
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
|
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
|
||||||
#endif
|
#endif
|
||||||
#if defined(OPENSUBDIV_HAS_ISPC) && defined(OPENSUBDIV_HAS_TBB)
|
|
||||||
g_hud.AddPullDownButton(compute_pulldown, "ISPC", kISPC);
|
|
||||||
#endif
|
|
||||||
#ifdef OPENSUBDIV_HAS_CUDA
|
#ifdef OPENSUBDIV_HAS_CUDA
|
||||||
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
||||||
#endif
|
#endif
|
||||||
|
@ -32,17 +32,17 @@
|
|||||||
#ifdef OPENSUBDIV_HAS_TBB
|
#ifdef OPENSUBDIV_HAS_TBB
|
||||||
#include <tbb/parallel_for.h>
|
#include <tbb/parallel_for.h>
|
||||||
#include <tbb/atomic.h>
|
#include <tbb/atomic.h>
|
||||||
|
tbb::atomic<int> g_tbbCounter;
|
||||||
class TbbUpdateKernel {
|
class TbbUpdateKernel {
|
||||||
public:
|
public:
|
||||||
TbbUpdateKernel(float speed,
|
TbbUpdateKernel(float speed,
|
||||||
STParticles::Position *positions,
|
STParticles::Position *positions,
|
||||||
float *velocities,
|
float *velocities,
|
||||||
std::vector<STParticles::FaceInfo> const &adjacency,
|
std::vector<STParticles::FaceInfo> const &adjacency,
|
||||||
PatchHandleMap *patchHandleMap,
|
OpenSubdiv::Osd::PatchCoord *patchCoords,
|
||||||
OpenSubdiv::Far::PatchMap const *patchMap) :
|
OpenSubdiv::Far::PatchMap const *patchMap) :
|
||||||
_speed(speed), _positions(positions), _velocities(velocities),
|
_speed(speed), _positions(positions), _velocities(velocities),
|
||||||
_adjacency(adjacency), _patchHandleMap(patchHandleMap), _patchMap(patchMap) {
|
_adjacency(adjacency), _patchCoords(patchCoords), _patchMap(patchMap) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void operator () (tbb::blocked_range<int> const &r) const {
|
void operator () (tbb::blocked_range<int> const &r) const {
|
||||||
@ -76,13 +76,9 @@ public:
|
|||||||
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
|
OpenSubdiv::Far::PatchTable::PatchHandle const *handle =
|
||||||
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
|
_patchMap->FindPatch(p->ptexIndex, p->s, p->t);
|
||||||
if (handle) {
|
if (handle) {
|
||||||
PatchHandleMap::accessor a;
|
int index = g_tbbCounter.fetch_and_add(1);
|
||||||
if( !_patchHandleMap->find(a, handle)) {
|
_patchCoords[index] =
|
||||||
_patchHandleMap->insert(a, handle);
|
OpenSubdiv::Osd::PatchCoord(*handle, p->s, p->t);
|
||||||
}
|
|
||||||
std::vector<float> &st = a->second;
|
|
||||||
st.push_back(p->s);
|
|
||||||
st.push_back(p->t);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -91,7 +87,7 @@ private:
|
|||||||
STParticles::Position *_positions;
|
STParticles::Position *_positions;
|
||||||
float *_velocities;
|
float *_velocities;
|
||||||
std::vector<STParticles::FaceInfo> const &_adjacency;
|
std::vector<STParticles::FaceInfo> const &_adjacency;
|
||||||
PatchHandleMap *_patchHandleMap;
|
OpenSubdiv::Osd::PatchCoord *_patchCoords;
|
||||||
OpenSubdiv::Far::PatchMap const *_patchMap;
|
OpenSubdiv::Far::PatchMap const *_patchMap;
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
@ -280,36 +276,18 @@ STParticles::Update(float deltaTime) {
|
|||||||
if (deltaTime == 0) return;
|
if (deltaTime == 0) return;
|
||||||
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
|
float speed = GetSpeed() * std::max(0.001f, std::min(deltaTime, 0.5f));
|
||||||
|
|
||||||
|
_patchCoords.clear();
|
||||||
|
|
||||||
// XXX: this process should be parallelized.
|
// XXX: this process should be parallelized.
|
||||||
#ifdef OPENSUBDIV_HAS_TBB
|
#ifdef OPENSUBDIV_HAS_TBB
|
||||||
_patchHandleMap.clear();
|
|
||||||
|
|
||||||
|
_patchCoords.resize((int)GetNumParticles());
|
||||||
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
|
TbbUpdateKernel kernel(speed, &_positions[0], &_velocities[0],
|
||||||
_adjacency, &_patchHandleMap, _patchMap);;
|
_adjacency, &_patchCoords[0], _patchMap);;
|
||||||
|
g_tbbCounter = 0;
|
||||||
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
|
tbb::blocked_range<int> range(0, GetNumParticles(), 256);
|
||||||
tbb::parallel_for(range, kernel);
|
tbb::parallel_for(range, kernel);
|
||||||
|
_patchCoords.resize(g_tbbCounter);
|
||||||
|
|
||||||
int nCoord = 0;
|
|
||||||
for(PatchHandleMap::iterator i = _patchHandleMap.begin();
|
|
||||||
i != _patchHandleMap.end();
|
|
||||||
i ++) {
|
|
||||||
nCoord += (i->second.size() / 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
_patchCoords.resize(nCoord);
|
|
||||||
|
|
||||||
int index = 0;
|
|
||||||
for(PatchHandleMap::iterator i = _patchHandleMap.begin();
|
|
||||||
i != _patchHandleMap.end();
|
|
||||||
i ++) {
|
|
||||||
for(int j = 0; j < i->second.size(); j += 2) {
|
|
||||||
_patchCoords[index].handle = *(i->first);
|
|
||||||
_patchCoords[index].s = i->second[j];
|
|
||||||
_patchCoords[index].t = i->second[j+1];
|
|
||||||
index ++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
Position * p = &_positions[0];
|
Position * p = &_positions[0];
|
||||||
float * dp = &_velocities[0];
|
float * dp = &_velocities[0];
|
||||||
|
@ -30,11 +30,6 @@
|
|||||||
#include <osd/types.h>
|
#include <osd/types.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#ifdef OPENSUBDIV_HAS_TBB
|
|
||||||
#include <tbb/concurrent_hash_map.h>
|
|
||||||
typedef tbb::concurrent_hash_map< OpenSubdiv::Far::PatchTable::PatchHandle const*, std::vector<float> > PatchHandleMap;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// In order to emphasize the dynamic nature of the EvalLimit API, where the
|
// In order to emphasize the dynamic nature of the EvalLimit API, where the
|
||||||
// locations can be arbitrarily updated before each evaluation, the glEvalLimit
|
// locations can be arbitrarily updated before each evaluation, the glEvalLimit
|
||||||
@ -147,7 +142,7 @@ public:
|
|||||||
return _velocities;
|
return _velocities;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<OpenSubdiv::Osd::PatchCoord> const &GetPatchCoords() const {
|
std::vector<OpenSubdiv::Osd::PatchCoord> GetPatchCoords() const {
|
||||||
return _patchCoords;
|
return _patchCoords;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,10 +160,6 @@ private:
|
|||||||
|
|
||||||
std::vector<float> _velocities;
|
std::vector<float> _velocities;
|
||||||
|
|
||||||
#ifdef OPENSUBDIV_HAS_TBB
|
|
||||||
PatchHandleMap _patchHandleMap;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;
|
std::vector<OpenSubdiv::Osd::PatchCoord> _patchCoords;
|
||||||
|
|
||||||
float _speed; // velocity multiplier
|
float _speed; // velocity multiplier
|
||||||
|
@ -147,16 +147,9 @@ if (NOT NO_LIB)
|
|||||||
)
|
)
|
||||||
set_target_properties(osd_static_cpu PROPERTIES OUTPUT_NAME osdCPU CLEAN_DIRECT_OUTPUT 1)
|
set_target_properties(osd_static_cpu PROPERTIES OUTPUT_NAME osdCPU CLEAN_DIRECT_OUTPUT 1)
|
||||||
|
|
||||||
if( ISPC_FOUND)
|
target_link_libraries(osd_static_cpu
|
||||||
target_link_libraries(osd_static_cpu
|
${PLATFORM_CPU_LIBRARIES}
|
||||||
osd_ispc_obj
|
)
|
||||||
${PLATFORM_CPU_LIBRARIES}
|
|
||||||
)
|
|
||||||
else()
|
|
||||||
target_link_libraries(osd_static_cpu
|
|
||||||
${PLATFORM_CPU_LIBRARIES}
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
install( TARGETS osd_static_cpu DESTINATION "${CMAKE_LIBDIR_BASE}" )
|
install( TARGETS osd_static_cpu DESTINATION "${CMAKE_LIBDIR_BASE}" )
|
||||||
|
|
||||||
@ -207,16 +200,9 @@ if (NOT NO_LIB)
|
|||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if ( ISPC_FOUND)
|
target_link_libraries(osd_dynamic_cpu
|
||||||
target_link_libraries(osd_dynamic_cpu
|
${PLATFORM_CPU_LIBRARIES}
|
||||||
osd_ispc_obj
|
)
|
||||||
${PLATFORM_CPU_LIBRARIES}
|
|
||||||
)
|
|
||||||
else()
|
|
||||||
target_link_libraries(osd_dynamic_cpu
|
|
||||||
${PLATFORM_CPU_LIBRARIES}
|
|
||||||
)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
install( TARGETS osd_dynamic_cpu LIBRARY DESTINATION "${CMAKE_LIBDIR_BASE}" )
|
install( TARGETS osd_dynamic_cpu LIBRARY DESTINATION "${CMAKE_LIBDIR_BASE}" )
|
||||||
|
|
||||||
|
@ -116,15 +116,6 @@ struct PatchParam {
|
|||||||
///
|
///
|
||||||
void Normalize( float & u, float & v ) const;
|
void Normalize( float & u, float & v ) const;
|
||||||
|
|
||||||
/// This function is the reverse operation of function Normalize()
|
|
||||||
/// The (u,v) pair is converted from patch sub-parametric space to control
|
|
||||||
/// face parametric space.
|
|
||||||
///
|
|
||||||
/// @param u u parameter
|
|
||||||
/// @param v v parameter
|
|
||||||
///
|
|
||||||
void Denormalize( float & u, float & v) const;
|
|
||||||
|
|
||||||
unsigned int field0:32;
|
unsigned int field0:32;
|
||||||
unsigned int field1:32;
|
unsigned int field1:32;
|
||||||
};
|
};
|
||||||
@ -170,20 +161,6 @@ PatchParam::Normalize( float & u, float & v ) const {
|
|||||||
v = (v - pv) / frac;
|
v = (v - pv) / frac;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void
|
|
||||||
PatchParam::Denormalize( float & u, float & v ) const {
|
|
||||||
|
|
||||||
float frac = GetParamFraction();
|
|
||||||
|
|
||||||
// top left corner
|
|
||||||
float pu = (float)GetU()*frac;
|
|
||||||
float pv = (float)GetV()*frac;
|
|
||||||
|
|
||||||
// normalize u,v coordinates
|
|
||||||
u = u * frac + pu;
|
|
||||||
v = v * frac + pv;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // end namespace Far
|
} // end namespace Far
|
||||||
|
|
||||||
} // end namespace OPENSUBDIV_VERSION
|
} // end namespace OPENSUBDIV_VERSION
|
||||||
|
@ -68,12 +68,6 @@ public:
|
|||||||
Index arrayIndex, // Array index of the patch
|
Index arrayIndex, // Array index of the patch
|
||||||
patchIndex, // Absolute Index of the patch
|
patchIndex, // Absolute Index of the patch
|
||||||
vertIndex; // Relative offset to the first CV of the patch in array
|
vertIndex; // Relative offset to the first CV of the patch in array
|
||||||
|
|
||||||
bool isEqual(const PatchHandle &other) {
|
|
||||||
return other.arrayIndex == arrayIndex &&
|
|
||||||
other.patchIndex == patchIndex &&
|
|
||||||
other.vertIndex == vertIndex;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -26,7 +26,6 @@
|
|||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
# source & headers
|
# source & headers
|
||||||
|
|
||||||
set(CPU_SOURCE_FILES
|
set(CPU_SOURCE_FILES
|
||||||
cpuEvaluator.cpp
|
cpuEvaluator.cpp
|
||||||
cpuKernel.cpp
|
cpuKernel.cpp
|
||||||
@ -34,12 +33,8 @@ set(CPU_SOURCE_FILES
|
|||||||
cpuVertexBuffer.cpp
|
cpuVertexBuffer.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
if( ISPC_FOUND)
|
|
||||||
list(APPEND CPU_SOURCE_FILES ispcEvaluator.cpp)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(GPU_SOURCE_FILES )
|
set(GPU_SOURCE_FILES )
|
||||||
set(ISPC_SOURCE_FILES )
|
|
||||||
set(INC_FILES )
|
set(INC_FILES )
|
||||||
|
|
||||||
set(PRIVATE_HEADER_FILES
|
set(PRIVATE_HEADER_FILES
|
||||||
@ -301,17 +296,6 @@ if( CUDA_FOUND )
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if( ISPC_FOUND)
|
|
||||||
list(APPEND ISPC_SOURCE_FILES
|
|
||||||
ispcEvalLimitKernel.ispc
|
|
||||||
)
|
|
||||||
|
|
||||||
# Compile ISPC code to objs
|
|
||||||
ispc_compile(${ISPC_SOURCE_FILES})
|
|
||||||
ADD_LIBRARY(osd_ispc_obj STATIC ${ISPC_OBJECTS})
|
|
||||||
SET_TARGET_PROPERTIES(osd_ispc_obj PROPERTIES LINKER_LANGUAGE C)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
list(APPEND DOXY_HEADER_FILES ${CUDA_PUBLIC_HEADERS})
|
list(APPEND DOXY_HEADER_FILES ${CUDA_PUBLIC_HEADERS})
|
||||||
|
|
||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
|
@ -1,880 +0,0 @@
|
|||||||
//
|
|
||||||
// Copyright 2013 Pixar
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
|
||||||
// with the following modification; you may not use this file except in
|
|
||||||
// compliance with the Apache License and the following modification to it:
|
|
||||||
// Section 6. Trademarks. is deleted and replaced with:
|
|
||||||
//
|
|
||||||
// 6. Trademarks. This License does not grant permission to use the trade
|
|
||||||
// names, trademarks, service marks, or product names of the Licensor
|
|
||||||
// and its affiliates, except as required to comply with Section 4(c) of
|
|
||||||
// the License and to reproduce the content of the NOTICE file.
|
|
||||||
//
|
|
||||||
// You may obtain a copy of the Apache License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the Apache License with the above modification is
|
|
||||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
// KIND, either express or implied. See the Apache License for the specific
|
|
||||||
// language governing permissions and limitations under the Apache License.
|
|
||||||
//
|
|
||||||
|
|
||||||
|
|
||||||
#define MAX_CHANNEL 4
|
|
||||||
|
|
||||||
struct BufferDescriptor {
|
|
||||||
int offset; // offset to desired element data
|
|
||||||
int length; // number or length of the data
|
|
||||||
int stride; // stride to the next element
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Point {
|
|
||||||
float x;
|
|
||||||
float y;
|
|
||||||
float z;
|
|
||||||
};
|
|
||||||
|
|
||||||
inline struct Point operator+(struct Point a, struct Point b) {
|
|
||||||
struct Point result;
|
|
||||||
result.x = a.x + b.x;
|
|
||||||
result.y = a.y + b.y;
|
|
||||||
result.z = a.z + b.z;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform struct Point operator+(uniform struct Point a, uniform struct Point b) {
|
|
||||||
uniform struct Point result;
|
|
||||||
result.x = a.x + b.x;
|
|
||||||
result.y = a.y + b.y;
|
|
||||||
result.z = a.z + b.z;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline struct Point operator-(struct Point a, struct Point b) {
|
|
||||||
struct Point result;
|
|
||||||
result.x = a.x - b.x;
|
|
||||||
result.y = a.y - b.y;
|
|
||||||
result.z = a.z - b.z;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform struct Point operator-(uniform struct Point a, uniform struct Point b) {
|
|
||||||
uniform struct Point result;
|
|
||||||
result.x = a.x - b.x;
|
|
||||||
result.y = a.y - b.y;
|
|
||||||
result.z = a.z - b.z;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline struct Point operator*(struct Point a, float b) {
|
|
||||||
struct Point result;
|
|
||||||
result.x = a.x * b;
|
|
||||||
result.y = a.y * b;
|
|
||||||
result.z = a.z * b;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform struct Point operator*(uniform struct Point a, uniform float b) {
|
|
||||||
uniform struct Point result;
|
|
||||||
result.x = a.x * b;
|
|
||||||
result.y = a.y * b;
|
|
||||||
result.z = a.z * b;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline struct Point operator*(float b, struct Point a) {
|
|
||||||
struct Point result;
|
|
||||||
result.x = b * a.x;
|
|
||||||
result.y = b * a.y;
|
|
||||||
result.z = b * a.z;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform struct Point operator*(uniform float b, uniform struct Point a) {
|
|
||||||
uniform struct Point result;
|
|
||||||
result.x = b * a.x;
|
|
||||||
result.y = b * a.y;
|
|
||||||
result.z = b * a.z;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline struct Point operator/(struct Point a, float b) {
|
|
||||||
struct Point result;
|
|
||||||
result.x = a.x / b;
|
|
||||||
result.y = a.y / b;
|
|
||||||
result.z = a.z / b;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform struct Point operator/(uniform struct Point a, uniform float b) {
|
|
||||||
uniform struct Point result;
|
|
||||||
result.x = a.x / b;
|
|
||||||
result.y = a.y / b;
|
|
||||||
result.z = a.z / b;
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void cross(struct Point &a, struct Point &b, struct Point &c)
|
|
||||||
{
|
|
||||||
c.x = a.y*b.z - a.z*b.y;
|
|
||||||
c.y = a.z*b.x - a.x*b.z;
|
|
||||||
c.z = a.x*b.y - a.y*b.x;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform bool
|
|
||||||
nonQuadRoot(uniform unsigned int bitField)
|
|
||||||
{
|
|
||||||
return (bitField >> 3) & 0x1;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform unsigned int getU(uniform unsigned int bitField)
|
|
||||||
{
|
|
||||||
return (uniform unsigned int)((bitField >> 22) & 0x3ff);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform unsigned int getV(uniform unsigned int bitField)
|
|
||||||
{
|
|
||||||
return (uniform unsigned int)((bitField >> 12) & 0x3ff);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform unsigned int getBoundary(uniform unsigned int bitField)
|
|
||||||
{
|
|
||||||
return (uniform unsigned int)((bitField >> 8) & 0xf);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform unsigned int getDepth(uniform unsigned int bitField)
|
|
||||||
{
|
|
||||||
return (uniform unsigned int)(bitField & 0xf);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline uniform float
|
|
||||||
getParamFraction(uniform unsigned int bitField){
|
|
||||||
if (nonQuadRoot(bitField)) {
|
|
||||||
return 1.0f / (1 << (getDepth(bitField)-1));
|
|
||||||
} else {
|
|
||||||
return 1.0f / (1 << getDepth(bitField));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void
|
|
||||||
adjustBoundaryWeights(uniform unsigned int bitField,
|
|
||||||
float sWeights[4],
|
|
||||||
float tWeights[4]) {
|
|
||||||
|
|
||||||
uniform int boundary = getBoundary(bitField);
|
|
||||||
|
|
||||||
if (boundary & 1) {
|
|
||||||
tWeights[2] -= tWeights[0];
|
|
||||||
tWeights[1] += 2*tWeights[0];
|
|
||||||
tWeights[0] = 0;
|
|
||||||
}
|
|
||||||
if (boundary & 2) {
|
|
||||||
sWeights[1] -= sWeights[3];
|
|
||||||
sWeights[2] += 2*sWeights[3];
|
|
||||||
sWeights[3] = 0;
|
|
||||||
}
|
|
||||||
if (boundary & 4) {
|
|
||||||
tWeights[1] -= tWeights[3];
|
|
||||||
tWeights[2] += 2*tWeights[3];
|
|
||||||
tWeights[3] = 0;
|
|
||||||
}
|
|
||||||
if (boundary & 8) {
|
|
||||||
sWeights[2] -= sWeights[0];
|
|
||||||
sWeights[1] += 2*sWeights[0];
|
|
||||||
sWeights[0] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void
|
|
||||||
getBSplineWeights(float t, float point[4], float deriv[4]) {
|
|
||||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
|
||||||
float const one6th = 1.0f / 6.0f;
|
|
||||||
|
|
||||||
float t2 = t * t;
|
|
||||||
float t3 = t * t2;
|
|
||||||
|
|
||||||
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
|
||||||
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
|
||||||
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
|
||||||
point[3] = one6th * ( t3);
|
|
||||||
|
|
||||||
// Derivatives of the above four basis functions at t:
|
|
||||||
deriv[0] = -0.5f*t2 + t - 0.5f;
|
|
||||||
deriv[1] = 1.5f*t2 - 2.0f*t;
|
|
||||||
deriv[2] = -1.5f*t2 + t + 0.5f;
|
|
||||||
deriv[3] = 0.5f*t2;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void
|
|
||||||
getBezierWeights(float t, float point[4], float deriv[4]) {
|
|
||||||
// The four uniform cubic Bezier basis functions (in terms of t and its
|
|
||||||
// complement tC) evaluated at t:
|
|
||||||
float t2 = t*t;
|
|
||||||
float tC = 1.0f - t;
|
|
||||||
float tC2 = tC * tC;
|
|
||||||
|
|
||||||
point[0] = tC2 * tC;
|
|
||||||
point[1] = tC2 * t * 3.0f;
|
|
||||||
point[2] = t2 * tC * 3.0f;
|
|
||||||
point[3] = t2 * t;
|
|
||||||
|
|
||||||
// Derivatives of the above four basis functions at t:
|
|
||||||
deriv[0] = -3.0f * tC2;
|
|
||||||
deriv[1] = 9.0f * t2 - 12.0f * t + 3.0f;
|
|
||||||
deriv[2] = -9.0f * t2 + 6.0f * t;
|
|
||||||
deriv[3] = 3.0f * t2;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void
|
|
||||||
getBSplineWeightsNoDerivative(float t, float point[4]) {
|
|
||||||
// The four uniform cubic B-Spline basis functions evaluated at t:
|
|
||||||
float const one6th = 1.0f / 6.0f;
|
|
||||||
|
|
||||||
float t2 = t * t;
|
|
||||||
float t3 = t * t2;
|
|
||||||
|
|
||||||
point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3);
|
|
||||||
point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3);
|
|
||||||
point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3));
|
|
||||||
point[3] = one6th * ( t3);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void
|
|
||||||
getBezierWeightsNoDerivative(float t, float point[4]) {
|
|
||||||
// The four uniform cubic Bezier basis functions (in terms of t and its
|
|
||||||
// complement tC) evaluated at t:
|
|
||||||
float t2 = t*t;
|
|
||||||
float tC = 1.0f - t;
|
|
||||||
float tC2 = tC * tC;
|
|
||||||
|
|
||||||
point[0] = tC2 * tC;
|
|
||||||
point[1] = tC2 * t * 3.0f;
|
|
||||||
point[2] = t2 * tC * 3.0f;
|
|
||||||
point[3] = t2 * t;
|
|
||||||
}
|
|
||||||
|
|
||||||
export void
|
|
||||||
evalBilinear(uniform unsigned int bitField,
|
|
||||||
uniform int nPoint,
|
|
||||||
uniform const float * uniform u,
|
|
||||||
uniform const float * uniform v,
|
|
||||||
uniform const int * uniform vertexIndices,
|
|
||||||
uniform const BufferDescriptor &inDesc,
|
|
||||||
uniform const float * uniform inQ,
|
|
||||||
uniform const BufferDescriptor &outDesc,
|
|
||||||
uniform float *uniform outQ,
|
|
||||||
uniform const BufferDescriptor &duDesc,
|
|
||||||
uniform float *uniform outDQU,
|
|
||||||
uniform const BufferDescriptor &dvDesc,
|
|
||||||
uniform float *uniform outDQV)
|
|
||||||
{
|
|
||||||
uniform int nChannel = inDesc.length / 3;
|
|
||||||
assert(nChannel < MAX_CHANNEL);
|
|
||||||
|
|
||||||
uniform Point controlVertices[MAX_CHANNEL*4];
|
|
||||||
for(uniform int i=0; i<4; i++) {
|
|
||||||
uniform unsigned int id = vertexIndices[i];
|
|
||||||
uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
|
|
||||||
for(uniform int c=0; c<nChannel; c++) {
|
|
||||||
uniform int offset = c * 4 + i;
|
|
||||||
controlVertices[offset].x = pVertex[0];
|
|
||||||
controlVertices[offset].y = pVertex[1];
|
|
||||||
controlVertices[offset].z = pVertex[2];
|
|
||||||
pVertex += 3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach( n = 0 ... nPoint) {
|
|
||||||
float ou = 1.0f - u[n];
|
|
||||||
float ov = 1.0f - v[n];
|
|
||||||
float w[4] = { ov*ou, v[n]*ou, v[n]*u[n], ov*u[n] };
|
|
||||||
|
|
||||||
float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
|
|
||||||
for(uniform int c=0; c<nChannel; c++) {
|
|
||||||
Point Q;
|
|
||||||
Q.x = Q.y = Q.z = 0.0;
|
|
||||||
for (uniform int i=0; i<4; ++i) {
|
|
||||||
Q = Q + w[i] * controlVertices[c * 4 + i];
|
|
||||||
}
|
|
||||||
|
|
||||||
*pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uniform Point dU[MAX_CHANNEL], dV[MAX_CHANNEL];
|
|
||||||
for(uniform int c=0; c<nChannel; c++) {
|
|
||||||
dU[c] = 0.5 * (controlVertices[c * 4 + 3] - controlVertices[c * 4 + 0] +
|
|
||||||
controlVertices[c * 4 + 2] - controlVertices[c * 4 + 1] );
|
|
||||||
|
|
||||||
dV[c] = 0.5 * (controlVertices[c * 4 + 1] - controlVertices[c * 4 + 0] +
|
|
||||||
controlVertices[c * 4 + 2] - controlVertices[c * 4 + 3] );
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach( n = 0 ... nPoint) {
|
|
||||||
float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
|
|
||||||
float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
|
|
||||||
for(uniform int c=0; c<nChannel; c++) {
|
|
||||||
*pOutDQU ++ = dU[c].x, *pOutDQU ++ = dU[c].y, *pOutDQU ++ = dU[c].z;
|
|
||||||
*pOutDQV ++ = dV[c].x, *pOutDQV ++ = dV[c].y, *pOutDQV ++ = dV[c].z;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Evaluates a bilinear (four control vertex) patch at nPoint (u,v) locations
// and writes the resulting positions only.
//
//   bitField      - not referenced by this kernel
//   nPoint        - number of (u,v) samples
//   u, v          - parametric coordinates, one pair per sample
//   vertexIndices - indices of the four control vertices
//   inDesc, inQ   - layout and storage of the source vertex data
//   outDesc, outQ - layout and storage of the evaluated data; sample n is
//                   written starting at outQ[outDesc.offset + n * outDesc.stride]
//
// The primvar data is processed as (inDesc.length / 3) channels of three
// floats each.
export void
evalBilinearNoDerivative(uniform unsigned int bitField,
                         uniform int nPoint,
                         uniform const float * uniform u,
                         uniform const float * uniform v,
                         uniform const int * uniform vertexIndices,
                         uniform const BufferDescriptor &inDesc,
                         uniform const float * uniform inQ,
                         uniform const BufferDescriptor &outDesc,
                         uniform float *uniform outQ)
{
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the control vertices channel-major: corners[channel * 4 + corner].
    uniform Point corners[MAX_CHANNEL*4];
    for (uniform int corner = 0; corner < 4; ++corner) {
        uniform unsigned int vertexId = vertexIndices[corner];
        uniform const float * uniform base = inQ + inDesc.offset + vertexId * inDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform const float * uniform xyz = base + 3 * ch;
            uniform int slot = ch * 4 + corner;
            corners[slot].x = xyz[0];
            corners[slot].y = xyz[1];
            corners[slot].z = xyz[2];
        }
    }

    foreach (n = 0 ... nPoint) {
        float un = u[n];
        float vn = v[n];
        float uc = 1.0f - un;
        float vc = 1.0f - vn;

        // Per-corner blend weights, in control vertex order.
        float w[4];
        w[0] = vc * uc;
        w[1] = vn * uc;
        w[2] = vn * un;
        w[3] = vc * un;

        float *dst = outQ + outDesc.offset + n * outDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            Point acc;
            acc.x = 0.0;
            acc.y = 0.0;
            acc.z = 0.0;
            for (uniform int corner = 0; corner < 4; ++corner) {
                acc = acc + w[corner] * corners[ch * 4 + corner];
            }

            // Channels are packed back to back, three floats each.
            dst[0] = acc.x;
            dst[1] = acc.y;
            dst[2] = acc.z;
            dst += 3;
        }
    }
}
|
|
||||||
|
|
||||||
// Evaluates a 16 control vertex B-spline patch at nPoint (u,v) locations,
// writing positions to outQ and the u/v derivatives to outDQU/outDQV.
//
//   bitField        - packed patch parameter bits; decoded here through
//                     getDepth(), getParamFraction(), getU(), getV() and
//                     passed on to adjustBoundaryWeights()
//   nPoint          - number of (u,v) samples
//   u, v            - parametric coordinates, one pair per sample
//   vertexIndices   - indices of the 16 control vertices
//   inDesc, inQ     - layout and storage of the source vertex data
//   outDesc, outQ   - layout and storage of the evaluated positions
//   duDesc, outDQU  - layout and storage of the u derivatives
//   dvDesc, outDQV  - layout and storage of the v derivatives
//
// The primvar data is processed as (inDesc.length / 3) channels of three
// floats each.
export void
evalBSpline(uniform unsigned int bitField,
            uniform int nPoint,
            uniform const float * uniform u,
            uniform const float * uniform v,
            uniform const int * uniform vertexIndices,
            uniform const BufferDescriptor &inDesc,
            uniform const float * uniform inQ,
            uniform const BufferDescriptor &outDesc,
            uniform float *uniform outQ,
            uniform const BufferDescriptor &duDesc,
            uniform float *uniform outDQU,
            uniform const BufferDescriptor &dvDesc,
            uniform float *uniform outDQV)
{
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the control vertices channel-major: cvs[channel * 16 + cv].
    uniform Point cvs[MAX_CHANNEL*16];
    for (uniform int cv = 0; cv < 16; ++cv) {
        uniform unsigned int vertexId = vertexIndices[cv];
        uniform const float * uniform base = inQ + inDesc.offset + vertexId * inDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform const float * uniform xyz = base + 3 * ch;
            uniform int slot = ch * 16 + cv;
            cvs[slot].x = xyz[0];
            cvs[slot].y = xyz[1];
            cvs[slot].z = xyz[2];
        }
    }

    // Derivative weights are multiplied by 2^depth.
    uniform float derivScale = (uniform float)(1 << getDepth(bitField));

    uniform float frac = getParamFraction(bitField);

    // Patch origin in (u,v), used to map the samples into patch-local (s,t).
    uniform float originU = (uniform float)getU(bitField)*frac;
    uniform float originV = (uniform float)getV(bitField)*frac;

    foreach (n = 0 ... nPoint) {
        // Normalized patch-local coordinates.
        float s = (u[n] - originU) / frac;
        float t = (v[n] - originV) / frac;

        float sW[4], tW[4], dsW[4], dtW[4];

        getBSplineWeights(s, sW, dsW);
        getBSplineWeights(t, tW, dtW);

        adjustBoundaryWeights(bitField, sW, tW);
        adjustBoundaryWeights(bitField, dsW, dtW);

        // Tensor product of the two directional bases.
        float posW[16];
        for (uniform int row = 0; row < 4; ++row) {
            for (uniform int col = 0; col < 4; ++col) {
                posW[4*row+col] = sW[col] * tW[row];
            }
        }

        float *dstQ = outQ + outDesc.offset + n * outDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform int first = ch * 16;
            Point acc;
            acc.x = 0.0;
            acc.y = 0.0;
            acc.z = 0.0;
            for (uniform int cv = 0; cv < 16; ++cv) {
                acc = acc + posW[cv] * cvs[first + cv];
            }

            dstQ[0] = acc.x;
            dstQ[1] = acc.y;
            dstQ[2] = acc.z;
            dstQ += 3;
        }

        // Scaled tensor products for the two first derivatives.
        float duW[16], dvW[16];
        for (uniform int row = 0; row < 4; ++row) {
            for (uniform int col = 0; col < 4; ++col) {
                duW[4*row+col] = dsW[col] * tW[row] * derivScale;
                dvW[4*row+col] = sW[col] * dtW[row] * derivScale;
            }
        }

        float *dstDu = outDQU + duDesc.offset + n * duDesc.stride;
        float *dstDv = outDQV + dvDesc.offset + n * dvDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform int first = ch * 16;
            Point accU, accV;
            accU.x = 0.0;
            accU.y = 0.0;
            accU.z = 0.0;
            accV.x = 0.0;
            accV.y = 0.0;
            accV.z = 0.0;
            for (uniform int cv = 0; cv < 16; ++cv) {
                accU = accU + duW[cv] * cvs[first + cv];
                accV = accV + dvW[cv] * cvs[first + cv];
            }

            dstDu[0] = accU.x;
            dstDu[1] = accU.y;
            dstDu[2] = accU.z;
            dstDu += 3;

            dstDv[0] = accV.x;
            dstDv[1] = accV.y;
            dstDv[2] = accV.z;
            dstDv += 3;
        }
    }
}
|
|
||||||
|
|
||||||
// Evaluates a 16 control vertex B-spline patch at nPoint (u,v) locations
// and writes the resulting positions only.
//
//   bitField      - packed patch parameter bits; decoded here through
//                   getParamFraction(), getU(), getV() and passed on to
//                   adjustBoundaryWeights()
//   nPoint        - number of (u,v) samples
//   u, v          - parametric coordinates, one pair per sample
//   vertexIndices - indices of the 16 control vertices
//   inDesc, inQ   - layout and storage of the source vertex data
//   outDesc, outQ - layout and storage of the evaluated positions
//
// The primvar data is processed as (inDesc.length / 3) channels of three
// floats each.
export void
evalBSplineNoDerivative(uniform unsigned int bitField,
                        uniform int nPoint,
                        uniform const float * uniform u,
                        uniform const float * uniform v,
                        uniform const int * uniform vertexIndices,
                        uniform const BufferDescriptor &inDesc,
                        uniform const float * uniform inQ,
                        uniform const BufferDescriptor &outDesc,
                        uniform float *uniform outQ)
{
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the control vertices channel-major: cvs[channel * 16 + cv].
    uniform Point cvs[MAX_CHANNEL*16];
    for (uniform int cv = 0; cv < 16; ++cv) {
        uniform unsigned int vertexId = vertexIndices[cv];
        uniform const float * uniform base = inQ + inDesc.offset + vertexId * inDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform const float * uniform xyz = base + 3 * ch;
            uniform int slot = ch * 16 + cv;
            cvs[slot].x = xyz[0];
            cvs[slot].y = xyz[1];
            cvs[slot].z = xyz[2];
        }
    }

    uniform float frac = getParamFraction(bitField);

    // Patch origin in (u,v), used to map the samples into patch-local (s,t).
    uniform float originU = (uniform float)getU(bitField)*frac;
    uniform float originV = (uniform float)getV(bitField)*frac;

    foreach (n = 0 ... nPoint) {
        // Normalized patch-local coordinates.
        float s = (u[n] - originU) / frac;
        float t = (v[n] - originV) / frac;

        float sW[4], tW[4];

        getBSplineWeightsNoDerivative(s, sW);
        getBSplineWeightsNoDerivative(t, tW);

        adjustBoundaryWeights(bitField, sW, tW);

        // Tensor product of the two directional bases.
        float posW[16];
        for (uniform int row = 0; row < 4; ++row) {
            for (uniform int col = 0; col < 4; ++col) {
                posW[4*row+col] = sW[col] * tW[row];
            }
        }

        float *dstQ = outQ + outDesc.offset + n * outDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform int first = ch * 16;
            Point acc;
            acc.x = 0.0;
            acc.y = 0.0;
            acc.z = 0.0;
            for (uniform int cv = 0; cv < 16; ++cv) {
                acc = acc + posW[cv] * cvs[first + cv];
            }

            dstQ[0] = acc.x;
            dstQ[1] = acc.y;
            dstQ[2] = acc.z;
            dstQ += 3;
        }
    }
}
|
|
||||||
|
|
||||||
// Computes, for one (s,t) location per program instance, the 20 Gregory patch
// basis weights (point) together with the weights for the two first
// derivatives (deriv1 along s, deriv2 along t).
//
//   bitField - packed patch parameter bits; only getDepth(bitField) is used,
//              to scale the derivative weights by 2^depth
//   s, t     - patch-local parametric coordinates
//   point    - out: 20 position weights, indexed as in the diagram below
//   deriv1   - out: 20 weights for the s derivative
//   deriv2   - out: 20 weights for the t derivative
void getGregoryWeights(uniform unsigned int bitField,
                       float s, float t, float point[20], float deriv1[20], float deriv2[20]) {
    //
    //  P3         e3-      e2+         P2
    //     15------17-------11--------10
    //     |        |        |        |
    //     |        |        |        |
    //     |        | f3-    | f2+    |
    //     |       19       13        |
    // e3+ 16-----18           14-----12 e2-
    //     |     f3+          f2-     |
    //     |                          |
    //     |                          |
    //     |      f0-         f1+     |
    // e0- 2------4            8------6 e1+
    //     |        3        9        |
    //     |        | f0+    | f1-    |
    //     |        |        |        |
    //     |        |        |        |
    //     0--------1--------7--------5
    //  P0         e0+      e1-         P1
    //

    //  Indices of boundary and interior points and their corresponding Bezier points
    //  (this can be reduced with more direct indexing and unrolling of loops):
    //
    static uniform int const boundaryGregory[12] = { 0, 1, 7, 5, 2, 6, 16, 12, 15, 17, 11, 10 };
    static uniform int const boundaryBezSCol[12] = { 0, 1, 2, 3, 0, 3,  0,  3,  0,  1,  2,  3 };
    static uniform int const boundaryBezTRow[12] = { 0, 0, 0, 0, 1, 1,  2,  2,  3,  3,  3,  3 };

    static uniform int const interiorGregory[8] = { 3, 4,  8, 9,  13, 14,  18, 19 };
    static uniform int const interiorBezSCol[8] = { 1, 1,  2, 2,   2,  2,   1,  1 };
    static uniform int const interiorBezTRow[8] = { 1, 1,  1, 1,   2,  2,   2,  2 };

    //
    //  Bezier basis functions are denoted with B while the rational multipliers for the
    //  interior points will be denoted G -- so we have B(s), B(t) and G(s,t):
    //
    //  Directional Bezier basis functions B at s and t:
    float Bs[4], Bds[4];
    float Bt[4], Bdt[4];

    getBezierWeights(s, Bs, Bds);
    getBezierWeights(t, Bt, Bdt);

    //  Rational multipliers G at s and t:
    float sC = 1.0f - s;
    float tC = 1.0f - t;

    //  Each df holds the reciprocal of its denominator; a non-positive
    //  denominator falls back to 1 to avoid dividing by zero.
    //  Use <= here to avoid compiler warnings -- the sums should always be non-negative:
    float df0 = s  + t;   df0 = (df0 <= 0.0f) ? 1.0f : (1.0f / df0);
    float df1 = sC + t;   df1 = (df1 <= 0.0f) ? 1.0f : (1.0f / df1);
    float df2 = sC + tC;  df2 = (df2 <= 0.0f) ? 1.0f : (1.0f / df2);
    float df3 = s  + tC;  df3 = (df3 <= 0.0f) ? 1.0f : (1.0f / df3);

    float G[8] = { s*df0, t*df0,  t*df1, sC*df1,  sC*df2, tC*df2,  tC*df3, s*df3 };

    //  Combined weights for boundary and interior points:
    for (uniform int i = 0; i < 12; ++i) {
        point[boundaryGregory[i]] = Bs[boundaryBezSCol[i]] * Bt[boundaryBezTRow[i]];
    }
    for (uniform int i = 0; i < 8; ++i) {
        point[interiorGregory[i]] = Bs[interiorBezSCol[i]] * Bt[interiorBezTRow[i]] * G[i];
    }

    //
    //  For derivatives, the basis functions for the interior points are rational and ideally
    //  require appropriate differentiation, i.e. product rule for the combination of B and G
    //  and the quotient rule for the rational G itself.  As initially proposed by Loop et al
    //  though, the approximation using the 16 Bezier points arising from the G(s,t) has
    //  proved adequate (and is what the GPU shaders use) so we continue to use that here.
    //
    //  An implementation of the true derivatives is provided for future reference -- it is
    //  unclear if the approximations will hold up under surface analysis involving higher
    //  order differentiation.
    //

    //  Remember to include derivative scaling in all assignments below:
    uniform float dScale = (uniform float)(1 << getDepth(bitField));

    //  Combined weights for boundary points -- simple (scaled) tensor products:
    for (uniform int i = 0; i < 12; ++i) {
        uniform int iDst = boundaryGregory[i];
        uniform int tRow = boundaryBezTRow[i];
        uniform int sCol = boundaryBezSCol[i];

        deriv1[iDst] = Bds[sCol] * Bt[tRow] * dScale;
        deriv2[iDst] = Bdt[tRow] * Bs[sCol] * dScale;
    }

#define _USE_BEZIER_PSEUDO_DERIVATIVES
#ifdef _USE_BEZIER_PSEUDO_DERIVATIVES
    //  Approximation to the true Gregory derivatives by differentiating the Bezier patch
    //  unique to the given (s,t), i.e. having F = (g^+ * f^+) + (g^- * f^-) as its four
    //  interior points:
    //
    //  Combined weights for interior points -- (scaled) tensor products with G+ or G-:
    for (uniform int i = 0; i < 8; ++i) {
        uniform int iDst = interiorGregory[i];
        uniform int tRow = interiorBezTRow[i];
        uniform int sCol = interiorBezSCol[i];

        deriv1[iDst] = Bds[sCol] * Bt[tRow] * G[i] * dScale;
        deriv2[iDst] = Bdt[tRow] * Bs[sCol] * G[i] * dScale;
    }
#else
    //  True Gregory derivatives using appropriate differentiation of composite functions:
    //
    //  Note that for G(s,t) = N(s,t) / D(s,t), all N' and D' are trivial constants (which
    //  simplifies things for higher order derivatives).  And while each pair of functions
    //  G (i.e. the G+ and G- corresponding to points f+ and f-) must sum to 1 to ensure
    //  Bezier equivalence (when f+ = f-), the pairs of G' must similarly sum to 0.  So we
    //  can potentially compute only one of the pair and negate the result for the other
    //  (and with 4 or 8 computations involving these constants, this is all very SIMD
    //  friendly...) but for now we treat all 8 independently for simplicity.
    //
    //float N[8] = {   s,   t,    t,  sC,   sC,  tC,   tC,   s };

    //  D holds the reciprocal denominators, which depend on the varying (s,t),
    //  so it must be a varying array rather than a uniform one.
    float D[8] = { df0, df0,  df1, df1,  df2, df2,  df3, df3 };

    static uniform float const Nds[8] = { 1.0f, 0.0f,  0.0f, -1.0f, -1.0f,  0.0f,  0.0f,  1.0f };
    static uniform float const Ndt[8] = { 0.0f, 1.0f,  1.0f,  0.0f,  0.0f, -1.0f, -1.0f,  0.0f };

    static uniform float const Dds[8] = { 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f,  1.0f,  1.0f };
    static uniform float const Ddt[8] = { 1.0f, 1.0f,  1.0f,  1.0f, -1.0f, -1.0f, -1.0f, -1.0f };

    //  Combined weights for interior points -- (scaled) combinations of B, B', G and G':
    for (uniform int i = 0; i < 8; ++i) {
        uniform int iDst = interiorGregory[i];
        uniform int tRow = interiorBezTRow[i];
        uniform int sCol = interiorBezSCol[i];

        //  Quotient rule for G' (re-expressed in terms of G to simplify (and D = 1/D)):
        float Gds = (Nds[i] - Dds[i] * G[i]) * D[i];
        float Gdt = (Ndt[i] - Ddt[i] * G[i]) * D[i];

        //  Product rule combining B and B' with G and G' (and scaled):
        deriv1[iDst] = (Bds[sCol] * G[i] + Bs[sCol] * Gds) * Bt[tRow] * dScale;
        deriv2[iDst] = (Bdt[tRow] * G[i] + Bt[tRow] * Gdt) * Bs[sCol] * dScale;
    }
#endif
}
|
|
||||||
|
|
||||||
// Computes the 20 Gregory patch position weights for one (s,t) location per
// program instance.
//
//   bitField - not referenced by this function
//   s, t     - patch-local parametric coordinates
//   point    - out: 20 position weights
//
// Indices 0,1,7,5 / 2,6 / 16,12 / 15,17,11,10 are the twelve boundary points
// and receive plain Bezier tensor-product weights.  Indices 3,4 / 8,9 /
// 13,14 / 18,19 are the four pairs of interior (face) points; each pair
// shares one interior Bezier weight, split between the two by a rational
// multiplier.
void getGregoryWeightsNoDerivative(uniform unsigned int bitField, float s, float t, float point[20]) {
    // Directional Bezier basis functions at s and t.
    float Bs[4];
    float Bt[4];

    getBezierWeightsNoDerivative(s, Bs);
    getBezierWeightsNoDerivative(t, Bt);

    float sC = 1.0f - s;
    float tC = 1.0f - t;

    // Reciprocal denominators of the rational multipliers, one per corner.
    // A non-positive denominator falls back to 1 to avoid dividing by zero.
    float r0 = s + t;
    r0 = (r0 <= 0.0f) ? 1.0f : (1.0f / r0);
    float r1 = sC + t;
    r1 = (r1 <= 0.0f) ? 1.0f : (1.0f / r1);
    float r2 = sC + tC;
    r2 = (r2 <= 0.0f) ? 1.0f : (1.0f / r2);
    float r3 = s + tC;
    r3 = (r3 <= 0.0f) ? 1.0f : (1.0f / r3);

    // Boundary points: Bezier tensor products, listed row by row in t.
    point[0]  = Bs[0] * Bt[0];
    point[1]  = Bs[1] * Bt[0];
    point[7]  = Bs[2] * Bt[0];
    point[5]  = Bs[3] * Bt[0];

    point[2]  = Bs[0] * Bt[1];
    point[6]  = Bs[3] * Bt[1];

    point[16] = Bs[0] * Bt[2];
    point[12] = Bs[3] * Bt[2];

    point[15] = Bs[0] * Bt[3];
    point[17] = Bs[1] * Bt[3];
    point[11] = Bs[2] * Bt[3];
    point[10] = Bs[3] * Bt[3];

    // Interior points: interior Bezier weight times the rational multiplier.
    float g0 = s * r0;
    float g1 = t * r0;
    float g2 = t * r1;
    float g3 = sC * r1;
    float g4 = sC * r2;
    float g5 = tC * r2;
    float g6 = tC * r3;
    float g7 = s * r3;

    point[3]  = Bs[1] * Bt[1] * g0;
    point[4]  = Bs[1] * Bt[1] * g1;

    point[8]  = Bs[2] * Bt[1] * g2;
    point[9]  = Bs[2] * Bt[1] * g3;

    point[13] = Bs[2] * Bt[2] * g4;
    point[14] = Bs[2] * Bt[2] * g5;

    point[18] = Bs[1] * Bt[2] * g6;
    point[19] = Bs[1] * Bt[2] * g7;
}
|
|
||||||
|
|
||||||
// Evaluates a 20 control vertex Gregory patch at nPoint (u,v) locations,
// writing positions to outQ and the u/v derivatives to outDQU/outDQV.
//
//   bitField        - packed patch parameter bits; decoded here through
//                     getParamFraction(), getU(), getV() and passed on to
//                     getGregoryWeights()
//   nPoint          - number of (u,v) samples
//   u, v            - parametric coordinates, one pair per sample
//   vertexIndices   - indices of the 20 control vertices
//   inDesc, inQ     - layout and storage of the source vertex data
//   outDesc, outQ   - layout and storage of the evaluated positions
//   duDesc, outDQU  - layout and storage of the u derivatives
//   dvDesc, outDQV  - layout and storage of the v derivatives
//
// The primvar data is processed as (inDesc.length / 3) channels of three
// floats each.
export void
evalGregory(uniform unsigned int bitField,
            uniform int nPoint,
            uniform float u[],
            uniform float v[],
            uniform const unsigned int vertexIndices[],
            uniform const BufferDescriptor &inDesc,
            uniform const float inQ[],
            uniform const BufferDescriptor &outDesc,
            uniform float outQ[],
            uniform const BufferDescriptor &duDesc,
            uniform float outDQU[],
            uniform const BufferDescriptor &dvDesc,
            uniform float outDQV[])
{
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the control vertices channel-major: controlVertices[c * 20 + i].
    uniform Point controlVertices[MAX_CHANNEL*20];
    for(uniform int i=0; i<20; i++) {
        uniform unsigned int id = vertexIndices[i];
        uniform const float * uniform pVertex = inQ + inDesc.offset + id * inDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20 + i;
            controlVertices[offset].x = pVertex[0];
            controlVertices[offset].y = pVertex[1];
            controlVertices[offset].z = pVertex[2];
            pVertex += 3;
        }
    }

    uniform float frac = getParamFraction(bitField);

    // top left corner
    uniform float pu = (uniform float)getU(bitField)*frac;
    uniform float pv = (uniform float)getV(bitField)*frac;

    foreach( n = 0 ... nPoint) {
        // normalize u,v coordinates
        float s = (u[n] - pu) / frac;
        float t = (v[n] - pv) / frac;

        float point[20], deriv1[20], deriv2[20];
        getGregoryWeights(bitField, s, t, point, deriv1, deriv2);

        float *pOutQ = outQ + outDesc.offset + n * outDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            // The per-channel stride and the weight count are both 20 for a
            // Gregory patch; they must match the gather loop above.
            uniform int offset = c * 20;
            Point Q;
            Q.x = Q.y = Q.z = 0.0;
            for (uniform int i=0; i<20; ++i) {
                Q = Q + point[i] * controlVertices[offset + i];
            }

            *pOutQ ++ = Q.x, *pOutQ ++ = Q.y, *pOutQ ++ = Q.z;
        }

        float *pOutDQU = outDQU + duDesc.offset + n * duDesc.stride;
        float *pOutDQV = outDQV + dvDesc.offset + n * dvDesc.stride;
        for(uniform int c=0; c<nChannel; c++) {
            uniform int offset = c * 20;
            Point DQU, DQV;
            DQU.x = DQU.y = DQU.z = 0.0;
            DQV.x = DQV.y = DQV.z = 0.0;
            for (uniform int i=0; i<20; ++i) {
                DQU = DQU + deriv1[i] * controlVertices[offset + i];
                DQV = DQV + deriv2[i] * controlVertices[offset + i];
            }

            *pOutDQU ++ = DQU.x, *pOutDQU ++ = DQU.y, *pOutDQU ++ = DQU.z;
            *pOutDQV ++ = DQV.x, *pOutDQV ++ = DQV.y, *pOutDQV ++ = DQV.z;
        }
    }
}
|
|
||||||
|
|
||||||
// Evaluates a 20 control vertex Gregory patch at nPoint (u,v) locations and
// writes the resulting positions only.
//
//   bitField      - packed patch parameter bits; decoded here through
//                   getParamFraction(), getU(), getV() and passed on to
//                   getGregoryWeightsNoDerivative()
//   nPoint        - number of (u,v) samples
//   u, v          - parametric coordinates, one pair per sample
//   vertexIndices - indices of the 20 control vertices
//   inDesc, inQ   - layout and storage of the source vertex data
//   outDesc, outQ - layout and storage of the evaluated positions
//
// The primvar data is processed as (inDesc.length / 3) channels of three
// floats each.
export void
evalGregoryNoDerivative(uniform unsigned int bitField,
                        uniform int nPoint,
                        uniform float u[],
                        uniform float v[],
                        uniform const unsigned int vertexIndices[],
                        uniform const BufferDescriptor &inDesc,
                        uniform const float inQ[],
                        uniform const BufferDescriptor &outDesc,
                        uniform float outQ[]
                        )
{
    uniform int nChannel = inDesc.length / 3;
    assert(nChannel < MAX_CHANNEL);

    // Gather the control vertices channel-major: cvs[channel * 20 + cv].
    uniform Point cvs[MAX_CHANNEL*20];
    for (uniform int cv = 0; cv < 20; ++cv) {
        uniform unsigned int vertexId = vertexIndices[cv];
        uniform const float * uniform base = inQ + inDesc.offset + vertexId * inDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform const float * uniform xyz = base + 3 * ch;
            uniform int slot = ch * 20 + cv;
            cvs[slot].x = xyz[0];
            cvs[slot].y = xyz[1];
            cvs[slot].z = xyz[2];
        }
    }

    uniform float frac = getParamFraction(bitField);

    // Patch origin in (u,v), used to map the samples into patch-local (s,t).
    uniform float originU = (uniform float)getU(bitField)*frac;
    uniform float originV = (uniform float)getV(bitField)*frac;

    foreach (n = 0 ... nPoint) {
        // Normalized patch-local coordinates.
        float s = (u[n] - originU) / frac;
        float t = (v[n] - originV) / frac;

        float posW[20];
        getGregoryWeightsNoDerivative(bitField, s, t, posW);

        float *dstQ = outQ + outDesc.offset + n * outDesc.stride;
        for (uniform int ch = 0; ch < nChannel; ++ch) {
            uniform int first = ch * 20;
            Point acc;
            acc.x = 0.0;
            acc.y = 0.0;
            acc.z = 0.0;
            for (uniform int cv = 0; cv < 20; ++cv) {
                acc = acc + posW[cv] * cvs[first + cv];
            }

            dstQ[0] = acc.x;
            dstQ[1] = acc.y;
            dstQ[2] = acc.z;
            dstQ += 3;
        }
    }
}
|
|
||||||
|
|
@ -1,55 +0,0 @@
|
|||||||
//
|
|
||||||
// ispcEvalLimitKernel.isph
|
|
||||||
// (Header automatically generated by the ispc compiler.)
|
|
||||||
// DO NOT EDIT THIS FILE.
|
|
||||||
//
|
|
||||||
|
|
||||||
#ifndef ISPC_ISPCEVALLIMITKERNEL_ISPH
|
|
||||||
#define ISPC_ISPCEVALLIMITKERNEL_ISPH
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
namespace ispc { /* namespace */
|
|
||||||
#endif // __cplusplus
|
|
||||||
#ifndef __ISPC_STRUCT_BufferDescriptor__
|
|
||||||
#define __ISPC_STRUCT_BufferDescriptor__
|
|
||||||
struct BufferDescriptor {
|
|
||||||
int32_t offset;
|
|
||||||
int32_t length;
|
|
||||||
int32_t stride;
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
|
||||||
// Functions exported from ispc code
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
|
||||||
#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)
|
|
||||||
extern "C" {
|
|
||||||
#endif // __cplusplus
|
|
||||||
extern void evalBSpline(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
|
|
||||||
|
|
||||||
extern void evalBilinear(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
|
|
||||||
|
|
||||||
extern void evalGregory(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ, const struct BufferDescriptor &duDesc, float * outDQU, const struct BufferDescriptor &dvDesc, float * outDQV);
|
|
||||||
|
|
||||||
extern void evalBSplineNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
|
|
||||||
|
|
||||||
extern void evalBilinearNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
|
|
||||||
|
|
||||||
extern void evalGregoryNoDerivative(int32_t bitField, int32_t nPoint, const float * u, const float * v, const int32_t * vertexIndices, const struct BufferDescriptor &inDesc, const float * inQ, const struct BufferDescriptor &outDesc, float * outQ);
|
|
||||||
|
|
||||||
extern void getSIMDWidth(int32_t &simdWidth);
|
|
||||||
#if defined(__cplusplus) && !defined(__ISPC_NO_EXTERN_C)
|
|
||||||
} /* end extern C */
|
|
||||||
#endif // __cplusplus
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
} /* namespace */
|
|
||||||
#endif // __cplusplus
|
|
||||||
|
|
||||||
#endif // ISPC_ISPCEVALLIMITKERNEL_ISPH
|
|
@ -1,289 +0,0 @@
|
|||||||
//
|
|
||||||
// Copyright 2015 Pixar
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
|
||||||
// with the following modification; you may not use this file except in
|
|
||||||
// compliance with the Apache License and the following modification to it:
|
|
||||||
// Section 6. Trademarks. is deleted and replaced with:
|
|
||||||
//
|
|
||||||
// 6. Trademarks. This License does not grant permission to use the trade
|
|
||||||
// names, trademarks, service marks, or product names of the Licensor
|
|
||||||
// and its affiliates, except as required to comply with Section 4(c) of
|
|
||||||
// the License and to reproduce the content of the NOTICE file.
|
|
||||||
//
|
|
||||||
// You may obtain a copy of the Apache License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the Apache License with the above modification is
|
|
||||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
// KIND, either express or implied. See the Apache License for the specific
|
|
||||||
// language governing permissions and limitations under the Apache License.
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "ispcEvaluator.h"
|
|
||||||
#include "cpuKernel.h"
|
|
||||||
#include "../far/patchBasis.h"
|
|
||||||
#include "ispcEvalLimitKernel.isph"
|
|
||||||
|
|
||||||
#include <tbb/parallel_for.h>
|
|
||||||
#include <cstdlib>
|
|
||||||
|
|
||||||
namespace OpenSubdiv {
|
|
||||||
namespace OPENSUBDIV_VERSION {
|
|
||||||
|
|
||||||
namespace Osd {
|
|
||||||
|
|
||||||
#define grain_size 512
|
|
||||||
|
|
||||||
/* static */
|
|
||||||
bool
|
|
||||||
IspcEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
const int * sizes,
|
|
||||||
const int * offsets,
|
|
||||||
const int * indices,
|
|
||||||
const float * weights,
|
|
||||||
int start, int end) {
|
|
||||||
|
|
||||||
if (end <= start) return true;
|
|
||||||
if (srcDesc.length != dstDesc.length) return false;
|
|
||||||
|
|
||||||
// XXX: we can probably expand cpuKernel.cpp to here.
|
|
||||||
CpuEvalStencils(src, srcDesc, dst, dstDesc,
|
|
||||||
sizes, offsets, indices, weights, start, end);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */
|
|
||||||
bool
|
|
||||||
IspcEvaluator::EvalStencils(const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
float *du, BufferDescriptor const &duDesc,
|
|
||||||
float *dv, BufferDescriptor const &dvDesc,
|
|
||||||
const int * sizes,
|
|
||||||
const int * offsets,
|
|
||||||
const int * indices,
|
|
||||||
const float * weights,
|
|
||||||
const float * duWeights,
|
|
||||||
const float * dvWeights,
|
|
||||||
int start, int end) {
|
|
||||||
if (end <= start) return true;
|
|
||||||
if (srcDesc.length != dstDesc.length) return false;
|
|
||||||
if (srcDesc.length != duDesc.length) return false;
|
|
||||||
if (srcDesc.length != dvDesc.length) return false;
|
|
||||||
|
|
||||||
CpuEvalStencils(src, srcDesc,
|
|
||||||
dst, dstDesc,
|
|
||||||
du, duDesc,
|
|
||||||
dv, dvDesc,
|
|
||||||
sizes, offsets, indices,
|
|
||||||
weights, duWeights, dvWeights,
|
|
||||||
start, end);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lightweight cursor over a strided buffer of T.
//
// _p points at the current element, which is _length values wide; consecutive
// elements are _stride values apart.  A null _p means "no buffer": Clear(),
// AddWithWeight() and operator++ are then no-ops.
template <typename T>
struct BufferAdapter {
    // p      - first value of the current element (may be null)
    // length - number of values per element
    // stride - distance, in values, between consecutive elements
    BufferAdapter(T *p, int length, int stride) :
        _p(p), _length(length), _stride(stride) { }

    // Zeroes the current element.  Guarded against a null buffer so that it
    // behaves consistently with AddWithWeight() and operator++.
    void Clear() {
        if (_p) {
            for (int i = 0; i < _length; ++i) _p[i] = 0;
        }
    }

    // Accumulates src[0.._length) scaled by w into the current element.
    void AddWithWeight(T const *src, float w) {
        if (_p) {
            for (int i = 0; i < _length; ++i) {
                _p[i] += src[i] * w;
            }
        }
    }

    // Returns the address of the element index strides away from the current one.
    const T *operator[] (int index) const {
        return _p + _stride * index;
    }

    // Advances the cursor to the next element.
    BufferAdapter<T> & operator ++() {
        if (_p) {
            _p += _stride;
        }
        return *this;
    }

    T *_p;
    int _length;
    int _stride;
};
|
|
||||||
|
|
||||||
/* static */
|
|
||||||
bool
|
|
||||||
IspcEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
int numPatchCoords,
|
|
||||||
const PatchCoord *patchCoords,
|
|
||||||
const PatchArray *patchArrays,
|
|
||||||
const int *patchIndexBuffer,
|
|
||||||
const PatchParam *patchParamBuffer) {
|
|
||||||
if (srcDesc.length != dstDesc.length) return false;
|
|
||||||
|
|
||||||
// Copy BufferDescriptor to ispc version
|
|
||||||
// Since memory alignment in ISPC may be different from C++,
|
|
||||||
// we use the assignment for each field instead of the assignment for
|
|
||||||
// the whole struct
|
|
||||||
ispc::BufferDescriptor ispcSrcDesc;
|
|
||||||
ispcSrcDesc.offset = srcDesc.offset;
|
|
||||||
ispcSrcDesc.length = srcDesc.length;
|
|
||||||
ispcSrcDesc.stride = srcDesc.stride;
|
|
||||||
|
|
||||||
tbb::blocked_range<int> range = tbb::blocked_range<int>(0, numPatchCoords, grain_size);
|
|
||||||
tbb::parallel_for(range, [&](const tbb::blocked_range<int> &r)
|
|
||||||
{
|
|
||||||
uint i = r.begin();
|
|
||||||
|
|
||||||
ispc::BufferDescriptor ispcDstDesc, ispcDuDesc, ispcDvDesc;
|
|
||||||
ispcDstDesc.offset = dstDesc.offset + dstDesc.offset + i * dstDesc.stride;
|
|
||||||
ispcDstDesc.length = dstDesc.length;
|
|
||||||
ispcDstDesc.stride = dstDesc.stride;
|
|
||||||
|
|
||||||
while (i < r.end()) {
|
|
||||||
// the patch coordinates are sorted by patch handle
|
|
||||||
// the following code searches the coordinates that
|
|
||||||
// belongs to the same patch so that they can be evalauated
|
|
||||||
// with ISPC
|
|
||||||
int nCoord = 1;
|
|
||||||
Far::PatchTable::PatchHandle handle = patchCoords[i].handle;
|
|
||||||
while(i + nCoord < r.end() &&
|
|
||||||
handle.isEqual(patchCoords[i + nCoord].handle) )
|
|
||||||
nCoord ++;
|
|
||||||
|
|
||||||
PatchArray const &array = patchArrays[handle.arrayIndex];
|
|
||||||
int patchType = array.GetPatchType();
|
|
||||||
Far::PatchParam const & param = patchParamBuffer[handle.patchIndex];
|
|
||||||
|
|
||||||
unsigned int bitField = param.field1;
|
|
||||||
|
|
||||||
const int *cvs = &patchIndexBuffer[array.indexBase + handle.vertIndex];
|
|
||||||
|
|
||||||
__declspec( align(64) ) float u[nCoord];
|
|
||||||
__declspec( align(64) ) float v[nCoord];
|
|
||||||
|
|
||||||
for(int n=0; n<nCoord; n++) {
|
|
||||||
u[n] = patchCoords[i + n].s;
|
|
||||||
v[n] = patchCoords[i + n].t;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
|
||||||
ispc::evalBSplineNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
|
||||||
ispcDstDesc, dst);
|
|
||||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
|
||||||
ispc::evalGregoryNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
|
||||||
ispcDstDesc, dst);
|
|
||||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
|
||||||
ispc::evalBilinearNoDerivative(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
|
||||||
ispcDstDesc, dst);
|
|
||||||
} else {
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
i += nCoord;
|
|
||||||
ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* static */
|
|
||||||
bool
|
|
||||||
IspcEvaluator::EvalPatches(const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
float *du, BufferDescriptor const &duDesc,
|
|
||||||
float *dv, BufferDescriptor const &dvDesc,
|
|
||||||
int numPatchCoords,
|
|
||||||
const PatchCoord *patchCoords,
|
|
||||||
const PatchArray *patchArrays,
|
|
||||||
const int *patchIndexBuffer,
|
|
||||||
const PatchParam *patchParamBuffer) {
|
|
||||||
if (srcDesc.length != dstDesc.length) return false;
|
|
||||||
|
|
||||||
// Copy BufferDescriptor to ispc version
|
|
||||||
// Since memory alignment in ISPC may be different from C++,
|
|
||||||
// we use the assignment for each field instead of the assignment for
|
|
||||||
// the whole struct
|
|
||||||
ispc::BufferDescriptor ispcSrcDesc;
|
|
||||||
ispcSrcDesc.offset = srcDesc.offset;
|
|
||||||
ispcSrcDesc.length = srcDesc.length;
|
|
||||||
ispcSrcDesc.stride = srcDesc.stride;
|
|
||||||
|
|
||||||
tbb::blocked_range<int> range = tbb::blocked_range<int>(0, numPatchCoords, grain_size);
|
|
||||||
tbb::parallel_for(range, [&](const tbb::blocked_range<int> &r)
|
|
||||||
{
|
|
||||||
uint i = r.begin();
|
|
||||||
|
|
||||||
ispc::BufferDescriptor ispcDstDesc, ispcDuDesc, ispcDvDesc;
|
|
||||||
ispcDstDesc.offset = dstDesc.offset + dstDesc.offset + i * dstDesc.stride;
|
|
||||||
ispcDstDesc.length = dstDesc.length;
|
|
||||||
ispcDstDesc.stride = dstDesc.stride;
|
|
||||||
|
|
||||||
ispcDuDesc.offset = duDesc.offset + i * duDesc.stride;
|
|
||||||
ispcDuDesc.length = duDesc.length;
|
|
||||||
ispcDuDesc.stride = duDesc.stride;
|
|
||||||
|
|
||||||
ispcDvDesc.offset = dvDesc.offset + i * dvDesc.stride;
|
|
||||||
ispcDvDesc.length = dvDesc.length;
|
|
||||||
ispcDvDesc.stride = dvDesc.stride;
|
|
||||||
while (i < r.end()) {
|
|
||||||
// the patch coordinates are sorted by patch handle
|
|
||||||
// the following code searches the coordinates that
|
|
||||||
// belongs to the same patch so that they can be evalauated
|
|
||||||
// with ISPC
|
|
||||||
int nCoord = 1;
|
|
||||||
Far::PatchTable::PatchHandle handle = patchCoords[i].handle;
|
|
||||||
while(i + nCoord < r.end() &&
|
|
||||||
handle.isEqual(patchCoords[i + nCoord].handle) )
|
|
||||||
nCoord ++;
|
|
||||||
|
|
||||||
PatchArray const &array = patchArrays[handle.arrayIndex];
|
|
||||||
int patchType = array.GetPatchType();
|
|
||||||
Far::PatchParam const & param = patchParamBuffer[handle.patchIndex];
|
|
||||||
|
|
||||||
unsigned int bitField = param.field1;
|
|
||||||
|
|
||||||
const int *cvs = &patchIndexBuffer[array.indexBase + handle.vertIndex];
|
|
||||||
|
|
||||||
__declspec( align(64) ) float u[nCoord];
|
|
||||||
__declspec( align(64) ) float v[nCoord];
|
|
||||||
|
|
||||||
for(int n=0; n<nCoord; n++) {
|
|
||||||
u[n] = patchCoords[i + n].s;
|
|
||||||
v[n] = patchCoords[i + n].t;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (patchType == Far::PatchDescriptor::REGULAR) {
|
|
||||||
ispc::evalBSpline(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
|
||||||
ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
|
|
||||||
} else if (patchType == Far::PatchDescriptor::GREGORY_BASIS) {
|
|
||||||
ispc::evalGregory(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
|
||||||
ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
|
|
||||||
} else if (patchType == Far::PatchDescriptor::QUADS) {
|
|
||||||
ispc::evalBilinear(bitField, nCoord, u, v, cvs, ispcSrcDesc, src,
|
|
||||||
ispcDstDesc, dst, ispcDuDesc, du, ispcDvDesc, dv);
|
|
||||||
} else {
|
|
||||||
assert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
i += nCoord;
|
|
||||||
ispcDstDesc.offset = dstDesc.offset + i * dstDesc.stride;
|
|
||||||
ispcDuDesc.offset = duDesc.offset + i * duDesc.stride;
|
|
||||||
ispcDvDesc.offset = dvDesc.offset + i * dvDesc.stride;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} // end namespace Osd
|
|
||||||
|
|
||||||
} // end namespace OPENSUBDIV_VERSION
|
|
||||||
} // end namespace OpenSubdiv
|
|
@ -1,482 +0,0 @@
|
|||||||
//
|
|
||||||
// Copyright 2015 Pixar
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
|
||||||
// with the following modification; you may not use this file except in
|
|
||||||
// compliance with the Apache License and the following modification to it:
|
|
||||||
// Section 6. Trademarks. is deleted and replaced with:
|
|
||||||
//
|
|
||||||
// 6. Trademarks. This License does not grant permission to use the trade
|
|
||||||
// names, trademarks, service marks, or product names of the Licensor
|
|
||||||
// and its affiliates, except as required to comply with Section 4(c) of
|
|
||||||
// the License and to reproduce the content of the NOTICE file.
|
|
||||||
//
|
|
||||||
// You may obtain a copy of the Apache License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the Apache License with the above modification is
|
|
||||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
||||||
// KIND, either express or implied. See the Apache License for the specific
|
|
||||||
// language governing permissions and limitations under the Apache License.
|
|
||||||
//
|
|
||||||
|
|
||||||
#ifndef OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
|
|
||||||
#define OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
|
|
||||||
|
|
||||||
#include "../version.h"
|
|
||||||
|
|
||||||
#include <cstddef>
|
|
||||||
#include <vector>
|
|
||||||
#include "../osd/bufferDescriptor.h"
|
|
||||||
#include "../osd/types.h"
|
|
||||||
|
|
||||||
namespace OpenSubdiv {
|
|
||||||
namespace OPENSUBDIV_VERSION {
|
|
||||||
|
|
||||||
namespace Osd {
|
|
||||||
|
|
||||||
class IspcEvaluator {
|
|
||||||
public:
|
|
||||||
/// ----------------------------------------------------------------------
|
|
||||||
///
|
|
||||||
/// Stencil evaluations with StencilTable
|
|
||||||
///
|
|
||||||
/// ----------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// \brief Generic static eval stencils function. This function has the same
|
|
||||||
/// signature as other device kernels have so that it can be called
|
|
||||||
/// in the same way from OsdMesh template interface.
|
|
||||||
///
|
|
||||||
/// @param srcBuffer Input primvar buffer.
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// const float pointer for read
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dstBuffer Output primvar buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param stencilTable Far::StencilTable or equivalent
|
|
||||||
///
|
|
||||||
/// @param instance not used in the cpu kernel
|
|
||||||
/// (declared as a typed pointer to prevent
|
|
||||||
/// undesirable template resolution)
|
|
||||||
///
|
|
||||||
/// @param deviceContext not used in the cpu kernel
|
|
||||||
///
|
|
||||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
|
||||||
static bool EvalStencils(
|
|
||||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
|
||||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
|
||||||
STENCIL_TABLE const *stencilTable,
|
|
||||||
const IspcEvaluator *instance = NULL,
|
|
||||||
void * deviceContext = NULL) {
|
|
||||||
|
|
||||||
(void)instance; // unused
|
|
||||||
(void)deviceContext; // unused
|
|
||||||
|
|
||||||
if (stencilTable->GetNumStencils() == 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
|
||||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
|
||||||
&stencilTable->GetSizes()[0],
|
|
||||||
&stencilTable->GetOffsets()[0],
|
|
||||||
&stencilTable->GetControlIndices()[0],
|
|
||||||
&stencilTable->GetWeights()[0],
|
|
||||||
/*start = */ 0,
|
|
||||||
/*end = */ stencilTable->GetNumStencils());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Static eval stencils function which takes raw CPU pointers for
|
|
||||||
/// input and output.
|
|
||||||
///
|
|
||||||
/// @param src Input primvar pointer. An offset of srcDesc
|
|
||||||
/// will be applied internally (i.e. the pointer
|
|
||||||
/// should not include the offset)
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
|
||||||
/// will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
|
||||||
/// to apply for the range [start, end)
|
|
||||||
///
|
|
||||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param indices pointer to the indices buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param weights pointer to the weights buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param start start index of stencil table
|
|
||||||
///
|
|
||||||
/// @param end end index of stencil table
|
|
||||||
///
|
|
||||||
static bool EvalStencils(
|
|
||||||
const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
const int * sizes,
|
|
||||||
const int * offsets,
|
|
||||||
const int * indices,
|
|
||||||
const float * weights,
|
|
||||||
int start, int end);
|
|
||||||
|
|
||||||
/// \brief Generic static eval stencils function with derivatives.
|
|
||||||
/// This function has the same signature as other device kernels
|
|
||||||
/// have so that it can be called in the same way from OsdMesh
|
|
||||||
/// template interface.
|
|
||||||
///
|
|
||||||
/// @param srcBuffer Input primvar buffer.
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// const float pointer for read
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dstBuffer Output primvar buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param duBuffer Output U-derivative buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param dvBuffer Output V-derivative buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param stencilTable Far::StencilTable or equivalent
|
|
||||||
///
|
|
||||||
/// @param instance not used in the cpu kernel
|
|
||||||
/// (declared as a typed pointer to prevent
|
|
||||||
/// undesirable template resolution)
|
|
||||||
///
|
|
||||||
/// @param deviceContext not used in the cpu kernel
|
|
||||||
///
|
|
||||||
template <typename SRC_BUFFER, typename DST_BUFFER, typename STENCIL_TABLE>
|
|
||||||
static bool EvalStencils(
|
|
||||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
|
||||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
|
||||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
|
||||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
|
||||||
STENCIL_TABLE const *stencilTable,
|
|
||||||
const IspcEvaluator *instance = NULL,
|
|
||||||
void * deviceContext = NULL) {
|
|
||||||
|
|
||||||
(void)instance; // unused
|
|
||||||
(void)deviceContext; // unused
|
|
||||||
|
|
||||||
return EvalStencils(srcBuffer->BindCpuBuffer(), srcDesc,
|
|
||||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
|
||||||
duBuffer->BindCpuBuffer(), duDesc,
|
|
||||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
|
||||||
&stencilTable->GetSizes()[0],
|
|
||||||
&stencilTable->GetOffsets()[0],
|
|
||||||
&stencilTable->GetControlIndices()[0],
|
|
||||||
&stencilTable->GetWeights()[0],
|
|
||||||
&stencilTable->GetDuWeights()[0],
|
|
||||||
&stencilTable->GetDvWeights()[0],
|
|
||||||
/*start = */ 0,
|
|
||||||
/*end = */ stencilTable->GetNumStencils());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Static eval stencils function with derivatives, which takes
|
|
||||||
/// raw CPU pointers for input and output.
|
|
||||||
///
|
|
||||||
/// @param src Input primvar pointer. An offset of srcDesc
|
|
||||||
/// will be applied internally (i.e. the pointer
|
|
||||||
/// should not include the offset)
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
|
||||||
/// will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param du Output U-derivatives pointer. An offset of
|
|
||||||
/// duDesc will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param duDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param dv Output V-derivatives pointer. An offset of
|
|
||||||
/// dvDesc will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param dvDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param sizes pointer to the sizes buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param offsets pointer to the offsets buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param indices pointer to the indices buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param weights pointer to the weights buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param duWeights pointer to the du-weights buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param dvWeights pointer to the dv-weights buffer of the stencil table
|
|
||||||
///
|
|
||||||
/// @param start start index of stencil table
|
|
||||||
///
|
|
||||||
/// @param end end index of stencil table
|
|
||||||
///
|
|
||||||
static bool EvalStencils(
|
|
||||||
const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
float *du, BufferDescriptor const &duDesc,
|
|
||||||
float *dv, BufferDescriptor const &dvDesc,
|
|
||||||
const int * sizes,
|
|
||||||
const int * offsets,
|
|
||||||
const int * indices,
|
|
||||||
const float * weights,
|
|
||||||
const float * duWeights,
|
|
||||||
const float * dvWeights,
|
|
||||||
int start, int end);
|
|
||||||
|
|
||||||
/// ----------------------------------------------------------------------
|
|
||||||
///
|
|
||||||
/// Limit evaluations with PatchTable
|
|
||||||
///
|
|
||||||
/// ----------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// \brief Generic limit eval function. This function has the same
|
|
||||||
/// signature as other device kernels have so that it can be called
|
|
||||||
/// in the same way.
|
|
||||||
///
|
|
||||||
/// @param srcBuffer Input primvar buffer.
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// const float pointer for read
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dstBuffer Output primvar buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param numPatchCoords number of patchCoords.
|
|
||||||
///
|
|
||||||
/// @param patchCoords array of locations to be evaluated.
|
|
||||||
///
|
|
||||||
/// @param patchTable CpuPatchTable or equivalent
|
|
||||||
/// XXX: currently Far::PatchTable can't be used
|
|
||||||
/// due to interface mismatch
|
|
||||||
///
|
|
||||||
/// @param instance not used in the cpu evaluator
|
|
||||||
///
|
|
||||||
/// @param deviceContext not used in the cpu evaluator
|
|
||||||
///
|
|
||||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
|
||||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
|
||||||
static bool EvalPatches(
|
|
||||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
|
||||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
|
||||||
int numPatchCoords,
|
|
||||||
PATCHCOORD_BUFFER *patchCoords,
|
|
||||||
PATCH_TABLE *patchTable,
|
|
||||||
IspcEvaluator const *instance = NULL,
|
|
||||||
void * deviceContext = NULL) {
|
|
||||||
|
|
||||||
(void)instance; // unused
|
|
||||||
(void)deviceContext; // unused
|
|
||||||
|
|
||||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
|
||||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
|
||||||
numPatchCoords,
|
|
||||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
|
||||||
patchTable->GetPatchArrayBuffer(),
|
|
||||||
patchTable->GetPatchIndexBuffer(),
|
|
||||||
patchTable->GetPatchParamBuffer());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Generic limit eval function with derivatives. This function has
|
|
||||||
/// the same signature as other device kernels have so that it can be
|
|
||||||
/// called in the same way.
|
|
||||||
///
|
|
||||||
/// @param srcBuffer Input primvar buffer.
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// const float pointer for read
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dstBuffer Output primvar buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param duBuffer Output U-derivatives buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param duDesc vertex buffer descriptor for the duBuffer
|
|
||||||
///
|
|
||||||
/// @param dvBuffer Output V-derivatives buffer
|
|
||||||
/// must have BindCpuBuffer() method returning a
|
|
||||||
/// float pointer for write
|
|
||||||
///
|
|
||||||
/// @param dvDesc vertex buffer descriptor for the dvBuffer
|
|
||||||
///
|
|
||||||
/// @param numPatchCoords number of patchCoords.
|
|
||||||
///
|
|
||||||
/// @param patchCoords array of locations to be evaluated.
|
|
||||||
///
|
|
||||||
/// @param patchTable CpuPatchTable or equivalent
|
|
||||||
/// XXX: currently Far::PatchTable can't be used
|
|
||||||
/// due to interface mismatch
|
|
||||||
///
|
|
||||||
/// @param instance not used in the cpu evaluator
|
|
||||||
///
|
|
||||||
/// @param deviceContext not used in the cpu evaluator
|
|
||||||
///
|
|
||||||
template <typename SRC_BUFFER, typename DST_BUFFER,
|
|
||||||
typename PATCHCOORD_BUFFER, typename PATCH_TABLE>
|
|
||||||
static bool EvalPatches(
|
|
||||||
SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc,
|
|
||||||
DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc,
|
|
||||||
DST_BUFFER *duBuffer, BufferDescriptor const &duDesc,
|
|
||||||
DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc,
|
|
||||||
int numPatchCoords,
|
|
||||||
PATCHCOORD_BUFFER *patchCoords,
|
|
||||||
PATCH_TABLE *patchTable,
|
|
||||||
IspcEvaluator const *instance = NULL,
|
|
||||||
void * deviceContext = NULL) {
|
|
||||||
(void)instance; // unused
|
|
||||||
(void)deviceContext; // unused
|
|
||||||
|
|
||||||
// XXX: PatchCoords is somewhat abusing vertex primvar buffer interop.
|
|
||||||
// ideally all buffer classes should have templated by datatype
|
|
||||||
// so that downcast isn't needed there.
|
|
||||||
// (e.g. Osd::CpuBuffer<PatchCoord> )
|
|
||||||
//
|
|
||||||
return EvalPatches(srcBuffer->BindCpuBuffer(), srcDesc,
|
|
||||||
dstBuffer->BindCpuBuffer(), dstDesc,
|
|
||||||
duBuffer->BindCpuBuffer(), duDesc,
|
|
||||||
dvBuffer->BindCpuBuffer(), dvDesc,
|
|
||||||
numPatchCoords,
|
|
||||||
(const PatchCoord*)patchCoords->BindCpuBuffer(),
|
|
||||||
patchTable->GetPatchArrayBuffer(),
|
|
||||||
patchTable->GetPatchIndexBuffer(),
|
|
||||||
patchTable->GetPatchParamBuffer());
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \brief Static limit eval function. It takes an array of PatchCoord
|
|
||||||
/// and evaluate limit values on given PatchTable.
|
|
||||||
///
|
|
||||||
/// @param src Input primvar pointer. An offset of srcDesc
|
|
||||||
/// will be applied internally (i.e. the pointer
|
|
||||||
/// should not include the offset)
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
|
||||||
/// will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param numPatchCoords number of patchCoords.
|
|
||||||
///
|
|
||||||
/// @param patchCoords array of locations to be evaluated.
|
|
||||||
///
|
|
||||||
/// @param patchArrays an array of Osd::PatchArray struct
|
|
||||||
/// indexed by PatchCoord::arrayIndex
|
|
||||||
///
|
|
||||||
/// @param patchIndexBuffer an array of patch indices
|
|
||||||
/// indexed by PatchCoord::vertIndex
|
|
||||||
///
|
|
||||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
|
||||||
/// indexed by PatchCoord::patchIndex
|
|
||||||
///
|
|
||||||
static bool EvalPatches(
|
|
||||||
const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
int numPatchCoords,
|
|
||||||
const PatchCoord *patchCoords,
|
|
||||||
const PatchArray *patchArrays,
|
|
||||||
const int *patchIndexBuffer,
|
|
||||||
const PatchParam *patchParamBuffer);
|
|
||||||
|
|
||||||
/// \brief Static limit eval function with derivatives. It takes an array of PatchCoord
|
|
||||||
/// and evaluate limit values on given PatchTable.
|
|
||||||
///
|
|
||||||
/// @param src Input primvar pointer. An offset of srcDesc
|
|
||||||
/// will be applied internally (i.e. the pointer
|
|
||||||
/// should not include the offset)
|
|
||||||
///
|
|
||||||
/// @param srcDesc vertex buffer descriptor for the input buffer
|
|
||||||
///
|
|
||||||
/// @param dst Output primvar pointer. An offset of dstDesc
|
|
||||||
/// will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param dstDesc vertex buffer descriptor for the output buffer
|
|
||||||
///
|
|
||||||
/// @param du Output U-derivatives pointer. An offset of
|
|
||||||
/// duDesc will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param duDesc vertex buffer descriptor for the du buffer
|
|
||||||
///
|
|
||||||
/// @param dv Output V-derivatives pointer. An offset of
|
|
||||||
/// dvDesc will be applied internally.
|
|
||||||
///
|
|
||||||
/// @param dvDesc vertex buffer descriptor for the dv buffer
|
|
||||||
///
|
|
||||||
/// @param numPatchCoords number of patchCoords.
|
|
||||||
///
|
|
||||||
/// @param patchCoords array of locations to be evaluated.
|
|
||||||
///
|
|
||||||
/// @param patchArrays an array of Osd::PatchArray struct
|
|
||||||
/// indexed by PatchCoord::arrayIndex
|
|
||||||
///
|
|
||||||
/// @param patchIndexBuffer an array of patch indices
|
|
||||||
/// indexed by PatchCoord::vertIndex
|
|
||||||
///
|
|
||||||
/// @param patchParamBuffer an array of Osd::PatchParam struct
|
|
||||||
/// indexed by PatchCoord::patchIndex
|
|
||||||
///
|
|
||||||
static bool EvalPatches(
|
|
||||||
const float *src, BufferDescriptor const &srcDesc,
|
|
||||||
float *dst, BufferDescriptor const &dstDesc,
|
|
||||||
float *du, BufferDescriptor const &duDesc,
|
|
||||||
float *dv, BufferDescriptor const &dvDesc,
|
|
||||||
int numPatchCoords,
|
|
||||||
PatchCoord const *patchCoords,
|
|
||||||
PatchArray const *patchArrays,
|
|
||||||
const int *patchIndexBuffer,
|
|
||||||
PatchParam const *patchParamBuffer);
|
|
||||||
|
|
||||||
/// ----------------------------------------------------------------------
|
|
||||||
///
|
|
||||||
/// Other methods
|
|
||||||
///
|
|
||||||
/// ----------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// \brief synchronize all asynchronous computation invoked on this device.
|
|
||||||
static void Synchronize(void * /*deviceContext = NULL*/) {
    // Intentionally empty: this function performs no work.
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // end namespace Osd
|
|
||||||
|
|
||||||
} // end namespace OPENSUBDIV_VERSION
|
|
||||||
using namespace OPENSUBDIV_VERSION;
|
|
||||||
|
|
||||||
} // end namespace OpenSubdiv
|
|
||||||
|
|
||||||
|
|
||||||
#endif  // OPENSUBDIV3_OSD_ISPC_EVALUATOR_H
|
|
Loading…
Reference in New Issue
Block a user