diff --git a/examples/common/CMakeLists.txt b/examples/common/CMakeLists.txt index 9c3c5c12..492b9147 100644 --- a/examples/common/CMakeLists.txt +++ b/examples/common/CMakeLists.txt @@ -25,6 +25,8 @@ set(EXAMPLES_COMMON_SHADER_FILES framebuffer.glsl ssao.glsl + glslPtexCommon.glsl + hlslPtexCommon.hlsl ) set(EXAMPLES_COMMON_SOURCE_FILES @@ -107,6 +109,36 @@ if(CUDA_FOUND) ) endif() +#------------------------------------------------------------------------------- +if( PTEX_FOUND ) + list(APPEND EXAMPLES_COMMON_HEADER_FILES + ptexMipmapTextureLoader.h + ) + list(APPEND EXAMPLES_COMMON_SOURCE_FILES + ptexMipmapTextureLoader.cpp + ) + if( OPENGL_FOUND ) + list(APPEND EXAMPLES_COMMON_SOURCE_FILES + glPtexMipmapTexture.cpp + ) + list(APPEND EXAMPLES_COMMON_HEADER_FILES + glPtexMipmapTexture.h + ) + endif() + if( DXSDK_FOUND ) + list(APPEND EXAMPLES_COMMON_SOURCE_FILES + d3d11PtexMipmapTexture.cpp + ) + list(APPEND EXAMPLES_COMMON_HEADER_FILES + d3d11PtexMipmapTexture.h + ) + endif() + include_directories( "${PTEX_INCLUDE_DIR}" ) + list(APPEND PLATFORM_CPU_LIBRARIES + ${PTEX_LIBRARY} + ) +endif() +#------------------------------------------------------------------------------- include_directories( "${PROJECT_SOURCE_DIR}/opensubdiv" diff --git a/opensubdiv/osd/d3d11PtexMipmapTexture.cpp b/examples/common/d3d11PtexMipmapTexture.cpp similarity index 93% rename from opensubdiv/osd/d3d11PtexMipmapTexture.cpp rename to examples/common/d3d11PtexMipmapTexture.cpp index d6d14ec1..932b8450 100644 --- a/opensubdiv/osd/d3d11PtexMipmapTexture.cpp +++ b/examples/common/d3d11PtexMipmapTexture.cpp @@ -22,19 +22,14 @@ // language governing permissions and limitations under the Apache License. 
// -#include "../osd/d3d11PtexMipmapTexture.h" -#include "../osd/ptexMipmapTextureLoader.h" -#include "../far/error.h" +#include "d3d11PtexMipmapTexture.h" +#include "ptexMipmapTextureLoader.h" +#include // XXX: to be replaced #include #include #include -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - D3D11PtexMipmapTexture::D3D11PtexMipmapTexture() : _width(0), _height(0), _depth(0), _layout(0), _texels(0), @@ -51,6 +46,16 @@ D3D11PtexMipmapTexture::~D3D11PtexMipmapTexture() if (_texelsSRV) _texelsSRV->Release(); } +/*static*/ +const char * +D3D11PtexMipmapTexture::GetShaderSource() +{ + static const char *ptexShaderSource = +#include "hlslPtexCommon.gen.h" + ; + return ptexShaderSource; +} + static ID3D11Buffer * genTextureBuffer(ID3D11DeviceContext *deviceContext, int size, void const * data) { @@ -68,7 +73,7 @@ genTextureBuffer(ID3D11DeviceContext *deviceContext, int size, void const * data deviceContext->GetDevice(&device); hr = device->CreateBuffer(&hBufferDesc, NULL, &buffer); if (FAILED(hr)) { - Far::Error(Far::FAR_RUNTIME_ERROR, + OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, "Fail in CreateBuffer\n"); return 0; } @@ -77,7 +82,7 @@ genTextureBuffer(ID3D11DeviceContext *deviceContext, int size, void const * data hr = deviceContext->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &resource); if (FAILED(hr)) { - Far::Error(Far::FAR_RUNTIME_ERROR, + OpenSubdiv::Far::Error(OpenSubdiv::Far::FAR_RUNTIME_ERROR, "Fail in Map buffer\n"); buffer->Release(); return 0; @@ -98,7 +103,7 @@ D3D11PtexMipmapTexture::Create(ID3D11DeviceContext *deviceContext, int maxNumPages = D3D10_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION; // Read the ptex data and pack the texels - Osd::PtexMipmapTextureLoader loader(reader, maxNumPages, maxLevels); + PtexMipmapTextureLoader loader(reader, maxNumPages, maxLevels); int numFaces = loader.GetNumFaces(); @@ -216,8 +221,3 @@ D3D11PtexMipmapTexture::Create(ID3D11DeviceContext *deviceContext, return result; } - -} // 
end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/d3d11PtexMipmapTexture.h b/examples/common/d3d11PtexMipmapTexture.h similarity index 83% rename from opensubdiv/osd/d3d11PtexMipmapTexture.h rename to examples/common/d3d11PtexMipmapTexture.h index 1ec12412..34e4f902 100644 --- a/opensubdiv/osd/d3d11PtexMipmapTexture.h +++ b/examples/common/d3d11PtexMipmapTexture.h @@ -22,12 +22,10 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_D3D11_PTEX_MIPMAP_TEXTURE_H -#define OSD_D3D11_PTEX_MIPMAP_TEXTURE_H +#ifndef OPENSUBDIV_EXAMPLES_D3D11_PTEX_MIPMAP_TEXTURE_H +#define OPENSUBDIV_EXAMPLES_D3D11_PTEX_MIPMAP_TEXTURE_H -#include "../version.h" - -#include "../osd/nonCopyable.h" +#include class PtexTexture; struct ID3D11Buffer; @@ -35,17 +33,15 @@ struct ID3D11Texture2D; struct ID3D11DeviceContext; struct ID3D11ShaderResourceView; -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class D3D11PtexMipmapTexture : NonCopyable { +class D3D11PtexMipmapTexture : OpenSubdiv::Osd::NonCopyable { public: static D3D11PtexMipmapTexture * Create(ID3D11DeviceContext *deviceContext, PtexTexture * reader, int maxLevels=10); + /// Returns HLSL shader snippet to fetch ptex + static const char *GetShaderSource(); + /// Returns the texture buffer containing the layout of the ptex faces /// in the texels texture array. 
ID3D11Buffer *GetLayoutTextureBuffer() const { return _layout; } @@ -74,11 +70,4 @@ private: ID3D11ShaderResourceView *_texelsSRV; }; -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_D3D11_PTEX_TEXTURE_H +#endif // OPENSUBDIV_EXAMPLES_D3D11_PTEX_MIPMAP_TEXTURE_H diff --git a/opensubdiv/osd/glPtexMipmapTexture.cpp b/examples/common/glPtexMipmapTexture.cpp similarity index 94% rename from opensubdiv/osd/glPtexMipmapTexture.cpp rename to examples/common/glPtexMipmapTexture.cpp index 9d752c6a..34eb13c9 100644 --- a/opensubdiv/osd/glPtexMipmapTexture.cpp +++ b/examples/common/glPtexMipmapTexture.cpp @@ -22,18 +22,13 @@ // language governing permissions and limitations under the Apache License. // -#include "../osd/glPtexMipmapTexture.h" -#include "../osd/ptexMipmapTextureLoader.h" +#include "glPtexMipmapTexture.h" +#include "ptexMipmapTextureLoader.h" -#include "../osd/opengl.h" +#include #include -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - GLPtexMipmapTexture::GLPtexMipmapTexture() : _width(0), _height(0), _depth(0), _layout(0), _texels(0), _memoryUsage(0) { @@ -48,6 +43,16 @@ GLPtexMipmapTexture::~GLPtexMipmapTexture() glDeleteTextures(1, &_texels); } +/*static*/ +const char * +GLPtexMipmapTexture::GetShaderSource() +{ + static const char *ptexShaderSource = +#include "glslPtexCommon.gen.h" + ; + return ptexShaderSource; +} + static GLuint genTextureBuffer(GLenum format, GLsizeiptr size, GLvoid const * data) { @@ -154,8 +159,3 @@ GLPtexMipmapTexture::Create(PtexTexture * reader, return result; } - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glPtexMipmapTexture.h b/examples/common/glPtexMipmapTexture.h similarity index 82% rename from opensubdiv/osd/glPtexMipmapTexture.h rename to examples/common/glPtexMipmapTexture.h index 83523276..165a38ea 100644 --- 
a/opensubdiv/osd/glPtexMipmapTexture.h +++ b/examples/common/glPtexMipmapTexture.h @@ -22,29 +22,25 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_GL_PTEX_MIPMAP_TEXTURE_H -#define OSD_GL_PTEX_MIPMAP_TEXTURE_H +#ifndef OPENSUBDIV_EXAMPLES_GL_PTEX_MIPMAP_TEXTURE_H +#define OPENSUBDIV_EXAMPLES_GL_PTEX_MIPMAP_TEXTURE_H -#include "../version.h" - -#include "../osd/nonCopyable.h" -#include "../osd/opengl.h" +#include +#include #include class PtexTexture; -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class GLPtexMipmapTexture : NonCopyable { +class GLPtexMipmapTexture : OpenSubdiv::Osd::NonCopyable { public: static GLPtexMipmapTexture * Create(PtexTexture * reader, int maxLevels=-1, size_t targetMemory=0); + /// Returns GLSL shader snippet to fetch ptex + static const char *GetShaderSource(); + /// Returns the texture buffer containing the layout of the ptex faces /// in the texels texture array. GLuint GetLayoutTextureBuffer() const { return _layout; } @@ -72,11 +68,4 @@ private: size_t _memoryUsage; // total amount of memory used (estimate) }; -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GL_PTEX_MIPMAP_TEXTURE_H +#endif // OPENSUBDIV_EXAMPLES_GL_PTEX_MIPMAP_TEXTURE_H diff --git a/opensubdiv/osd/glslPtexCommon.glsl b/examples/common/glslPtexCommon.glsl similarity index 100% rename from opensubdiv/osd/glslPtexCommon.glsl rename to examples/common/glslPtexCommon.glsl diff --git a/opensubdiv/osd/hlslPtexCommon.hlsl b/examples/common/hlslPtexCommon.hlsl similarity index 100% rename from opensubdiv/osd/hlslPtexCommon.hlsl rename to examples/common/hlslPtexCommon.hlsl diff --git a/examples/common/patchColors.cpp b/examples/common/patchColors.cpp index 941a3b75..eef65dd8 100644 --- a/examples/common/patchColors.cpp +++ b/examples/common/patchColors.cpp @@ -40,9 +40,3 @@ 
getAdaptivePatchColor(Descriptor const & desc) { return _colors[(int)(desc.GetType()-Descriptor::REGULAR)]; } -float const * -getAdaptivePatchColor(OpenSubdiv::Osd::DrawContext::PatchDescriptor const & desc) { - - return _colors[(int)(desc.GetType()-Descriptor::REGULAR)]; -} - diff --git a/examples/common/patchColors.h b/examples/common/patchColors.h index bad5d740..7a16c3cd 100644 --- a/examples/common/patchColors.h +++ b/examples/common/patchColors.h @@ -30,8 +30,6 @@ #include // returns a unique color for each type of feature-adaptive patches -float const * getAdaptivePatchColor(OpenSubdiv::Osd::DrawContext::PatchDescriptor const & desc); - float const * getAdaptivePatchColor(OpenSubdiv::Far::PatchDescriptor const & desc); diff --git a/opensubdiv/osd/ptexMipmapTextureLoader.cpp b/examples/common/ptexMipmapTextureLoader.cpp similarity index 99% rename from opensubdiv/osd/ptexMipmapTextureLoader.cpp rename to examples/common/ptexMipmapTextureLoader.cpp index 503ceb5a..0d785766 100644 --- a/opensubdiv/osd/ptexMipmapTextureLoader.cpp +++ b/examples/common/ptexMipmapTextureLoader.cpp @@ -22,7 +22,7 @@ // language governing permissions and limitations under the Apache License. 
// -#include "../osd/ptexMipmapTextureLoader.h" +#include "ptexMipmapTextureLoader.h" #include #include @@ -32,11 +32,6 @@ #include #include -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - // sample neighbor pixels and populate around blocks void PtexMipmapTextureLoader::Block::guttering(PtexMipmapTextureLoader *loader, @@ -997,9 +992,3 @@ PtexMipmapTextureLoader::generateBuffers() fclose(fp); #endif } - - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/ptexMipmapTextureLoader.h b/examples/common/ptexMipmapTextureLoader.h similarity index 92% rename from opensubdiv/osd/ptexMipmapTextureLoader.h rename to examples/common/ptexMipmapTextureLoader.h index 785243fd..11200d7d 100644 --- a/opensubdiv/osd/ptexMipmapTextureLoader.h +++ b/examples/common/ptexMipmapTextureLoader.h @@ -22,10 +22,8 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_PTEX_MIPMAP_TEXTURE_LOADER_H -#define OSD_PTEX_MIPMAP_TEXTURE_LOADER_H - -#include "../version.h" +#ifndef OPENSUBDIV_EXAMPLES_PTEX_MIPMAP_TEXTURE_LOADER_H +#define OPENSUBDIV_EXAMPLES_PTEX_MIPMAP_TEXTURE_LOADER_H #include #include @@ -33,11 +31,6 @@ class PtexTexture; -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - class PtexMipmapTextureLoader { public: PtexMipmapTextureLoader(PtexTexture *ptex, @@ -158,12 +151,4 @@ private: size_t _memoryUsage; }; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_PTEX_MIPMAP_TEXTURE_LOADER_H +#endif // OPENSUBDIV_EXAMPLES_PTEX_MIPMAP_TEXTURE_LOADER_H diff --git a/examples/dxPtexViewer/dxPtexViewer.cpp b/examples/dxPtexViewer/dxPtexViewer.cpp index 08d85220..8fe04cd6 100644 --- a/examples/dxPtexViewer/dxPtexViewer.cpp +++ b/examples/dxPtexViewer/dxPtexViewer.cpp @@ -25,48 +25,33 @@ #include #include -#include #include 
#include -#include #include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL; +#include #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - + #include #include "../common/clDeviceContext.h" - CLD3D11DeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA #include - #include - #include - + #include #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL; #endif #include -#include -#include -OpenSubdiv::Osd::D3D11ComputeController * g_d3d11ComputeController = NULL; +#include #include OpenSubdiv::Osd::D3D11MeshInterface *g_mesh; @@ -78,6 +63,7 @@ OpenSubdiv::Osd::D3D11MeshInterface *g_mesh; #include "../common/stopwatch.h" #include "../common/simple_math.h" #include "../common/d3d11_hud.h" +#include "../common/d3d11PtexMipmapTexture.h" static const char *g_shaderSource = #include "shader.gen.h" @@ -214,10 +200,10 @@ float g_animTime = 0; std::vector g_positions, g_normals; -OpenSubdiv::Osd::D3D11PtexMipmapTexture * g_osdPTexImage = 0; -OpenSubdiv::Osd::D3D11PtexMipmapTexture * g_osdPTexDisplacement = 0; -OpenSubdiv::Osd::D3D11PtexMipmapTexture * g_osdPTexOcclusion = 0; -OpenSubdiv::Osd::D3D11PtexMipmapTexture * g_osdPTexSpecular = 0; +D3D11PtexMipmapTexture * g_osdPTexImage = 0; +D3D11PtexMipmapTexture * g_osdPTexDisplacement = 0; +D3D11PtexMipmapTexture * g_osdPTexOcclusion = 0; +D3D11PtexMipmapTexture * g_osdPTexSpecular = 0; const char * g_ptexColorFilename; ID3D11Device * g_pd3dDevice = NULL; @@ -427,14 +413,28 @@ union Effect { } }; -typedef std::pair EffectDesc; +struct EffectDesc { + EffectDesc(OpenSubdiv::Far::PatchDescriptor desc, + Effect effect) : desc(desc), effect(effect), + 
maxValence(0), numElements(0) { } + OpenSubdiv::Far::PatchDescriptor desc; + Effect effect; + int maxValence; + int numElements; + + bool operator < (const EffectDesc &e) const { + return desc < e.desc || (desc == e.desc && + (maxValence < e.maxValence || ((maxValence == e.maxValence) && + (effect < e.effect)))); + } +}; class EffectDrawRegistry : public OpenSubdiv::Osd::D3D11DrawRegistry { protected: virtual ConfigType * - _CreateDrawConfig(DescType const & desc, + _CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig, ID3D11Device * pd3dDevice, ID3D11InputLayout ** ppInputLayout, @@ -442,20 +442,33 @@ protected: int numInputElements); virtual SourceConfigType * - _CreateDrawSourceConfig(DescType const & desc, ID3D11Device * pd3dDevice); + _CreateDrawSourceConfig(EffectDesc const & desc, ID3D11Device * pd3dDevice); }; EffectDrawRegistry::SourceConfigType * -EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc, ID3D11Device *pd3dDevice) +EffectDrawRegistry::_CreateDrawSourceConfig(EffectDesc const &effectDesc, ID3D11Device *pd3dDevice) { - Effect effect = desc.second; - - SetPtexEnabled(true); + Effect effect = effectDesc.effect; SourceConfigType * sconfig = - BaseRegistry::_CreateDrawSourceConfig(desc.first, pd3dDevice); + BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc, pd3dDevice); assert(sconfig); + // legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined + if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::GREGORY or + effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::GREGORY_BOUNDARY) { + std::ostringstream ss; + ss << effectDesc.maxValence; + sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); + ss.str(""); + + ss << effectDesc.numElements; + sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); + } + + // add ptex functions + sconfig->commonShader.source += D3D11PtexMipmapTexture::GetShaderSource(); + if (effect.patchCull) 
sconfig->commonShader.AddDefine("OSD_ENABLE_PATCH_CULL"); if (effect.screenSpaceTess) @@ -464,8 +477,8 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc, ID3D11Device sconfig->commonShader.AddDefine("OSD_FRACTIONAL_ODD_SPACING"); bool quad = true; - if (desc.first.GetType() == OpenSubdiv::Far::PatchDescriptor::QUADS || - desc.first.GetType() == OpenSubdiv::Far::PatchDescriptor::TRIANGLES) { + if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::QUADS || + effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::TRIANGLES) { sconfig->vertexShader.source = g_shaderSource; sconfig->vertexShader.target = "vs_5_0"; sconfig->vertexShader.entry = "vs_main"; @@ -583,14 +596,14 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc, ID3D11Device EffectDrawRegistry::ConfigType * EffectDrawRegistry::_CreateDrawConfig( - DescType const & desc, + EffectDesc const & effectDesc, SourceConfigType const * sconfig, ID3D11Device * pd3dDevice, ID3D11InputLayout ** ppInputLayout, D3D11_INPUT_ELEMENT_DESC const * pInputElementDescs, int numInputElements) { - ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.first, sconfig, + ConfigType * config = BaseRegistry::_CreateDrawConfig(effectDesc.desc, sconfig, pd3dDevice, ppInputLayout, pInputElementDescs, numInputElements); assert(config); @@ -600,7 +613,7 @@ EffectDrawRegistry::_CreateDrawConfig( EffectDrawRegistry effectRegistry; //------------------------------------------------------------------------------ -OpenSubdiv::Osd::D3D11PtexMipmapTexture * +D3D11PtexMipmapTexture * createPtex(const char *filename) { Ptex::String ptexError; @@ -620,9 +633,8 @@ createPtex(const char *filename) { printf("Error in reading %s\n", filename); exit(1); } - OpenSubdiv::Osd::D3D11PtexMipmapTexture *osdPtex = - OpenSubdiv::Osd::D3D11PtexMipmapTexture::Create(g_pd3dDeviceContext, - ptex, g_maxMipmapLevels); + D3D11PtexMipmapTexture *osdPtex = D3D11PtexMipmapTexture::Create( + g_pd3dDeviceContext, 
ptex, g_maxMipmapLevels); ptex->release(); @@ -636,6 +648,8 @@ createPtex(const char *filename) { //------------------------------------------------------------------------------ void createOsdMesh(int level, int kernel) { + + using namespace OpenSubdiv; Ptex::String ptexError; PtexTexture *ptexColor = PtexTexture::open(g_ptexColorFilename, ptexError, true); if (ptexColor == NULL) { @@ -685,86 +699,87 @@ createOsdMesh(int level, int kernel) { OpenSubdiv::Osd::MeshBitset bits; bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); - bits.set(OpenSubdiv::Osd::MeshPtexData, true); // gregory basis hasn't supported yet in D3D11Mesh bits.set(OpenSubdiv::Osd::MeshEndCapLegacyGregory, true); int numVertexElements = 6; //g_adaptive ? 3 : 6; int numVaryingElements = 0; - if (kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + if (g_kernel == kCPU) { + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); + #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); +#endif +#ifdef OPENSUBDIV_HAS_TBB + } else if (kernel == kTBB) { + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif #ifdef OPENSUBDIV_HAS_OPENCL - } else if (kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new 
OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, &g_clDeviceContext); + } else if(kernel == kCL) { + static Osd::EvaluatorCacheT clEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA - } else if (kernel == kCUDA) { - if (not g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cudaComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + } else if (g_kernel == kCUDA) { + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif } else if (g_kernel == kDirectCompute) { - if (not g_d3d11ComputeController) { - g_d3d11ComputeController = new OpenSubdiv::Osd::D3D11ComputeController(g_pd3dDeviceContext); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_d3d11ComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + static Osd::EvaluatorCacheT d3d11ComputeEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &d3d11ComputeEvaluatorCache, + g_pd3dDeviceContext); } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); } @@ -778,6 +793,17 @@ bindProgram(Effect effect, OpenSubdiv::Osd::DrawContext::PatchArray const & patc EffectDesc effectDesc(patch.GetDescriptor(), effect); + // only legacy gregory needs maxValence and numElements + int maxValence = g_mesh->GetDrawContext()->GetMaxValence(); + int numElements = 6; + + typedef OpenSubdiv::Far::PatchDescriptor Descriptor; + if (patch.GetDescriptor().GetType() == 
Descriptor::GREGORY or + patch.GetDescriptor().GetType() == Descriptor::GREGORY_BOUNDARY) { + effectDesc.maxValence = maxValence; + effectDesc.numElements = numElements; + } + // input layout const D3D11_INPUT_ELEMENT_DESC hInElementDesc[] = { { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }, @@ -919,10 +945,13 @@ bindProgram(Effect effect, OpenSubdiv::Osd::DrawContext::PatchArray const & patc if (g_mesh->GetDrawContext()->quadOffsetBufferSRV) { g_pd3dDeviceContext->HSSetShaderResources(2, 1, &g_mesh->GetDrawContext()->quadOffsetBufferSRV); } - if (g_mesh->GetDrawContext()->ptexCoordinateBufferSRV) { - g_pd3dDeviceContext->HSSetShaderResources(3, 1, &g_mesh->GetDrawContext()->ptexCoordinateBufferSRV); - g_pd3dDeviceContext->DSSetShaderResources(3, 1, &g_mesh->GetDrawContext()->ptexCoordinateBufferSRV); - g_pd3dDeviceContext->GSSetShaderResources(3, 1, &g_mesh->GetDrawContext()->ptexCoordinateBufferSRV); + if (g_mesh->GetDrawContext()->patchParamBufferSRV) { + g_pd3dDeviceContext->HSSetShaderResources( + 3, 1, &g_mesh->GetDrawContext()->patchParamBufferSRV); + g_pd3dDeviceContext->DSSetShaderResources( + 3, 1, &g_mesh->GetDrawContext()->patchParamBufferSRV); + g_pd3dDeviceContext->GSSetShaderResources( + 3, 1, &g_mesh->GetDrawContext()->patchParamBufferSRV); } g_pd3dDeviceContext->PSSetShaderResources(4, 1, g_osdPTexImage->GetTexelsSRV()); @@ -1123,22 +1152,6 @@ quit() { SAFE_RELEASE(g_pd3dDeviceContext); SAFE_RELEASE(g_pd3dDevice); - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif - - delete g_d3d11ComputeController; - PostQuitMessage(0); exit(0); } diff --git a/examples/dxPtexViewer/shader.hlsl b/examples/dxPtexViewer/shader.hlsl index e0c19191..a4d601dc 100644 --- a/examples/dxPtexViewer/shader.hlsl +++ 
b/examples/dxPtexViewer/shader.hlsl @@ -114,19 +114,10 @@ float4 displacement(float4 position, float3 normal, float4 patchCoord) } #endif -#line 20117 -float4 GeneratePatchCoord(float2 localUV, int primitiveID) // for non-adpative +float4 GeneratePatchCoord(float2 uv, int primitiveID) // for non-adaptive { - int2 ptexIndex = OsdPatchParamBuffer[GetPrimitiveID(primitiveID)].xy; - - int faceID = ptexIndex.x; - int lv = 1 << ((ptexIndex.y & 0xf) - ((ptexIndex.y >> 4) & 1)); - int u = (ptexIndex.y >> 17) & 0x3ff; - int v = (ptexIndex.y >> 7) & 0x3ff; - float2 uv = localUV; - uv = (uv * float2(1, 1)/lv) + float2(u, v)/lv; - - return float4(uv.x, uv.y, lv+0.5, faceID+0.5); + int3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(primitiveID)); + return OsdInterpolatePatchCoord(uv, OsdGetPatchCoord(patchParam)); } // --------------------------------------------------------------------------- @@ -139,6 +130,11 @@ void vs_main( in InputVertex input, output.positionOut = mul(ModelViewProjectionMatrix, input.position); output.position = mul(ModelViewMatrix, input.position); output.normal = mul(ModelViewMatrix,float4(input.normal, 0)).xyz; + + output.patchCoord = float4(0,0,0,0); + output.tangent = float3(0,0,0); + output.bitangent = float3(0,0,0); + output.edgeDistance = float4(0,0,0,0); } // --------------------------------------------------------------------------- diff --git a/examples/dxViewer/dxviewer.cpp b/examples/dxViewer/dxviewer.cpp index 0fde7fc9..34f9a78f 100644 --- a/examples/dxViewer/dxviewer.cpp +++ b/examples/dxViewer/dxviewer.cpp @@ -25,52 +25,37 @@ #include #include -#include #include #include #include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL; +#include #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include 
#endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - + #include #include "../common/clDeviceContext.h" - CLD3D11DeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA #include - #include - #include - + #include #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL; #endif #include -#include -#include -OpenSubdiv::Osd::D3D11ComputeController * g_d3d11ComputeController = NULL; +#include #include OpenSubdiv::Osd::D3D11MeshInterface *g_mesh; @@ -273,17 +258,18 @@ getKernelName(int kernel) { static void createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=kCatmark) { - typedef OpenSubdiv::Far::ConstIndexArray IndexArray; + using namespace OpenSubdiv; + typedef Far::ConstIndexArray IndexArray; Shape * shape = Shape::parseObj(shapeDesc.data.c_str(), shapeDesc.scheme); // create Vtr mesh (topology) - OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape); - OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape); + Sdc::SchemeType sdctype = GetSdcType(*shape); + Sdc::Options sdcoptions = GetSdcOptions(*shape); - OpenSubdiv::Far::TopologyRefiner * refiner = - OpenSubdiv::Far::TopologyRefinerFactory::Create(*shape, - OpenSubdiv::Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); + Far::TopologyRefiner * refiner = + Far::TopologyRefinerFactory::Create(*shape, + Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); // save coarse topology (used for coarse mesh drawing) int nedges = refiner->GetNumEdges(0), @@ -317,104 +303,90 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme= bool doAdaptive = (g_adaptive!=0 and g_scheme==kCatmark), doSingleCreasePatch = (g_singleCreasePatch!=0 and g_scheme==kCatmark); - OpenSubdiv::Osd::MeshBitset bits; - bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); - 
bits.set(OpenSubdiv::Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); + Osd::MeshBitset bits; + bits.set(Osd::MeshAdaptive, doAdaptive); + bits.set(Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); // gregory basis hasn't supported yet in D3D11Mesh - bits.set(OpenSubdiv::Osd::MeshEndCapLegacyGregory, true); + bits.set(Osd::MeshEndCapLegacyGregory, true); int numVertexElements = 6; int numVaryingElements = 0; if (g_kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); + #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == kTBB) { - if (not g_tbbComputeController) { - g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_tbbComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if(kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } - g_mesh = new 
OpenSubdiv::Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, - &g_clDeviceContext); + static Osd::EvaluatorCacheT clEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if (g_kernel == kCUDA) { - if (not g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cudaComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, NULL, g_pd3dDeviceContext); #endif } else if (g_kernel == kDirectCompute) { - if (not g_d3d11ComputeController) { - g_d3d11ComputeController = new OpenSubdiv::Osd::D3D11ComputeController(g_pd3dDeviceContext); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_d3d11ComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, g_pd3dDeviceContext); + static Osd::EvaluatorCacheT d3d11ComputeEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &d3d11ComputeEvaluatorCache, + g_pd3dDeviceContext); } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); } @@ -472,14 +444,28 @@ union Effect { } }; +struct EffectDesc { + EffectDesc(OpenSubdiv::Far::PatchDescriptor desc, + Effect effect) : desc(desc), effect(effect), + maxValence(0), numElements(0) { } -typedef std::pair EffectDesc; + OpenSubdiv::Far::PatchDescriptor desc; + Effect effect; + int maxValence; + int numElements; + + bool operator < (const EffectDesc &e) const { + return desc < e.desc || (desc == e.desc && + (maxValence < e.maxValence || ((maxValence == e.maxValence) && + (effect < e.effect)))); + } +}; class EffectDrawRegistry : public OpenSubdiv::Osd::D3D11DrawRegistry { 
protected: virtual ConfigType * - _CreateDrawConfig(DescType const & desc, + _CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig, ID3D11Device * pd3dDevice, ID3D11InputLayout ** ppInputLayout, @@ -487,28 +473,40 @@ protected: int numInputElements); virtual SourceConfigType * - _CreateDrawSourceConfig(DescType const & desc, ID3D11Device * pd3dDevice); + _CreateDrawSourceConfig(EffectDesc const & desc, ID3D11Device * pd3dDevice); }; EffectDrawRegistry::SourceConfigType * EffectDrawRegistry::_CreateDrawSourceConfig( - DescType const & desc, ID3D11Device * pd3dDevice) { + EffectDesc const &effectDesc, ID3D11Device * pd3dDevice) { - Effect effect = desc.second; + Effect effect = effectDesc.effect; SourceConfigType * sconfig = - BaseRegistry::_CreateDrawSourceConfig(desc.first, pd3dDevice); + BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc, pd3dDevice); sconfig->commonShader.AddDefine("OSD_ENABLE_PATCH_CULL"); sconfig->commonShader.AddDefine("OSD_ENABLE_SCREENSPACE_TESSELLATION"); + // legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined + if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::GREGORY or + effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::GREGORY_BOUNDARY) { + std::ostringstream ss; + ss << effectDesc.maxValence; + sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); + ss.str(""); + + ss << effectDesc.numElements; + sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); + } + bool smoothNormals = false; - if (desc.first.GetType() == OpenSubdiv::Far::PatchDescriptor::QUADS || - desc.first.GetType() == OpenSubdiv::Far::PatchDescriptor::TRIANGLES) { + if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::QUADS || + effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::TRIANGLES) { sconfig->vertexShader.source = shaderSource; sconfig->vertexShader.target = "vs_5_0"; sconfig->vertexShader.entry = "vs_main"; - } else if (desc.first.GetType() == 
OpenSubdiv::Far::PatchDescriptor::TRIANGLES) { + } else if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::TRIANGLES) { if (effect.displayStyle == kQuadWire) effect.displayStyle = kTriWire; if (effect.displayStyle == kQuadFill) effect.displayStyle = kTriFill; if (effect.displayStyle == kQuadLine) effect.displayStyle = kTriLine; @@ -612,7 +610,7 @@ EffectDrawRegistry::_CreateDrawConfig( D3D11_INPUT_ELEMENT_DESC const * pInputElementDescs, int numInputElements) { - ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.first, sconfig, + ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.desc, sconfig, pd3dDevice, ppInputLayout, pInputElementDescs, numInputElements); assert(config); @@ -640,6 +638,17 @@ bindProgram(Effect effect, OpenSubdiv::Osd::DrawContext::PatchArray const & patc EffectDesc effectDesc(patch.GetDescriptor(), effect); + // only legacy gregory needs maxValence and numElements + int maxValence = g_mesh->GetDrawContext()->GetMaxValence(); + int numElements = 6; + + typedef OpenSubdiv::Far::PatchDescriptor Descriptor; + if (patch.GetDescriptor().GetType() == Descriptor::GREGORY or + patch.GetDescriptor().GetType() == Descriptor::GREGORY_BOUNDARY) { + effectDesc.maxValence = maxValence; + effectDesc.numElements = numElements; + } + // input layout const D3D11_INPUT_ELEMENT_DESC hInElementDesc[] = { { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }, @@ -790,9 +799,11 @@ bindProgram(Effect effect, OpenSubdiv::Osd::DrawContext::PatchArray const & patc if (g_mesh->GetDrawContext()->quadOffsetBufferSRV) { g_pd3dDeviceContext->HSSetShaderResources(2, 1, &g_mesh->GetDrawContext()->quadOffsetBufferSRV); } - if (g_mesh->GetDrawContext()->ptexCoordinateBufferSRV) { - g_pd3dDeviceContext->HSSetShaderResources(3, 1, &g_mesh->GetDrawContext()->ptexCoordinateBufferSRV); - g_pd3dDeviceContext->DSSetShaderResources(3, 1, &g_mesh->GetDrawContext()->ptexCoordinateBufferSRV); + if 
(g_mesh->GetDrawContext()->patchParamBufferSRV) { + g_pd3dDeviceContext->HSSetShaderResources( + 3, 1, &g_mesh->GetDrawContext()->patchParamBufferSRV); + g_pd3dDeviceContext->DSSetShaderResources( + 3, 1, &g_mesh->GetDrawContext()->patchParamBufferSRV); } } @@ -847,7 +858,7 @@ display() { for (int i=0; i<(int)patches.size(); ++i) { OpenSubdiv::Osd::DrawContext::PatchArray const & patch = patches[i]; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); patchCount[patchType] += patch.GetNumPatches(); @@ -857,7 +868,7 @@ display() { if (g_mesh->GetDrawContext()->IsAdaptive()) { - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); switch (desc.GetNumControlVertices()) { case 4: @@ -909,10 +920,6 @@ display() { patchCount[Descriptor::QUADS]); y += 20; g_hud->DrawString(x, y, "Regular : %d", patchCount[Descriptor::REGULAR]); y+= 20; - g_hud->DrawString(x, y, "Boundary : %d", - patchCount[Descriptor::BOUNDARY]); y+= 20; - g_hud->DrawString(x, y, "Corner : %d", - patchCount[Descriptor::CORNER]); y+= 20; g_hud->DrawString(x, y, "Gregory : %d", patchCount[Descriptor::GREGORY]); y+= 20; g_hud->DrawString(x, y, "Boundary Gregory : %d", @@ -996,26 +1003,6 @@ quit() { SAFE_RELEASE(g_pd3dDeviceContext); SAFE_RELEASE(g_pd3dDevice); - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif - - delete g_d3d11ComputeController; - PostQuitMessage(0); exit(0); } diff --git a/examples/glEvalLimit/glEvalLimit.cpp b/examples/glEvalLimit/glEvalLimit.cpp index 5b43c7dd..8084aa32 100755 --- 
a/examples/glEvalLimit/glEvalLimit.cpp +++ b/examples/glEvalLimit/glEvalLimit.cpp @@ -42,20 +42,18 @@ GLFWwindow* g_window=0; GLFWmonitor* g_primary=0; -#include -#include -#include -#include +#include #include #include #include #include -#include + #include #include #include #include #include +#include #include @@ -139,7 +137,7 @@ int g_nparticles=0, g_nsamples=101, g_nsamplesFound=0; -bool g_randomStart=true; +bool g_randomStart=false; GLuint g_cageEdgeVAO = 0, g_cageEdgeVBO = 0, @@ -199,13 +197,12 @@ Far::TopologyRefiner * g_topologyRefiner = 0; Osd::CpuVertexBuffer * g_vertexData = 0, * g_varyingData = 0; -Osd::CpuComputeContext * g_computeCtx = 0; +Far::StencilTables const * g_vertexStencils = NULL; +Far::StencilTables const * g_varyingStencils = NULL; -Osd::CpuComputeController g_computeCtrl; - -Osd::CpuEvalLimitContext * g_evalCtx = 0; - -Osd::CpuEvalLimitController g_evalCtrl; +Far::PatchTables const * g_patchTables = NULL; +Far::PatchMap const * g_patchMap = NULL; +Osd::PatchCoordArray g_patchCoords; Osd::VertexBufferDescriptor g_idesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 3 ), g_odesc( /*offset*/ 0, /*legnth*/ 3, /*stride*/ 6 ), @@ -246,7 +243,25 @@ updateGeom() { g_vertexData->UpdateData( &g_positions[0], 0, nverts); - g_computeCtrl.Compute(g_computeCtx, g_vertexData, g_varyingData); + if (! g_topologyRefiner) return; + + // note that for patch eval we need coarse+refined combined buffer. 
+ int nCoarseVertices = g_topologyRefiner->GetNumVertices(0); + Osd::CpuEvaluator::EvalStencils(g_vertexData, + Osd::VertexBufferDescriptor(0, 3, 3), + g_vertexData, + Osd::VertexBufferDescriptor( + nCoarseVertices*3, 3, 3), + g_vertexStencils); + + if (g_varyingData) { + Osd::CpuEvaluator::EvalStencils(g_varyingData, + Osd::VertexBufferDescriptor(0, 3, 3), + g_varyingData, + Osd::VertexBufferDescriptor( + nCoarseVertices*3, 3, 3), + g_varyingStencils); + } s.Stop(); g_computeTime = float(s.GetElapsed() * 1000.0f); @@ -256,66 +271,37 @@ updateGeom() { s.Start(); - // The varying data ends-up interleaved in the same g_Q output buffer because - // g_Q has a stride of 6 and g_vdesc sets the offset to 3, while g_odesc sets - // the offset to 0 - switch (g_drawMode) { - case kVARYING : g_evalCtrl.BindVaryingBuffers( g_idesc, g_varyingData, g_vdesc, g_Q ); break; - - case kFACEVARYING : //g_evalCtrl.BindFacevaryingBuffers( g_fvidesc, g_fvodesc, g_Q ); break; - case kRANDOM : - case kUV : - default : g_evalCtrl.Unbind(); break; - } - - // Bind/Unbind of the vertex buffers to the context needs to happen - // outside of the parallel loop - g_evalCtrl.BindVertexBuffers( g_idesc, g_vertexData, g_odesc, g_Q, g_dQs, g_dQt ); - - // Apply 'dynamics' update assert(g_particles); g_particles->Update(g_evalTime); // XXXX g_evalTime is not really elapsed time... 
- // Evaluate the positions of the samples on the limit surface - g_nsamplesFound=0; -#define USE_OPENMP -#if defined(OPENSUBDIV_HAS_OPENMP) and defined(USE_OPENMP) - #pragma omp parallel for -#endif - for (int i=0; iGetPositions()[i]; - - int n = g_evalCtrl.EvalLimitSample( coord, g_evalCtx, i ); - - if (n) { - // point colors - switch (g_drawMode) { - case kUV : { float * color = g_Q->BindCpuBuffer() + i*g_Q->GetNumElements() + 3; - color[0] = coord.s; - color[1] = 0.0f; - color[2] = coord.t; } break; - - case kRANDOM : // no update needed - case kVARYING : - case kFACEVARYING : break; - - default : break; - } -#if defined(OPENSUBDIV_HAS_OPENMP) and defined(USE_OPENMP) - #pragma omp atomic -#endif - g_nsamplesFound += n; - } else { - // "hide" unfound samples (hole tags...) as a black dot at the origin - float * sample = g_Q->BindCpuBuffer() + i*g_Q->GetNumElements(); - memset(sample, 0, g_Q->GetNumElements() * sizeof(float)); + // resolve particle positions into patch handles + // XXX: this process should be handled by OsdKernel in parallel + g_patchCoords.clear(); + for (int i = 0; i < g_particles->GetNumParticles(); ++i) { + STParticles::Position const &position = g_particles->GetPositions()[i]; + Far::PatchTables::PatchHandle const *handle = + g_patchMap->FindPatch(position.ptexIndex, position.s, position.t); + if (handle) { + g_patchCoords.push_back(Osd::PatchCoord( + *handle, position.s, position.t)); } } - g_evalCtrl.Unbind(); + // Evaluate the positions of the samples on the limit surface + g_nsamplesFound = Osd::CpuEvaluator::EvalPatches(g_vertexData, g_idesc, + g_Q, g_odesc, + g_patchCoords, + g_patchTables, NULL); + + // varying + if (g_drawMode == kVARYING) { + Osd::CpuEvaluator::EvalPatches(g_varyingData, g_idesc, + g_Q, g_vdesc, + g_patchCoords, + g_patchTables, NULL); + } g_Q->BindVBO(); @@ -336,7 +322,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape); delete g_topologyRefiner; 
- OpenSubdiv::Far::TopologyRefiner * g_topologyRefiner = + g_topologyRefiner = OpenSubdiv::Far::TopologyRefinerFactory::Create(*shape, OpenSubdiv::Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); @@ -351,7 +337,7 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { // location samples (ptex face index, (s,t) and updates them between frames. // Note: the number of limit locations can be entirely arbitrary delete g_particles; - g_particles = new STParticles(*g_topologyRefiner, g_nsamples, g_randomStart); + g_particles = new STParticles(*g_topologyRefiner, g_nsamples, !g_randomStart); g_nparticles = g_particles->GetNumParticles(); g_particles->SetSpeed(speed); @@ -410,18 +396,17 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level) { nverts = vertexStencils->GetNumControlVertices() + vertexStencils->GetNumStencils(); - // Create an Osd Compute context, used to "pose" the vertices with - // the stencils tables - delete g_computeCtx; - g_computeCtx = Osd::CpuComputeContext::Create(vertexStencils, - varyingStencils); + if (g_vertexStencils) delete g_vertexStencils; + g_vertexStencils = vertexStencils; + if (g_varyingStencils) delete g_varyingStencils; + g_varyingStencils = varyingStencils; - // Create a limit Eval context with the patch tables - delete g_evalCtx; - g_evalCtx = Osd::CpuEvalLimitContext::Create(*patchTables); + if (g_patchTables) delete g_patchTables; + g_patchTables = patchTables; - delete vertexStencils; - delete varyingStencils; + // Create a far patch map + if (g_patchMap) delete g_patchMap; + g_patchMap = new Far::PatchMap(*g_patchTables); } { // Create vertex primvar buffer for the CVs @@ -911,7 +896,7 @@ callbackFreeze(bool checked, int /* f */) { //------------------------------------------------------------------------------ static void callbackCentered(bool checked, int /* f */) { - g_randomStart = !checked; + g_randomStart = checked; createOsdMesh(g_defaultShapes[g_currentShape], g_level); } @@ -954,7 +939,7 @@ initHUD() { 
g_hud.AddCheckBox("Animate vertices (M)", g_moveScale != 0, 10, 50, callbackAnimate, 0, 'm'); g_hud.AddCheckBox("Freeze (spc)", false, 10, 70, callbackFreeze, 0, ' '); - g_hud.AddCheckBox("Random Start", false, 10, 120, callbackCentered, 0); + g_hud.AddCheckBox("Random Start", false, 10, 120, callbackCentered, g_randomStart); int shading_pulldown = g_hud.AddPullDown("Shading (W)", 250, 10, 250, callbackDisplayVaryingColors, 'w'); g_hud.AddPullDownButton(shading_pulldown, "Random", kRANDOM, g_drawMode==kRANDOM); diff --git a/examples/glEvalLimit/particles.h b/examples/glEvalLimit/particles.h index 80f7c2b6..ea1f798d 100644 --- a/examples/glEvalLimit/particles.h +++ b/examples/glEvalLimit/particles.h @@ -25,7 +25,6 @@ #ifndef ST_PARTICLES_H #define ST_PARTICLES_H -#include #include #include @@ -54,8 +53,25 @@ class STParticles { public: + /// \brief Coordinates set on a limit surface + /// + struct Position { + Position() { } + + /// \brief Constructor + /// + /// @param f Ptex face id + /// + /// @param x parametric location on face + /// + /// @param y parametric location on face + /// + Position(int f, float x, float y) : ptexIndex(f), s(x), t(y) { } + + int ptexIndex; ///< ptex face index + float s, t; ///< parametric location on face + }; - typedef OpenSubdiv::Osd::LimitLocation Position; typedef OpenSubdiv::Far::TopologyRefiner Refiner; STParticles(Refiner const & refiner, int nparticles, bool centered=false); diff --git a/examples/glFVarViewer/glFVarViewer.cpp b/examples/glFVarViewer/glFVarViewer.cpp index c9b5d0c1..04c26302 100644 --- a/examples/glFVarViewer/glFVarViewer.cpp +++ b/examples/glFVarViewer/glFVarViewer.cpp @@ -42,15 +42,12 @@ GLFWwindow* g_window = 0; GLFWmonitor* g_primary = 0; -#include #include #include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #include OpenSubdiv::Osd::GLMeshInterface *g_mesh = NULL; @@ -359,20 +356,15 @@ createOsdMesh(ShapeDesc const & shapeDesc, 
int level, Scheme scheme = kCatmark) int numVertexElements = 3; int numVaryingElements = 0; - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - delete g_mesh; - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::CpuEvaluator, + OpenSubdiv::Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); std::vector fvarData; @@ -537,25 +529,40 @@ union Effect { } }; -typedef std::pair EffectDesc; +struct EffectDesc { + EffectDesc(OpenSubdiv::Far::PatchDescriptor desc, + Effect effect) : desc(desc), effect(effect), + maxValence(0), numElements(0) { } + + OpenSubdiv::Far::PatchDescriptor desc; + Effect effect; + int maxValence; + int numElements; + + bool operator < (const EffectDesc &e) const { + return desc < e.desc || (desc == e.desc && + (maxValence < e.maxValence || ((maxValence == e.maxValence) && + (effect < e.effect)))); + } +}; class EffectDrawRegistry : public OpenSubdiv::Osd::GLDrawRegistry { protected: virtual ConfigType * - _CreateDrawConfig(DescType const & desc, SourceConfigType const * sconfig); + _CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig); virtual SourceConfigType * - _CreateDrawSourceConfig(DescType const & desc); + _CreateDrawSourceConfig(EffectDesc const & desc); }; EffectDrawRegistry::SourceConfigType * -EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { +EffectDrawRegistry::_CreateDrawSourceConfig(EffectDesc const & effectDesc) { - Effect effect = desc.second; + Effect effect = effectDesc.effect; SourceConfigType * sconfig = - BaseRegistry::_CreateDrawSourceConfig(desc.first); + BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc); assert(sconfig); @@ -567,8 +574,20 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { typedef 
OpenSubdiv::Far::PatchDescriptor Descriptor; - if (desc.first.GetType() == Descriptor::QUADS or - desc.first.GetType() == Descriptor::TRIANGLES) { + // legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined + if (effectDesc.desc.GetType() == Descriptor::GREGORY or + effectDesc.desc.GetType() == Descriptor::GREGORY_BOUNDARY) { + std::ostringstream ss; + ss << effectDesc.maxValence; + sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); + ss.str(""); + + ss << effectDesc.numElements; + sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); + } + + if (effectDesc.desc.GetType() == Descriptor::QUADS or + effectDesc.desc.GetType() == Descriptor::TRIANGLES) { sconfig->vertexShader.source = shaderSource; sconfig->vertexShader.version = glslVersion; sconfig->vertexShader.AddDefine("VERTEX_SHADER"); @@ -587,12 +606,12 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { sconfig->commonShader.AddDefine("OSD_FVAR_WIDTH", "2"); - if (desc.first.GetType() == Descriptor::QUADS) { + if (effectDesc.desc.GetType() == Descriptor::QUADS) { // uniform catmark, bilinear sconfig->geometryShader.AddDefine("PRIM_QUAD"); sconfig->fragmentShader.AddDefine("PRIM_QUAD"); sconfig->commonShader.AddDefine("UNIFORM_SUBDIVISION"); - } else if (desc.first.GetType() == Descriptor::TRIANGLES) { + } else if (effectDesc.desc.GetType() == Descriptor::TRIANGLES) { // uniform loop sconfig->geometryShader.AddDefine("PRIM_TRI"); sconfig->fragmentShader.AddDefine("PRIM_TRI"); @@ -630,10 +649,10 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { EffectDrawRegistry::ConfigType * EffectDrawRegistry::_CreateDrawConfig( - DescType const & desc, + DescType const & effectDesc, SourceConfigType const * sconfig) { - ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.first, sconfig); + ConfigType * config = BaseRegistry::_CreateDrawConfig(effectDesc.desc, sconfig); assert(config); GLuint uboIndex; @@ -701,6 +720,18 @@ static GLuint 
bindProgram(Effect effect, OpenSubdiv::Osd::DrawContext::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); + + // only legacy gregory needs maxValence and numElements + int maxValence = g_mesh->GetDrawContext()->GetMaxValence(); + int numElements = 3; + + typedef OpenSubdiv::Far::PatchDescriptor Descriptor; + if (patch.GetDescriptor().GetType() == Descriptor::GREGORY or + patch.GetDescriptor().GetType() == Descriptor::GREGORY_BOUNDARY) { + effectDesc.maxValence = maxValence; + effectDesc.numElements = numElements; + } + EffectDrawRegistry::ConfigType * config = effectRegistry.GetDrawConfig(effectDesc); @@ -822,7 +853,7 @@ display() { for (int i = 0; i < (int)patches.size(); ++i) { OpenSubdiv::Osd::DrawContext::PatchArray const & patch = patches[i]; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); GLenum primType; @@ -889,7 +920,7 @@ display() { for (int i = 0; i < (int)patches.size(); ++i) { OpenSubdiv::Osd::DrawContext::PatchArray const & patch = patches[i]; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); GLenum primType; @@ -1009,8 +1040,6 @@ uninitGL() { if (g_mesh) delete g_mesh; - - delete g_cpuComputeController; } //------------------------------------------------------------------------------ diff --git a/examples/glImaging/glImaging.cpp b/examples/glImaging/glImaging.cpp index 4078a15a..768dc60f 100755 --- a/examples/glImaging/glImaging.cpp +++ b/examples/glImaging/glImaging.cpp @@ -46,55 +46,40 @@ #include #include +#include #include -#include -#include - OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController 
*g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL + #include #include - #include - #include #include "../common/clDeviceContext.h" - CLDeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA + #include #include - #include - #include - #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include + #include #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslTransformFeedbackComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include + #include #include - OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL; #endif #include @@ -244,103 +229,82 @@ createOsdMesh(std::string const &kernel, Osd::MeshBitset bits) { if (kernel == "CPU") { - if (not g_cpuComputeController) { - g_cpuComputeController = new Osd::CpuComputeController(); - } return new Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Far::StencilTables, + Osd::CpuEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == "OPENMP") { - if (not g_ompComputeController) { - g_ompComputeController = new Osd::OmpComputeController(); - } return new Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Far::StencilTables, + Osd::OmpEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == "TBB") { - if (not g_tbbComputeController) { - 
g_tbbComputeController = new Osd::TbbComputeController(); - } return new Osd::Mesh( - g_tbbComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Far::StencilTables, + Osd::TbbEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if(kernel == "CL") { - if (not g_clComputeController) { - g_clComputeController = new Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } return new Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, &g_clDeviceContext); + Osd::CLStencilTables, + Osd::CLEvaluator, + Osd::GLDrawContext, + CLDeviceContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + NULL, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if(kernel == "CUDA") { - if (not g_cudaComputeController) { - g_cudaComputeController = new Osd::CudaComputeController(); - } return new Osd::Mesh( - g_cudaComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Osd::CudaStencilTables, + Osd::CudaEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if(kernel == "XFB") { - if (not g_glslTransformFeedbackComputeController) { - g_glslTransformFeedbackComputeController = new Osd::GLSLTransformFeedbackComputeController(); - } return new Osd::Mesh( - g_glslTransformFeedbackComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Osd::GLStencilTablesTBO, + Osd::GLXFBEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if(kernel == "GLSL") { - if (not g_glslComputeController) { - g_glslComputeController = new Osd::GLSLComputeController(); - } return new 
Osd::Mesh( - g_glslComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + Osd::GLStencilTablesSSBO, + Osd::GLComputeEvaluator, + Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif } @@ -461,7 +425,7 @@ void runTest(ShapeDesc const &shapeDesc, std::string const &kernel, for (int i=0; i<(int)patches.size(); ++i) { Osd::DrawContext::PatchArray const & patch = patches[i]; - Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + Far::PatchDescriptor desc = patch.GetDescriptor(); Far::PatchDescriptor::Type patchType = desc.GetType(); GLenum primType; diff --git a/examples/glPaintTest/glPaintTest.cpp b/examples/glPaintTest/glPaintTest.cpp index f0242954..f71c4a42 100644 --- a/examples/glPaintTest/glPaintTest.cpp +++ b/examples/glPaintTest/glPaintTest.cpp @@ -42,16 +42,13 @@ GLFWwindow* g_window=0; GLFWmonitor* g_primary=0; -#include #include #include #include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #include OpenSubdiv::Osd::GLMeshInterface *g_mesh; @@ -239,16 +236,12 @@ createOsdMesh() { bool doAdaptive = true; OpenSubdiv::Osd::MeshBitset bits; bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); - bits.set(OpenSubdiv::Osd::MeshPtexData, true); - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, 3, 0, g_level, bits); + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::CpuEvaluator, + OpenSubdiv::Osd::GLDrawContext>( + refiner, 3, 0, g_level, bits); // compute model bounding float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX}; @@ -347,25 +340,54 @@ union Effect { } }; -typedef std::pair EffectDesc; +struct EffectDesc { + EffectDesc(OpenSubdiv::Far::PatchDescriptor desc, + Effect effect) : desc(desc), effect(effect), + maxValence(0), numElements(0) { } + + 
OpenSubdiv::Far::PatchDescriptor desc; + Effect effect; + int maxValence; + int numElements; + + bool operator < (const EffectDesc &e) const { + return desc < e.desc || (desc == e.desc && + (maxValence < e.maxValence || ((maxValence == e.maxValence) && + (effect < e.effect)))); + } +}; class EffectDrawRegistry : public OpenSubdiv::Osd::GLDrawRegistry { protected: virtual ConfigType * - _CreateDrawConfig(DescType const & desc, SourceConfigType const * sconfig); + _CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig); virtual SourceConfigType * - _CreateDrawSourceConfig(DescType const & desc); + _CreateDrawSourceConfig(EffectDesc const & desc); }; EffectDrawRegistry::SourceConfigType * -EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { +EffectDrawRegistry::_CreateDrawSourceConfig(EffectDesc const & effectDesc) { - Effect effect = desc.second; + typedef OpenSubdiv::Far::PatchDescriptor Descriptor; + + Effect effect = effectDesc.effect; SourceConfigType * sconfig = - BaseRegistry::_CreateDrawSourceConfig(desc.first); + BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc); + + // legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined + if (effectDesc.desc.GetType() == Descriptor::GREGORY or + effectDesc.desc.GetType() == Descriptor::GREGORY_BOUNDARY) { + std::ostringstream ss; + ss << effectDesc.maxValence; + sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); + ss.str(""); + + ss << effectDesc.numElements; + sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); + } sconfig->commonShader.AddDefine("USE_PTEX_COORD"); @@ -423,10 +445,10 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { EffectDrawRegistry::ConfigType * EffectDrawRegistry::_CreateDrawConfig( - DescType const & desc, + DescType const & effectDesc, SourceConfigType const * sconfig) { - ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.first, sconfig); + ConfigType * config = 
BaseRegistry::_CreateDrawConfig(effectDesc.desc, sconfig); assert(config); GLuint uboIndex; @@ -652,7 +674,7 @@ display() { // patch drawing for (int i=0; i<(int)patches.size(); ++i) { OpenSubdiv::Osd::DrawContext::PatchArray const & patch = patches[i]; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); GLenum primType = GL_PATCHES; glPatchParameteri(GL_PATCH_VERTICES, desc.GetNumControlVertices()); @@ -823,7 +845,7 @@ drawStroke(int x, int y) { for (int i=0; i<(int)patches.size(); ++i) { OpenSubdiv::Osd::DrawContext::PatchArray const & patch = patches[i]; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); GLenum primType = GL_PATCHES; glPatchParameteri(GL_PATCH_VERTICES, desc.GetNumControlVertices()); @@ -1096,8 +1118,6 @@ uninitGL() { if (g_mesh) delete g_mesh; - - delete g_cpuComputeController; } //------------------------------------------------------------------------------ diff --git a/examples/glPtexViewer/glPtexViewer.cpp b/examples/glPtexViewer/glPtexViewer.cpp index 63711bb3..864fbe82 100644 --- a/examples/glPtexViewer/glPtexViewer.cpp +++ b/examples/glPtexViewer/glPtexViewer.cpp @@ -52,59 +52,41 @@ GLFWmonitor* g_primary = 0; #include #include -#include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL; #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController * g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL + #include #include - #include - #include - #include "../common/clDeviceContext.h" - CLDeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_CUDA 
+ #include #include - #include - #include - #include "../common/cudaDeviceContext.h" - CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include + #include #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController - *g_glslTransformFeedbackComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include + #include #include - OpenSubdiv::Osd::GLSLComputeController * g_glslComputeController = NULL; #endif #include @@ -120,6 +102,7 @@ OpenSubdiv::Osd::GLMeshInterface *g_mesh; #include "../common/patchColors.h" #include "../common/hdr_reader.h" #include "../common/stb_image_write.h" +#include "../common/glPtexMipmapTexture.h" static const char *g_defaultShaderSource = #if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0) @@ -328,10 +311,10 @@ struct ImageShader { //------------------------------------------------------------------------------ -OpenSubdiv::Osd::GLPtexMipmapTexture * g_osdPTexImage = 0; -OpenSubdiv::Osd::GLPtexMipmapTexture * g_osdPTexDisplacement = 0; -OpenSubdiv::Osd::GLPtexMipmapTexture * g_osdPTexOcclusion = 0; -OpenSubdiv::Osd::GLPtexMipmapTexture * g_osdPTexSpecular = 0; +GLPtexMipmapTexture * g_osdPTexImage = 0; +GLPtexMipmapTexture * g_osdPTexDisplacement = 0; +GLPtexMipmapTexture * g_osdPTexOcclusion = 0; +GLPtexMipmapTexture * g_osdPTexSpecular = 0; const char * g_ptexColorFilename; size_t g_ptexMemoryUsage = 0; @@ -636,7 +619,7 @@ static GLuint compileShader(GLenum shaderType, //------------------------------------------------------------------------------ -int bindPTexture(GLint program, OpenSubdiv::Osd::GLPtexMipmapTexture *osdPTex, +int bindPTexture(GLint program, GLPtexMipmapTexture *osdPTex, GLuint data, GLuint packing, int samplerUnit) { #if defined(GL_ARB_separate_shader_objects) || defined(GL_VERSION_4_1) @@ -681,30 +664,45 @@ union Effect { } }; -typedef 
std::pair EffectDesc; +struct EffectDesc { + EffectDesc(OpenSubdiv::Far::PatchDescriptor desc, + Effect effect) : desc(desc), effect(effect), + maxValence(0), numElements(0) { } + + OpenSubdiv::Far::PatchDescriptor desc; + Effect effect; + int maxValence; + int numElements; + + bool operator < (const EffectDesc &e) const { + return desc < e.desc || (desc == e.desc && + (maxValence < e.maxValence || ((maxValence == e.maxValence) && + (effect < e.effect)))); + } +}; class EffectDrawRegistry : public OpenSubdiv::Osd::GLDrawRegistry { protected: virtual ConfigType * - _CreateDrawConfig(DescType const & desc, SourceConfigType const * sconfig); + _CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig); virtual SourceConfigType * - _CreateDrawSourceConfig(DescType const & desc); + _CreateDrawSourceConfig(EffectDesc const & desc); }; //------------------------------------------------------------------------------ - EffectDrawRegistry::SourceConfigType * -EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { +EffectDrawRegistry::_CreateDrawSourceConfig(EffectDesc const & effectDesc) { - Effect effect = desc.second; - - SetPtexEnabled(true); + Effect effect = effectDesc.effect; SourceConfigType * sconfig = - BaseRegistry::_CreateDrawSourceConfig(desc.first); + BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc); + + // add ptex functions + sconfig->commonShader.source += GLPtexMipmapTexture::GetShaderSource(); if (effect.patchCull) sconfig->commonShader.AddDefine("OSD_ENABLE_PATCH_CULL"); @@ -719,15 +717,27 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { const char *glslVersion = "#version 330\n"; #endif + // legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined + if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::GREGORY or + effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::GREGORY_BOUNDARY) { + std::ostringstream ss; + ss << effectDesc.maxValence; + 
sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); + ss.str(""); + + ss << effectDesc.numElements; + sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); + } + int nverts = 4; - if (desc.first.GetType() == OpenSubdiv::Far::PatchDescriptor::QUADS) { + if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::QUADS) { sconfig->vertexShader.source = g_shaderSource; sconfig->vertexShader.version = glslVersion; sconfig->vertexShader.AddDefine("VERTEX_SHADER"); if (effect.displacement) { sconfig->geometryShader.AddDefine("FLAT_NORMALS"); } - } else if (desc.first.GetType() == OpenSubdiv::Far::PatchDescriptor::LINES) { + } else if (effectDesc.desc.GetType() == OpenSubdiv::Far::PatchDescriptor::LINES) { nverts = 2; sconfig->vertexShader.source = g_shaderSource; sconfig->vertexShader.version = glslVersion; @@ -848,10 +858,10 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { EffectDrawRegistry::ConfigType * EffectDrawRegistry::_CreateDrawConfig( - DescType const & desc, + DescType const & effectDesc, SourceConfigType const * sconfig) { - ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.first, sconfig); + ConfigType * config = BaseRegistry::_CreateDrawConfig(effectDesc.desc, sconfig); assert(config); // XXXdyu can use layout(binding=) with GLSL 4.20 and beyond @@ -910,10 +920,18 @@ EffectDrawRegistry effectRegistry; EffectDrawRegistry::ConfigType * getInstance(Effect effect, - OpenSubdiv::Osd::DrawContext::PatchDescriptor const & patchDesc) { + OpenSubdiv::Far::PatchDescriptor const & patchDesc) { EffectDesc desc(patchDesc, effect); + // only legacy gregory needs maxValence and numElements + typedef OpenSubdiv::Far::PatchDescriptor Descriptor; + if (patchDesc.GetType() == Descriptor::GREGORY or + patchDesc.GetType() == Descriptor::GREGORY_BOUNDARY) { + desc.maxValence = g_mesh->GetDrawContext()->GetMaxValence(); + desc.numElements = 3; + } + EffectDrawRegistry::ConfigType * config = 
effectRegistry.GetDrawConfig(desc); assert(config); @@ -922,7 +940,7 @@ getInstance(Effect effect, } //------------------------------------------------------------------------------ -OpenSubdiv::Osd::GLPtexMipmapTexture * +GLPtexMipmapTexture * createPtex(const char *filename, int memLimit) { Ptex::String ptexError; @@ -945,10 +963,8 @@ createPtex(const char *filename, int memLimit) { size_t targetMemory = memLimit * 1024 * 1024; // MB - OpenSubdiv::Osd::GLPtexMipmapTexture *osdPtex = - OpenSubdiv::Osd::GLPtexMipmapTexture::Create(ptex, - g_maxMipmapLevels, - targetMemory); + GLPtexMipmapTexture *osdPtex = GLPtexMipmapTexture::Create( + ptex, g_maxMipmapLevels, targetMemory); GLuint texture = osdPtex->GetTexelsTexture(); glBindTexture(GL_TEXTURE_2D_ARRAY, texture); @@ -1024,33 +1040,26 @@ createOsdMesh(int level, int kernel) { OpenSubdiv::Osd::MeshBitset bits; bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); - bits.set(OpenSubdiv::Osd::MeshPtexData, true); bits.set(OpenSubdiv::Osd::MeshEndCapGregoryBasis, true); int numVertexElements = g_adaptive ? 
3 : 6; int numVaryingElements = 0; if (kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::CpuEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, level, bits); #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::OmpEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, @@ -1058,13 +1067,10 @@ createOsdMesh(int level, int kernel) { #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == kTBB) { - if (not g_tbbComputeController) { - g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_tbbComputeController, + OpenSubdiv::Far::StencilTables, + OpenSubdiv::Osd::TbbEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, @@ -1072,30 +1078,25 @@ createOsdMesh(int level, int kernel) { #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if (kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } + static OpenSubdiv::Osd::EvaluatorCacheT clEvaluatorCache; g_mesh = new OpenSubdiv::Osd::Mesh( - g_clComputeController, + OpenSubdiv::Osd::CLStencilTables, + OpenSubdiv::Osd::CLEvaluator, + OpenSubdiv::Osd::GLDrawContext, + CLDeviceContext>( refiner, numVertexElements, numVaryingElements, - level, bits, &g_clDeviceContext); + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if (kernel == kCUDA) { - if (not 
g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } g_mesh = new OpenSubdiv::Osd::Mesh( - g_cudaComputeController, + OpenSubdiv::Osd::CudaStencilTables, + OpenSubdiv::Osd::CudaEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, @@ -1103,32 +1104,29 @@ createOsdMesh(int level, int kernel) { #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if (kernel == kGLSL) { - if (not g_glslTransformFeedbackComputeController) { - g_glslTransformFeedbackComputeController = - new OpenSubdiv::Osd::GLSLTransformFeedbackComputeController(); - } + static OpenSubdiv::Osd::EvaluatorCacheT glXFBEvaluatorCache; g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslTransformFeedbackComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + OpenSubdiv::Osd::GLStencilTablesTBO, + OpenSubdiv::Osd::GLXFBEvaluator, + OpenSubdiv::Osd::GLDrawContext>( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &glXFBEvaluatorCache); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if (kernel == kGLSLCompute) { - if (not g_glslComputeController) { - g_glslComputeController = new OpenSubdiv::Osd::GLSLComputeController(); - } + static OpenSubdiv::Osd::EvaluatorCacheT glComputeEvaluatorCache; g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslComputeController, + OpenSubdiv::Osd::GLStencilTablesSSBO, + OpenSubdiv::Osd::GLComputeEvaluator, + OpenSubdiv::Osd::GLDrawContext>( refiner, numVertexElements, numVaryingElements, - level, bits); + level, bits, + &glComputeEvaluatorCache); #endif } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); @@ -1492,7 +1490,7 @@ updateUniformBlocks() { //------------------------------------------------------------------------------ static GLuint -bindProgram(Effect effect, OpenSubdiv::Osd::DrawContext::PatchDescriptor const &desc) { +bindProgram(Effect effect, OpenSubdiv::Far::PatchDescriptor const &desc) { 
EffectDrawRegistry::ConfigType * config = getInstance(effect, desc); @@ -1574,7 +1572,7 @@ drawModel() { for (int i = 0; i < (int)patches.size(); ++i) { OpenSubdiv::Osd::DrawContext::PatchArray const & patch = patches[i]; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); GLenum primType; @@ -1749,8 +1747,7 @@ drawCageEdges() { typedef OpenSubdiv::Far::PatchDescriptor FDesc; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc( - FDesc(FDesc::LINES), 0, 0); + FDesc desc(FDesc::LINES); EffectDrawRegistry::ConfigType *config = getInstance(effect, desc); glUseProgram(config->program); @@ -1979,32 +1976,6 @@ void uninitGL() { if (g_mesh) delete g_mesh; - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - delete g_glslTransformFeedbackComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - delete g_glslComputeController; -#endif - if (g_diffuseEnvironmentMap) glDeleteTextures(1, &g_diffuseEnvironmentMap); if (g_specularEnvironmentMap) diff --git a/examples/glPtexViewer/shader.glsl b/examples/glPtexViewer/shader.glsl index 6447803c..068e4f63 100644 --- a/examples/glPtexViewer/shader.glsl +++ b/examples/glPtexViewer/shader.glsl @@ -21,7 +21,6 @@ // KIND, either express or implied. See the Apache License for the specific // language governing permissions and limitations under the Apache License. 
// -#line 25 //-------------------------------------------------------------- // Common @@ -30,17 +29,10 @@ uniform float displacementScale = 1.0; uniform float mipmapBias = 0; -vec4 GeneratePatchCoord(vec2 localUV, int primitiveID) // for non-adpative +vec4 GeneratePatchCoord(vec2 uv, int primitiveID) // for non-adaptive { - ivec2 ptexIndex = texelFetch(OsdPatchParamBuffer, primitiveID).xy; - int faceID = ptexIndex.x; - int lv = 1 << ((ptexIndex.y & 0xf) - ((ptexIndex.y >> 4) & 1)); - int u = (ptexIndex.y >> 17) & 0x3ff; - int v = (ptexIndex.y >> 7) & 0x3ff; - vec2 uv = localUV; - uv = (uv * vec2(1.0)/lv) + vec2(u, v)/lv; - - return vec4(uv.x, uv.y, lv+0.5, faceID+0.5); + ivec3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(primitiveID)); + return OsdInterpolatePatchCoord(uv, OsdGetPatchCoord(patchParam)); } #if defined(DISPLACEMENT_HW_BILINEAR) \ @@ -490,14 +482,14 @@ GetOverrideColor(int patchParam) #elif defined OSD_PATCH_GREGORY_BASIS patchType = 6; #endif - int edgeCount = bitCount((patchParam >> 4) & 0xf); + int edgeCount = bitCount(OsdGetPatchBoundaryMask(patchParam)); if (edgeCount == 1) { patchType = 2; // BOUNDARY } if (edgeCount == 2) { patchType = 3; // CORNER } - int pattern = bitCount((patchParam >> 8) & 0xf); + int pattern = bitCount(OsdGetPatchTransitionMask(patchParam)); int offset = 7*patchType + pattern; return patchColors[offset]; } @@ -675,7 +667,7 @@ main() textureImage_Data, textureImage_Packing); #elif defined COLOR_PATCHTYPE - vec4 texColor = edgeColor(lighting(GetOverrideColor(GetPatchParam()), inpt.v.position.xyz, normal, 1, 0)); + vec4 texColor = edgeColor(lighting(GetOverrideColor(OsdGetPatchParam(OsdGetPatchIndex(gl_PrimitiveID))), inpt.v.position.xyz, normal, 1, 0)); outColor = texColor; return; #elif defined COLOR_PATCHCOORD diff --git a/examples/glShareTopology/glShareTopology.cpp b/examples/glShareTopology/glShareTopology.cpp index 5604783d..e4a6425b 100644 --- a/examples/glShareTopology/glShareTopology.cpp +++ 
b/examples/glShareTopology/glShareTopology.cpp @@ -42,60 +42,46 @@ GLFWwindow* g_window=0; GLFWmonitor* g_primary=0; -#include #include #include #include #include +#include #include +#include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; +#include #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL; - + #include #include "../common/clDeviceContext.h" CLDeviceContext g_clDeviceContext; #endif #ifdef OPENSUBDIV_HAS_CUDA #include - #include - #include - OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL; - + #include #include "../common/cudaDeviceContext.h" CudaDeviceContext g_cudaDeviceContext; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include - #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslXFBComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include - #include - OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL; + #include #endif @@ -172,7 +158,6 @@ public: if (interleaved) { assert(vertexDesc.stride == varyingDesc.stride); - _vertexBuffer = createVertexBuffer( vertexDesc.stride, numInstances * numVertices); } else { @@ -260,62 +245,61 @@ public: return _restPosition; } - int GetNumVertices() const { + int GetNumVertices() const { // total (control + refined) return _numVertices; } + int GetNumControlVertices() const { + return _numControlVertices; + } protected: TopologyBase(Far::PatchTables const * patchTables) { - _drawContext = Osd::GLDrawContext::Create(patchTables, 7); - } - - void updateVertexBufferStride(int stride) { - // modifying 
patchArrays in drawcontext. - Osd::DrawContext::PatchArrayVector &patchArrays = - _drawContext->GetPatchArrays(); - for (int i = 0; i < (int)patchArrays.size(); ++i) { - Osd::DrawContext::PatchDescriptor desc = patchArrays[i].GetDescriptor(); - desc.SetNumElements(stride); - patchArrays[i].SetDescriptor(desc); - } + _drawContext = Osd::GLDrawContext::Create(patchTables); } int _numVertices; + int _numControlVertices; private: Osd::GLDrawContext *_drawContext; std::vector _restPosition; }; -template class Topology : public TopologyBase { - public: - - typedef COMPUTE_CONTROLLER ComputeController; - typedef typename COMPUTE_CONTROLLER::ComputeContext ComputeContext; + typedef EVALUATOR Evaluator; + typedef STENCIL_TABLES StencilTables; typedef DEVICE_CONTEXT DeviceContext; + typedef Osd::EvaluatorCacheT EvaluatorCache; - Topology(ComputeController * computeController, - Far::PatchTables const * patchTables, - Far::StencilTables const * vertexStencils, + Topology(Far::PatchTables const * patchTables, + Far::StencilTables const * vertexStencils, //XXX: takes ownership Far::StencilTables const * varyingStencils, + int numControlVertices, + EvaluatorCache * evaluatorCache = NULL, DeviceContext * deviceContext = NULL) : TopologyBase(patchTables), - _computeController(computeController), + _evaluatorCache(evaluatorCache), _deviceContext(deviceContext) { - _computeContext = ComputeContext::Create( - vertexStencils, varyingStencils, deviceContext); + _numControlVertices = numControlVertices; + _numVertices = numControlVertices + vertexStencils->GetNumStencils(); + + _vertexStencils = Osd::convertToCompatibleStencilTables( + vertexStencils, deviceContext); + _varyingStencils = Osd::convertToCompatibleStencilTables( + varyingStencils, deviceContext); - _numVertices = vertexStencils->GetNumStencils() + - vertexStencils->GetNumControlVertices(); } ~Topology() { - delete _computeContext; + delete _vertexStencils; + delete _varyingStencils; } void Refine(InstancesBase *instance, 
int numInstances) { @@ -330,21 +314,59 @@ public: for (int i = 0; i < numInstances; ++i) { - Osd::VertexBufferDescriptor vertexDesc( - globalVertexDesc.offset + _numVertices*globalVertexDesc.stride*i, + Osd::VertexBufferDescriptor vertexSrcDesc( + globalVertexDesc.offset + _numVertices*i*globalVertexDesc.stride, globalVertexDesc.length, globalVertexDesc.stride); - Osd::VertexBufferDescriptor varyingDesc( - globalVaryingDesc.offset + _numVertices*globalVaryingDesc.stride*i, - globalVaryingDesc.length, - globalVaryingDesc.stride); + Osd::VertexBufferDescriptor vertexDstDesc( + globalVertexDesc.offset + (_numVertices*i + _numControlVertices)*globalVertexDesc.stride, + globalVertexDesc.length, + globalVertexDesc.stride); - _computeController->Compute(_computeContext, - typedInstance->GetVertexBuffer(), - typedInstance->GetVaryingBuffer(), - &vertexDesc, - &varyingDesc); + // vertex + Evaluator const *evalInstance = Osd::GetEvaluator( + _evaluatorCache, vertexSrcDesc, vertexDstDesc, _deviceContext); + + Evaluator::EvalStencils(typedInstance->GetVertexBuffer(), vertexSrcDesc, + typedInstance->GetVertexBuffer(), vertexDstDesc, + _vertexStencils, + evalInstance, + _deviceContext); + + // varying + if (_varyingStencils) { + Osd::VertexBufferDescriptor varyingSrcDesc( + globalVaryingDesc.offset + _numVertices*i*globalVaryingDesc.stride, + globalVaryingDesc.length, + globalVaryingDesc.stride); + + Osd::VertexBufferDescriptor varyingDstDesc( + globalVaryingDesc.offset + (_numVertices*i + _numControlVertices)*globalVaryingDesc.stride, + globalVaryingDesc.length, + globalVaryingDesc.stride); + + evalInstance = Osd::GetEvaluator( + _evaluatorCache, varyingSrcDesc, varyingDstDesc, _deviceContext); + + if (typedInstance->GetVaryingBuffer()) { + // non interleaved + Evaluator::EvalStencils( + typedInstance->GetVaryingBuffer(), varyingSrcDesc, + typedInstance->GetVaryingBuffer(), varyingDstDesc, + _varyingStencils, + evalInstance, + _deviceContext); + } else { + // interleaved + 
Evaluator::EvalStencils( + typedInstance->GetVertexBuffer(), varyingSrcDesc, + typedInstance->GetVertexBuffer(), varyingDstDesc, + _varyingStencils, + evalInstance, + _deviceContext); + } + } } } @@ -360,20 +382,19 @@ public: } virtual void Synchronize() { - _computeController->Synchronize(); + Evaluator::Synchronize(_deviceContext); } virtual void UpdateVertexTexture(InstancesBase *instances) { Instances *typedInstance = static_cast *>(instances); GetDrawContext()->UpdateVertexTexture(typedInstance->GetVertexBuffer()); - - updateVertexBufferStride(typedInstance->GetVertexBuffer()->GetNumElements()); } private: - ComputeController *_computeController; - ComputeContext *_computeContext; + StencilTables const *_vertexStencils; + StencilTables const *_varyingStencils; + EvaluatorCache * _evaluatorCache; DeviceContext *_deviceContext; }; @@ -645,81 +666,85 @@ createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark ) } } + int numControlVertices = refiner->GetNumVertices(0); // create partitioned patcharray TopologyBase *topology = NULL; if (g_kernel == kCPU) { - if (not g_cpuComputeController) - g_cpuComputeController = new Osd::CpuComputeController(); - topology = new Topology(g_cpuComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + vertexStencils, varyingStencils, + numControlVertices); #ifdef OPENSUBDIV_HAS_OPENMP } else if (g_kernel == kOPENMP) { - if (not g_ompComputeController) - g_ompComputeController = new Osd::OmpComputeController(); - topology = new Topology(g_ompComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + vertexStencils, varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (g_kernel == kTBB) { - if (not g_tbbComputeController) - g_tbbComputeController = new Osd::TbbComputeController(); - topology = new Topology(g_tbbComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + 
vertexStencils, varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if (g_kernel == kCUDA) { - if (not g_cudaComputeController) - g_cudaComputeController = new Osd::CudaComputeController(); - topology = new Topology(g_cudaComputeController, + topology = new Topology( patchTables, - vertexStencils, varyingStencils); + vertexStencils, varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if (g_kernel == kCL) { - if (not g_clComputeController) - g_clComputeController = new Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - topology = new Topology(g_clComputeController, - patchTables, - vertexStencils, varyingStencils, - &g_clDeviceContext); + static Osd::EvaluatorCacheT clEvaluatorCache; + topology = new Topology( + patchTables, + vertexStencils, varyingStencils, + numControlVertices, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if (g_kernel == kGLSL) { - if (not g_glslXFBComputeController) - g_glslXFBComputeController = new Osd::GLSLTransformFeedbackComputeController(); - topology = new Topology(g_glslXFBComputeController, - patchTables, - vertexStencils, varyingStencils); + static Osd::EvaluatorCacheT glXFBEvaluatorCache; + topology = new Topology( + patchTables, + vertexStencils, varyingStencils, + numControlVertices); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if (g_kernel == kGLSLCompute) { - if (not g_glslComputeController) - g_glslComputeController = new Osd::GLSLComputeController(); - topology = new Topology(g_glslComputeController, - patchTables, - vertexStencils, varyingStencils); + static Osd::EvaluatorCacheT glComputeEvaluatorCache; + topology = new Topology( + patchTables, + vertexStencils, varyingStencils, + numControlVertices); #endif } else { } delete refiner; - delete vertexStencils; - delete varyingStencils; + // XXX: Weired API. think again.. 
+/// delete vertexStencils; +/// delete varyingStencils; delete patchTables; // centering rest position @@ -771,25 +796,40 @@ union Effect { } }; -typedef std::pair EffectDesc; +struct EffectDesc { + EffectDesc(OpenSubdiv::Far::PatchDescriptor desc, + Effect effect) : desc(desc), effect(effect), + maxValence(0), numElements(0) { } + + OpenSubdiv::Far::PatchDescriptor desc; + Effect effect; + int maxValence; + int numElements; + + bool operator < (const EffectDesc &e) const { + return desc < e.desc || (desc == e.desc && + (maxValence < e.maxValence || ((maxValence == e.maxValence) && + (effect < e.effect)))); + } +}; class EffectDrawRegistry : public Osd::GLDrawRegistry { protected: virtual ConfigType * - _CreateDrawConfig(DescType const & desc, SourceConfigType const * sconfig); + _CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig); virtual SourceConfigType * - _CreateDrawSourceConfig(DescType const & desc); + _CreateDrawSourceConfig(EffectDesc const & desc); }; EffectDrawRegistry::SourceConfigType * -EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { +EffectDrawRegistry::_CreateDrawSourceConfig(EffectDesc const & effectDesc) { - Effect effect = desc.second; + Effect effect = effectDesc.effect; SourceConfigType * sconfig = - BaseRegistry::_CreateDrawSourceConfig(desc.first); + BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc); assert(sconfig); @@ -799,8 +839,20 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { const char *glslVersion = "#version 330\n"; #endif - if (desc.first.GetType() == Far::PatchDescriptor::QUADS or - desc.first.GetType() == Far::PatchDescriptor::TRIANGLES) { + // legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined + if (effectDesc.desc.GetType() == Far::PatchDescriptor::GREGORY or + effectDesc.desc.GetType() == Far::PatchDescriptor::GREGORY_BOUNDARY) { + std::ostringstream ss; + ss << effectDesc.maxValence; + 
sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); + ss.str(""); + + ss << effectDesc.numElements; + sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); + } + + if (effectDesc.desc.GetType() == Far::PatchDescriptor::QUADS or + effectDesc.desc.GetType() == Far::PatchDescriptor::TRIANGLES) { sconfig->vertexShader.source = shaderSource; sconfig->vertexShader.version = glslVersion; sconfig->vertexShader.AddDefine("VERTEX_SHADER"); @@ -816,12 +868,12 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) { sconfig->fragmentShader.version = glslVersion; sconfig->fragmentShader.AddDefine("FRAGMENT_SHADER"); - if (desc.first.GetType() == Far::PatchDescriptor::QUADS) { + if (effectDesc.desc.GetType() == Far::PatchDescriptor::QUADS) { // uniform catmark, bilinear sconfig->geometryShader.AddDefine("PRIM_QUAD"); sconfig->fragmentShader.AddDefine("PRIM_QUAD"); sconfig->commonShader.AddDefine("UNIFORM_SUBDIVISION"); - } else if (desc.first.GetType() == Far::PatchDescriptor::TRIANGLES) { + } else if (effectDesc.desc.GetType() == Far::PatchDescriptor::TRIANGLES) { // uniform loop sconfig->geometryShader.AddDefine("PRIM_TRI"); sconfig->fragmentShader.AddDefine("PRIM_TRI"); @@ -865,7 +917,7 @@ EffectDrawRegistry::_CreateDrawConfig( DescType const & desc, SourceConfigType const * sconfig) { - ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.first, sconfig); + ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.desc, sconfig); assert(config); GLuint uboIndex; @@ -938,6 +990,18 @@ static GLuint bindProgram(Effect effect, Osd::DrawContext::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); + + // only legacy gregory needs maxValence and numElements + int maxValence = g_topology->GetDrawContext()->GetMaxValence(); + int numElements = (g_displayStyle == kVaryingInterleaved ? 
7 : 3); + + typedef OpenSubdiv::Far::PatchDescriptor Descriptor; + if (patch.GetDescriptor().GetType() == Descriptor::GREGORY or + patch.GetDescriptor().GetType() == Descriptor::GREGORY_BOUNDARY) { + effectDesc.maxValence = maxValence; + effectDesc.numElements = numElements; + } + EffectDrawRegistry::ConfigType * config = effectRegistry.GetDrawConfig(effectDesc); @@ -1054,7 +1118,7 @@ drawPatches(Osd::DrawContext::PatchArrayVector const &patches, Osd::DrawContext::PatchArray const & patch = patches[i]; - Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + Far::PatchDescriptor desc = patch.GetDescriptor(); Far::PatchDescriptor::Type patchType = desc.GetType(); GLenum primType; @@ -1292,28 +1356,6 @@ uninitGL() { delete g_instances; if (g_topology) delete g_topology; - - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - delete g_glslXFBComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - delete g_glslComputeController; -#endif } //------------------------------------------------------------------------------ @@ -1454,7 +1496,7 @@ static void callbackDisplayStyle(int b) { g_displayStyle = b; - rebuildInstances(); + rebuildOsdMesh(); } static void diff --git a/examples/glStencilViewer/glStencilViewer.cpp b/examples/glStencilViewer/glStencilViewer.cpp index 63fdeca0..ef240eb4 100644 --- a/examples/glStencilViewer/glStencilViewer.cpp +++ b/examples/glStencilViewer/glStencilViewer.cpp @@ -54,8 +54,7 @@ GLFWmonitor* g_primary=0; #include #include -#include -#include +#include #include #include @@ -154,18 +153,12 @@ Osd::VertexBufferDescriptor g_controlDesc( /*offset*/ 0, /*legnth*/ 3, /*stride* g_outputDuDesc( /*offset*/ 3, /*legnth*/ 3, 
/*stride*/ 18 ), g_outputDvDesc( /*offset*/ 9, /*legnth*/ 3, /*stride*/ 18 ); -Osd::CpuEvalStencilsContext * g_evalCtx=0; - -Osd::CpuEvalStencilsController g_evalCpuCtrl; - #if defined(OPENSUBDIV_HAS_OPENMP) - #include - Osd::OmpEvalStencilsController g_evalOmpCtrl; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - Osd::TbbEvalStencilsController g_evalTbbCtrl; + #include #endif @@ -200,48 +193,41 @@ updateGeom() { float * ptr = g_stencilValues->BindCpuBuffer(); memset(ptr, 0, g_controlStencils->GetNumStencils() * 18 * sizeof(float)); - // Uppdate random points by applying point & tangent stencils + // Update random points by applying point & tangent stencils switch (g_kernel) { case kCPU: { - g_evalCpuCtrl.UpdateValues( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDataDesc, g_stencilValues ); - - g_evalCpuCtrl.UpdateDerivs( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDuDesc, g_stencilValues, - g_outputDvDesc, g_stencilValues ); + Osd::CpuEvaluator::EvalStencils( + g_controlValues, g_controlDesc, // input + g_stencilValues, g_outputDataDesc, // position + g_stencilValues, g_outputDuDesc, // Du + g_stencilValues, g_outputDvDesc, // Dv + // Normals will be filled afterwards + g_controlStencils); } break; #if defined(OPENSUBDIV_HAS_OPENMP) case kOPENMP: { - g_evalOmpCtrl.UpdateValues( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDataDesc, g_stencilValues ); - - g_evalOmpCtrl.UpdateDerivs( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDuDesc, g_stencilValues, - g_outputDvDesc, g_stencilValues ); +// FIXME: implements OmpEvaluator + Osd::CpuEvaluator::EvalStencils( + g_controlValues, g_controlDesc, // input + g_stencilValues, g_outputDataDesc, // position + g_stencilValues, g_outputDuDesc, // Du + g_stencilValues, g_outputDvDesc, // Dv + // Normals will be filled afterwards + g_controlStencils); } break; #endif #if defined(OPENSUBDIV_HAS_TBB) +// FIXME: implements TbbEvaluator case kTBB: { - 
g_evalTbbCtrl.UpdateValues( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDataDesc, g_stencilValues ); - - g_evalTbbCtrl.UpdateDerivs( - g_evalCtx, - g_controlDesc, g_controlValues, - g_outputDuDesc, g_stencilValues, - g_outputDvDesc, g_stencilValues ); + Osd::CpuEvaluator::EvalStencils( + g_controlValues, g_controlDesc, // input + g_stencilValues, g_outputDataDesc, // position + g_stencilValues, g_outputDuDesc, // Du + g_stencilValues, g_outputDvDesc, // Dv + // Normals will be filled afterwards + g_controlStencils); } break; #endif default: @@ -364,9 +350,6 @@ createMesh(ShapeDesc const & shapeDesc, int level) { g_controlValues = Osd::CpuVertexBuffer::Create(3, nverts); // Create eval context & data buffers - delete g_evalCtx; - g_evalCtx = Osd::CpuEvalStencilsContext::Create(g_controlStencils); - delete g_stencilValues; g_stencilValues = Osd::CpuGLVertexBuffer::Create(3, g_controlStencils->GetNumStencils() * 6 ); @@ -674,6 +657,7 @@ drawStencils() { g_samplesProgram.EnableVertexAttributes(); + glDrawArrays(GL_POINTS, 0, numEdges*2); glDrawArrays(GL_LINES, 0, numEdges*2); glBindVertexArray(0); diff --git a/examples/glViewer/glViewer.cpp b/examples/glViewer/glViewer.cpp index 98926a7d..479c4ad2 100644 --- a/examples/glViewer/glViewer.cpp +++ b/examples/glViewer/glViewer.cpp @@ -43,59 +43,44 @@ GLFWwindow* g_window=0; GLFWmonitor* g_primary=0; #include -#include #include #include +#include #include -#include -#include -OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL; #ifdef OPENSUBDIV_HAS_OPENMP - #include - OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_TBB - #include - OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL; + #include #endif #ifdef OPENSUBDIV_HAS_OPENCL #include - #include - #include - + #include #include "../common/clDeviceContext.h" CLDeviceContext g_clDeviceContext; - OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL; 
#endif #ifdef OPENSUBDIV_HAS_CUDA #include - #include - #include - + #include #include "../common/cudaDeviceContext.h" CudaDeviceContext g_cudaDeviceContext; - OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - #include - #include + #include #include - OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslTransformFeedbackComputeController = NULL; #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - #include - #include + #include #include - OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL; #endif #include @@ -450,13 +435,7 @@ updateGeom() { Stopwatch s; s.Start(); - if (g_displayStyle == kInterleavedVaryingColor) { - OpenSubdiv::Osd::VertexBufferDescriptor vertexDesc(0, 3, 7); - OpenSubdiv::Osd::VertexBufferDescriptor varyingDesc(3, 4, 7); - g_mesh->Refine(&vertexDesc, &varyingDesc, true); - } else { - g_mesh->Refine(); - } + g_mesh->Refine(); s.Stop(); g_cpuTime = float(s.GetElapsed() * 1000.0f); @@ -493,7 +472,8 @@ getKernelName(int kernel) { static void createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=kCatmark) { - typedef OpenSubdiv::Far::ConstIndexArray IndexArray; + using namespace OpenSubdiv; + typedef Far::ConstIndexArray IndexArray; bool doAnim = g_objAnim and g_currentShape==0; @@ -505,12 +485,12 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme= } // create Vtr mesh (topology) - OpenSubdiv::Sdc::SchemeType sdctype = GetSdcType(*shape); - OpenSubdiv::Sdc::Options sdcoptions = GetSdcOptions(*shape); + Sdc::SchemeType sdctype = GetSdcType(*shape); + Sdc::Options sdcoptions = GetSdcOptions(*shape); - OpenSubdiv::Far::TopologyRefiner * refiner = - OpenSubdiv::Far::TopologyRefinerFactory::Create(*shape, - OpenSubdiv::Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); + Far::TopologyRefiner * refiner = + Far::TopologyRefinerFactory::Create(*shape, + 
Far::TopologyRefinerFactory::Options(sdctype, sdcoptions)); // save coarse topology (used for coarse mesh drawing) int nedges = refiner->GetNumEdges(0), @@ -545,117 +525,104 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme= interleaveVarying = g_displayStyle == kInterleavedVaryingColor, doSingleCreasePatch = (g_singleCreasePatch!=0 and g_scheme==kCatmark); - OpenSubdiv::Osd::MeshBitset bits; - bits.set(OpenSubdiv::Osd::MeshAdaptive, doAdaptive); - bits.set(OpenSubdiv::Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); - bits.set(OpenSubdiv::Osd::MeshInterleaveVarying, interleaveVarying); - bits.set(OpenSubdiv::Osd::MeshFVarData, g_displayStyle == kFaceVaryingColor); - bits.set(OpenSubdiv::Osd::MeshEndCapBSplineBasis, g_endCap == kEndCapBSplineBasis); - bits.set(OpenSubdiv::Osd::MeshEndCapGregoryBasis, g_endCap == kEndCapGregoryBasis); - bits.set(OpenSubdiv::Osd::MeshEndCapLegacyGregory, g_endCap == kEndCapLegacyGregory); + Osd::MeshBitset bits; + bits.set(Osd::MeshAdaptive, doAdaptive); + bits.set(Osd::MeshUseSingleCreasePatch, doSingleCreasePatch); + bits.set(Osd::MeshInterleaveVarying, interleaveVarying); + bits.set(Osd::MeshFVarData, g_displayStyle == kFaceVaryingColor); + bits.set(Osd::MeshEndCapBSplineBasis, g_endCap == kEndCapBSplineBasis); + bits.set(Osd::MeshEndCapGregoryBasis, g_endCap == kEndCapGregoryBasis); + bits.set(Osd::MeshEndCapLegacyGregory, g_endCap == kEndCapLegacyGregory); int numVertexElements = 3; int numVaryingElements = (g_displayStyle == kVaryingColor or interleaveVarying) ? 
4 : 0; if (kernel == kCPU) { - if (not g_cpuComputeController) { - g_cpuComputeController = new OpenSubdiv::Osd::CpuComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_cpuComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #ifdef OPENSUBDIV_HAS_OPENMP } else if (kernel == kOPENMP) { - if (not g_ompComputeController) { - g_ompComputeController = new OpenSubdiv::Osd::OmpComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_ompComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_TBB } else if (kernel == kTBB) { - if (not g_tbbComputeController) { - g_tbbComputeController = new OpenSubdiv::Osd::TbbComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_tbbComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_OPENCL } else if(kernel == kCL) { - if (not g_clComputeController) { - g_clComputeController = new OpenSubdiv::Osd::CLComputeController( - g_clDeviceContext.GetContext(), - g_clDeviceContext.GetCommandQueue()); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_clComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits, &g_clDeviceContext); + // CLKernel + static Osd::EvaluatorCacheT clEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &clEvaluatorCache, + &g_clDeviceContext); #endif #ifdef OPENSUBDIV_HAS_CUDA } else if(kernel == kCUDA) { - if (not g_cudaComputeController) { - g_cudaComputeController = new OpenSubdiv::Osd::CudaComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - 
g_cudaComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits); #endif #ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK } else if(kernel == kGLSL) { - if (not g_glslTransformFeedbackComputeController) { - g_glslTransformFeedbackComputeController = new OpenSubdiv::Osd::GLSLTransformFeedbackComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslTransformFeedbackComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + static Osd::EvaluatorCacheT glXFBEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &glXFBEvaluatorCache); #endif #ifdef OPENSUBDIV_HAS_GLSL_COMPUTE } else if(kernel == kGLSLCompute) { - if (not g_glslComputeController) { - g_glslComputeController = new OpenSubdiv::Osd::GLSLComputeController(); - } - g_mesh = new OpenSubdiv::Osd::Mesh( - g_glslComputeController, - refiner, - numVertexElements, - numVaryingElements, - level, bits); + static Osd::EvaluatorCacheT glComputeEvaluatorCache; + g_mesh = new Osd::Mesh( + refiner, + numVertexElements, + numVaryingElements, + level, bits, + &glComputeEvaluatorCache); + + #endif } else { printf("Unsupported kernel %s\n", getKernelName(kernel)); @@ -847,35 +814,63 @@ union Effect { } }; -typedef std::pair EffectDesc; +struct EffectDesc { + EffectDesc(OpenSubdiv::Far::PatchDescriptor desc, + Effect effect) : desc(desc), effect(effect), + maxValence(0), numElements(0) { } + + OpenSubdiv::Far::PatchDescriptor desc; + Effect effect; + int maxValence; + int numElements; + + bool operator < (const EffectDesc &e) const { + return desc < e.desc || (desc == e.desc && + (maxValence < e.maxValence || ((maxValence == e.maxValence) && + (effect < e.effect)))); + } +}; class EffectDrawRegistry : public OpenSubdiv::Osd::GLDrawRegistry { protected: virtual ConfigType * - _CreateDrawConfig(DescType 
const & desc, SourceConfigType const * sconfig); + _CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig); virtual SourceConfigType * - _CreateDrawSourceConfig(DescType const & desc); + _CreateDrawSourceConfig(EffectDesc const & desc); }; EffectDrawRegistry::SourceConfigType * -EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) +EffectDrawRegistry::_CreateDrawSourceConfig(EffectDesc const & effectDesc) { typedef OpenSubdiv::Far::PatchDescriptor Descriptor; - Effect effect = desc.second; + Effect effect = effectDesc.effect; SourceConfigType * sconfig = - BaseRegistry::_CreateDrawSourceConfig(desc.first); + BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc); assert(sconfig); const std::string glslVersionStr = get_shader_version_include(); const char *glslVersion = glslVersionStr.c_str(); - if (desc.first.GetType() == Descriptor::QUADS or - desc.first.GetType() == Descriptor::TRIANGLES) { + + // legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined + if (effectDesc.desc.GetType() == Descriptor::GREGORY or + effectDesc.desc.GetType() == Descriptor::GREGORY_BOUNDARY) { + std::ostringstream ss; + ss << effectDesc.maxValence; + sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); + ss.str(""); + + ss << effectDesc.numElements; + sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); + } + + if (effectDesc.desc.GetType() == Descriptor::QUADS or + effectDesc.desc.GetType() == Descriptor::TRIANGLES) { sconfig->vertexShader.source = shaderSource(); sconfig->vertexShader.version = glslVersion; sconfig->vertexShader.AddDefine("VERTEX_SHADER"); @@ -891,12 +886,12 @@ EffectDrawRegistry::_CreateDrawSourceConfig(DescType const & desc) sconfig->fragmentShader.version = glslVersion; sconfig->fragmentShader.AddDefine("FRAGMENT_SHADER"); - if (desc.first.GetType() == Descriptor::QUADS) { + if (effectDesc.desc.GetType() == Descriptor::QUADS) { // uniform catmark, bilinear 
sconfig->geometryShader.AddDefine("PRIM_QUAD"); sconfig->fragmentShader.AddDefine("PRIM_QUAD"); sconfig->commonShader.AddDefine("UNIFORM_SUBDIVISION"); - } else if (desc.first.GetType() == Descriptor::TRIANGLES) { + } else if (effectDesc.desc.GetType() == Descriptor::TRIANGLES) { // uniform loop sconfig->geometryShader.AddDefine("PRIM_TRI"); sconfig->fragmentShader.AddDefine("PRIM_TRI"); @@ -956,7 +951,7 @@ EffectDrawRegistry::_CreateDrawConfig( DescType const & desc, SourceConfigType const * sconfig) { - ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.first, sconfig); + ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.desc, sconfig); assert(config); GLuint uboIndex; @@ -1042,6 +1037,18 @@ static GLuint bindProgram(Effect effect, OpenSubdiv::Osd::DrawContext::PatchArray const & patch) { EffectDesc effectDesc(patch.GetDescriptor(), effect); + + // only legacy gregory needs maxValence and numElements + int maxValence = g_mesh->GetDrawContext()->GetMaxValence(); + int numElements = (g_displayStyle == kInterleavedVaryingColor ? 
7 : 3); + + typedef OpenSubdiv::Far::PatchDescriptor Descriptor; + if (patch.GetDescriptor().GetType() == Descriptor::GREGORY or + patch.GetDescriptor().GetType() == Descriptor::GREGORY_BOUNDARY) { + effectDesc.maxValence = maxValence; + effectDesc.numElements = numElements; + } + EffectDrawRegistry::ConfigType * config = effectRegistry.GetDrawConfig(effectDesc); @@ -1204,7 +1211,7 @@ display() { for (int i=0; i<(int)patches.size(); ++i) { OpenSubdiv::Osd::DrawContext::PatchArray const & patch = patches[i]; - OpenSubdiv::Osd::DrawContext::PatchDescriptor desc = patch.GetDescriptor(); + OpenSubdiv::Far::PatchDescriptor desc = patch.GetDescriptor(); OpenSubdiv::Far::PatchDescriptor::Type patchType = desc.GetType(); patchCount[patchType] += patch.GetNumPatches(); @@ -1320,10 +1327,6 @@ display() { patchCount[Descriptor::QUADS]); y += 20; g_hud.DrawString(x, y, "Regular : %d", patchCount[Descriptor::REGULAR]); y+= 20; - g_hud.DrawString(x, y, "Boundary : %d", - patchCount[Descriptor::BOUNDARY]); y+= 20; - g_hud.DrawString(x, y, "Corner : %d", - patchCount[Descriptor::CORNER]); y+= 20; g_hud.DrawString(x, y, "Gregory : %d", patchCount[Descriptor::GREGORY]); y+= 20; g_hud.DrawString(x, y, "Boundary Gregory : %d", @@ -1410,28 +1413,6 @@ uninitGL() { if (g_mesh) delete g_mesh; - - delete g_cpuComputeController; - -#ifdef OPENSUBDIV_HAS_OPENMP - delete g_ompComputeController; -#endif - -#ifdef OPENSUBDIV_HAS_TBB - delete g_tbbComputeController; -#endif -#ifdef OPENSUBDIV_HAS_OPENCL - delete g_clComputeController; -#endif -#ifdef OPENSUBDIV_HAS_CUDA - delete g_cudaComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK - delete g_glslTransformFeedbackComputeController; -#endif -#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE - delete g_glslComputeController; -#endif } //------------------------------------------------------------------------------ diff --git a/examples/glViewer/shader.glsl b/examples/glViewer/shader.glsl index 80fb805c..83fd90fa 100644 --- 
a/examples/glViewer/shader.glsl +++ b/examples/glViewer/shader.glsl @@ -388,7 +388,7 @@ edgeColor(vec4 Cfill, vec4 edgeDistance) } vec4 -getAdaptivePatchColor(int patchParam) +getAdaptivePatchColor(ivec3 patchParam) { const vec4 patchColors[7*6] = vec4[7*6]( vec4(1.0f, 1.0f, 1.0f, 1.0f), // regular @@ -450,7 +450,7 @@ getAdaptivePatchColor(int patchParam) patchType = 6; #endif - int edgeCount = bitCount((patchParam >> 4) & 0xf); + int edgeCount = bitCount(OsdGetPatchBoundaryMask(patchParam)); if (edgeCount == 1) { patchType = 2; // BOUNDARY } @@ -458,7 +458,7 @@ getAdaptivePatchColor(int patchParam) patchType = 3; // CORNER } - int pattern = bitCount((patchParam >> 8) & 0xf); + int pattern = bitCount(OsdGetPatchTransitionMask(patchParam)); #ifdef OSD_PATCH_ENABLE_SINGLE_CREASE if (inpt.sharpness > 0) pattern += 6; #endif @@ -480,7 +480,7 @@ main() int(floor(20*inpt.color.r)+floor(20*inpt.color.g))&1, 1); #else //vec4 color = diffuseColor; - vec4 color = getAdaptivePatchColor(GetPatchParam()); + vec4 color = getAdaptivePatchColor(OsdGetPatchParam(OsdGetPatchIndex(gl_PrimitiveID))); #endif vec4 Cf = lighting(color, inpt.v.position.xyz, N); diff --git a/examples/mayaPolySmooth/mayaPolySmooth.cpp b/examples/mayaPolySmooth/mayaPolySmooth.cpp index 1b90c98c..97a2712a 100644 --- a/examples/mayaPolySmooth/mayaPolySmooth.cpp +++ b/examples/mayaPolySmooth/mayaPolySmooth.cpp @@ -64,8 +64,6 @@ #include #include -#include -#include #include diff --git a/examples/vtrViewer/gl_mesh.cpp b/examples/vtrViewer/gl_mesh.cpp index 0e6ed31b..e5c6cf50 100644 --- a/examples/vtrViewer/gl_mesh.cpp +++ b/examples/vtrViewer/gl_mesh.cpp @@ -708,16 +708,6 @@ GLMesh::initializeBuffers(Options options, TopologyRefiner const & refiner, eao[face*4+1] = cvs[ 6]; eao[face*4+2] = cvs[10]; eao[face*4+3] = cvs[ 9]; - } else if (desc.GetType()==Descriptor::BOUNDARY) { - eao[face*4 ] = cvs[ 2]; - eao[face*4+1] = cvs[ 6]; - eao[face*4+2] = cvs[ 5]; - eao[face*4+3] = cvs[ 1]; - } else if 
(desc.GetType()==Descriptor::CORNER) { - eao[face*4 ] = cvs[ 1]; - eao[face*4+1] = cvs[ 2]; - eao[face*4+2] = cvs[ 5]; - eao[face*4+3] = cvs[ 4]; } else { memcpy(&eao[face*4], cvs.begin(), 4*sizeof(OpenSubdiv::Far::Index)); } diff --git a/examples/vtrViewer/gl_mesh.h b/examples/vtrViewer/gl_mesh.h index c9fc3fa5..a30fec60 100644 --- a/examples/vtrViewer/gl_mesh.h +++ b/examples/vtrViewer/gl_mesh.h @@ -28,7 +28,6 @@ #include #include #include -#include #include "../common/gl_common.h" diff --git a/examples/vtrViewer/hbr_refine.cpp b/examples/vtrViewer/hbr_refine.cpp index c5704a7c..c6168386 100644 --- a/examples/vtrViewer/hbr_refine.cpp +++ b/examples/vtrViewer/hbr_refine.cpp @@ -455,8 +455,6 @@ private: template struct PatchTypes { TYPE R, // regular patch - B, // boundary patch (4 rotations) - C, // corner patch (4 rotations) G, // gregory patch GB, // gregory boundary patch GP; // gregory basis @@ -484,8 +482,6 @@ Far::PatchTablesFactory::PatchTypes::getValue( Far::PatchDescriptor desc ) switch (desc.GetType()) { case Far::PatchDescriptor::REGULAR : return R; - case Far::PatchDescriptor::BOUNDARY : return B; - case Far::PatchDescriptor::CORNER : return C; case Far::PatchDescriptor::GREGORY : return G; case Far::PatchDescriptor::GREGORY_BOUNDARY : return GB; case Far::PatchDescriptor::GREGORY_BASIS : return GP; @@ -500,10 +496,9 @@ Far::PatchTablesFactory::PatchTypes::getNumPatchArrays() const { int result=0; if (R) ++result; - if (B) ++result; - if (C) ++result; if (G) ++result; if (GB) ++result; + if (GP) ++result; return result; } @@ -690,12 +685,12 @@ Far::PatchTablesFactory::Create(Hmesh & mesh, int maxvalence) { case 2 : { // Boundary patch f->_adaptiveFlags.rots=computeBoundaryPatchRotation(f); - patchCtr.B++; + patchCtr.R++; } break; case 3 : { // Corner patch f->_adaptiveFlags.rots=computeCornerPatchRotation(f); - patchCtr.C++; + patchCtr.R++; } break; default : break; @@ -784,7 +779,7 @@ Far::PatchTablesFactory::Create(Hmesh & mesh, int maxvalence) { 
f->_adaptiveFlags.rots=rot; // override the transition rotation - patchCtr.B++; + patchCtr.R++; } break; case 3 : { // corner patch @@ -794,7 +789,7 @@ Far::PatchTablesFactory::Create(Hmesh & mesh, int maxvalence) { f->_adaptiveFlags.rots=rot; // override the transition rotation - patchCtr.C++; + patchCtr.R++; } break; default : assert(0); break; @@ -808,8 +803,8 @@ Far::PatchTablesFactory::Create(Hmesh & mesh, int maxvalence) { static const Far::Index remapRegular [16] = {5,6,10,9,4,0,1,2,3,7,11,15,14,13,12,8}; - static const Far::Index remapRegularBoundary[12] = {1,2,6,5,0,3,7,11,10,9,8,4}; - static const Far::Index remapRegularCorner [ 9] = {1,2,5,4,0,8,7,6,3}; + static const Far::Index remapRegularBoundary[16] = {5,6,10,9,4,7,11,15,14,13,12,8,0,1,2,3}; + static const Far::Index remapRegularCorner [16] = {5,6,10,9,7,11,15,14,13,12,8,4,0,1,2,3}; int fvarwidth=0; @@ -878,16 +873,16 @@ Far::PatchTablesFactory::Create(Hmesh & mesh, int maxvalence) { case 2 : { // Boundary Patch (12 CVs) f->_adaptiveFlags.brots = (f->_adaptiveFlags.rots+1)%4; - iptrs.B = getOneRing(f, 12, remapRegularBoundary, iptrs.B); - pptrs.B = computePatchParam(f, pptrs.B); - //fptrs.B[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.B[0][0], /*isAdaptive=*/true); + iptrs.R = getOneRing(f, 12, remapRegularBoundary, iptrs.R); + pptrs.R = computePatchParam(f, pptrs.R); + //fptrs.R[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.R[0][0], /*isAdaptive=*/true); } break; case 3 : { // Corner Patch (9 CVs) f->_adaptiveFlags.brots = (f->_adaptiveFlags.rots+1)%4; - iptrs.C = getOneRing(f, 9, remapRegularCorner, iptrs.C); - pptrs.C = computePatchParam(f, pptrs.C); - //fptrs.C[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.C[0][0], /*isAdaptive=*/true); + iptrs.R = getOneRing(f, 9, remapRegularCorner, iptrs.R); + pptrs.R = computePatchParam(f, pptrs.R); + //fptrs.R[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.R[0][0], /*isAdaptive=*/true); } break; default : assert(0); @@ -938,16 +933,16 
@@ Far::PatchTablesFactory::Create(Hmesh & mesh, int maxvalence) { case 2 : { // Boundary Transition Patch (12 CVs) //unsigned rot = f->_adaptiveFlags.brots; - iptrs.B = getOneRing(f, 12, remapRegularBoundary, iptrs.B); - pptrs.B = computePatchParam(f, pptrs.B); - //fptrs.B[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.B[pattern][rot], /*isAdaptive=*/true); + iptrs.R = getOneRing(f, 12, remapRegularBoundary, iptrs.R); + pptrs.R = computePatchParam(f, pptrs.R); + //fptrs.R[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.R[pattern][rot], /*isAdaptive=*/true); } break; case 3 : { // Corner Transition Patch (9 CVs) //unsigned rot = f->_adaptiveFlags.brots; - iptrs.C = getOneRing(f, 9, remapRegularCorner, iptrs.C); - pptrs.C = computePatchParam(f, pptrs.C); - //fptrs.C[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.C[pattern][rot], /*isAdaptive=*/true); + iptrs.R = getOneRing(f, 9, remapRegularCorner, iptrs.R); + pptrs.R = computePatchParam(f, pptrs.R); + //fptrs.R[pattern][rot] = computeFVarData(f, fvarwidth, fptrs.R[pattern][rot], /*isAdaptive=*/true); } break; } } else @@ -1106,10 +1101,10 @@ Far::PatchTablesFactory::getOneRing(Hface const * f, // Boundary case // - // 4 0 3 5 + // 4 0 1 5 // ---- o ---- o ---- o ---- o ---- // | | | | - // | 11 | 1 | 2 | 6 + // | 11 | 3 | 2 | 6 // ---- o ---- o ---- o ---- o ---- // | | | | // | 10 | 9 | 8 | 7 @@ -1141,7 +1136,10 @@ Far::PatchTablesFactory::getOneRing(Hface const * f, result[remap[idx++ % ringsize]] = e->GetOrgVertex()->GetID(); } - result += 12; + for (int i=12; i<16; ++i) { + result[remap[idx++]] = result[0]; + } + result += 16; } else if (ringsize==9) { @@ -1176,10 +1174,13 @@ Far::PatchTablesFactory::getOneRing(Hface const * f, result[remap[idx++ % ringsize]] = e->GetOrgVertex()->GetID(); } - result += 9; + for (int i=9; i<16; ++i) { + result[remap[idx++]] = result[0]; + } + result += 16; } - assert(idx==ringsize); + assert(idx==16); return result; } diff --git a/examples/vtrViewer/vtrViewer.cpp 
b/examples/vtrViewer/vtrViewer.cpp index 4875b116..a99eaa77 100644 --- a/examples/vtrViewer/vtrViewer.cpp +++ b/examples/vtrViewer/vtrViewer.cpp @@ -46,7 +46,6 @@ GLFWmonitor* g_primary=0; #define snprintf _snprintf #endif -#include #include #include @@ -710,8 +709,6 @@ createPtexNumbers(OpenSubdiv::Far::PatchTables const & patchTables, static char buf[16]; static int regular[4] = {5, 6, 9, 10}, - boundary[4] = {1, 2, 5, 6}, - corner[4] = {1, 2, 4, 5}, gregory[4] = {0, 1, 2, 3}; for (int array=0; array<(int)patchTables.GetNumPatchArrays(); ++array) { @@ -724,8 +721,6 @@ createPtexNumbers(OpenSubdiv::Far::PatchTables const & patchTables, int * remap = 0; switch (patchTables.GetPatchArrayDescriptor(array).GetType()) { case Descriptor::REGULAR: remap = regular; break; - case Descriptor::BOUNDARY: remap = boundary; break; - case Descriptor::CORNER: remap = corner; break; case Descriptor::GREGORY: case Descriptor::GREGORY_BOUNDARY: case Descriptor::GREGORY_BASIS: remap = gregory; break; diff --git a/opensubdiv/far/interpolate.cpp b/opensubdiv/far/interpolate.cpp index be1e5c3a..c5c4d07c 100644 --- a/opensubdiv/far/interpolate.cpp +++ b/opensubdiv/far/interpolate.cpp @@ -53,66 +53,59 @@ public: // patch weights static void GetPatchWeights(PatchParam::BitField bits, float s, float t, float point[], float deriv1[], float deriv2[]); + + // adjust patch weights for boundary (and corner) edges + static void AdjustBoundaryWeights(PatchParam::BitField bits, + float sWeights[4], float tWeights[4]); }; template <> inline void Spline::GetWeights( - float t, float point[4], float deriv[3]) { + float t, float point[4], float deriv[4]) { - // The weights for the four uniform cubic Bezier basis functions are: - // (1 - t)^3 - // 3 * t * (1-t) - // 3 * t^2 * (1-t) - // t^3 - float t2 = t*t, - w0 = 1.0f - t, - w2 = w0 * w0; + // The four uniform cubic Bezier basis functions (in terms of t and its + // complement tC) evaluated at t: + float t2 = t*t; + float tC = 1.0f - t; + float tC2 = 
tC * tC; assert(point); - point[0] = w0*w2; - point[1] = 3.0f * t * w2; - point[2] = 3.0f * t2 * w0; - point[3] = t * t2; + point[0] = tC2 * tC; + point[1] = tC2 * t * 3.0f; + point[2] = t2 * tC * 3.0f; + point[3] = t2 * t; - // The weights for the three uniform quadratic basis functions are: - // (1-t)^2 - // 2 * t * (1-t) - // t^2 + // Derivatives of the above four basis functions at t: if (deriv) { - deriv[0] = w2; - deriv[1] = 2.0f * t * w0; - deriv[2] = t2; + deriv[0] = -3.0f * tC2; + deriv[1] = 9.0f * t2 - 12.0f * t + 3.0f; + deriv[2] = -9.0f * t2 + 6.0f * t; + deriv[3] = 3.0f * t2; } } template <> inline void Spline::GetWeights( - float t, float point[4], float deriv[3]) { + float t, float point[4], float deriv[4]) { - // The weights for the four uniform cubic B-Spline basis functions are: - // (1/6)(1 - t)^3 - // (1/6)(3t^3 - 6t^2 + 4) - // (1/6)(-3t^3 + 3t^2 + 3t + 1) - // (1/6)t^3 - float t2 = t*t, - t3 = 3.0f*t2*t, - w0 = 1.0f-t; + // The four uniform cubic B-Spline basis functions evaluated at t: + float const one6th = 1.0f / 6.0f; + + float t2 = t * t; + float t3 = t * t2; assert(point); - point[0] = (w0*w0*w0) / 6.0f; - point[1] = (t3 - 6.0f*t2 + 4.0f) / 6.0f; - point[2] = (3.0f*t2 - t3 + 3.0f*t + 1.0f) / 6.0f; - point[3] = t3 / 18.0f; + point[0] = one6th * (1.0f - 3.0f*(t - t2) - t3); + point[1] = one6th * (4.0f - 6.0f*t2 + 3.0f*t3); + point[2] = one6th * (1.0f + 3.0f*(t + t2 - t3)); + point[3] = one6th * ( t3); - - // The weights for the three uniform quadratic basis functions are: - // (1/2)(1-t)^2 - // (1/2)(1 + 2t - 2t^2) - // (1/2)t^2 + // Derivatives of the above four basis functions at t: if (deriv) { - deriv[0] = 0.5f * w0 * w0; - deriv[1] = 0.5f + t - t2; - deriv[2] = 0.5f * t2; + deriv[0] = -0.5f*t2 + t - 0.5f; + deriv[1] = 1.5f*t2 - 2.0f*t; + deriv[2] = -1.5f*t2 + t + 0.5f; + deriv[3] = 0.5f*t2; } } @@ -173,119 +166,99 @@ template <> inline void Spline::GetPatchWeights(PatchParam::BitField bits, float s, float t, float point[4], float 
deriv1[4], float deriv2[4]) { - static int const rots[4][4] = - { { 0, 1, 2, 3 }, - { 3, 0, 1, 2 }, - { 2, 3, 0, 1 }, - { 1, 2, 3, 0 } }; - - assert(bits.GetRotation()<4); - int const * rot = rots[bits.GetRotation()]; - bits.Normalize(s,t); float os = 1.0f - s, ot = 1.0f - t; if (point) { - point[rot[0]] = os*ot; - point[rot[1]] = s*ot; - point[rot[2]] = s*t; - point[rot[3]] = os*t; + point[0] = os*ot; + point[1] = s*ot; + point[2] = s*t; + point[3] = os*t; } if (deriv1 and deriv2) { - deriv1[rot[0]] = t-1.0f; - deriv1[rot[1]] = ot; - deriv1[rot[2]] = t; - deriv1[rot[3]] = -t; + deriv1[0] = t-1.0f; + deriv1[1] = ot; + deriv1[2] = t; + deriv1[3] = -t; - deriv2[rot[0]] = s-1.0f; - deriv2[rot[1]] = -s; - deriv2[rot[2]] = s; - deriv2[rot[3]] = os; + deriv2[0] = s-1.0f; + deriv2[1] = -s; + deriv2[2] = s; + deriv2[3] = os; + } +} + +template +void Spline::AdjustBoundaryWeights(PatchParam::BitField bits, + float sWeights[4], float tWeights[4]) { + + int boundary = bits.GetBoundary(); + + if (boundary & 1) { + tWeights[2] -= tWeights[0]; + tWeights[1] += 2*tWeights[0]; + tWeights[0] = 0; + } + if (boundary & 2) { + sWeights[1] -= sWeights[3]; + sWeights[2] += 2*sWeights[3]; + sWeights[3] = 0; + } + if (boundary & 4) { + tWeights[1] -= tWeights[3]; + tWeights[2] += 2*tWeights[3]; + tWeights[3] = 0; + } + if (boundary & 8) { + sWeights[2] -= sWeights[0]; + sWeights[1] += 2*sWeights[0]; + sWeights[0] = 0; } } template void Spline::GetPatchWeights(PatchParam::BitField bits, - float s, float t, float point[16], float deriv1[16], float deriv2[16]) { + float s, float t, float point[16], float derivS[16], float derivT[16]) { - static int const rots[4][16] = - { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 12, 8, 4, 0, 13, 9, 5, 1, 14, 10, 6, 2, 15, 11, 7, 3 }, - { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }, - { 3, 7, 11, 15, 2, 6, 10, 14, 1, 5, 9, 13, 0, 4, 8, 12 } }; - - assert(bits.GetRotation()<4); - int const * rot = rots[bits.GetRotation()]; - - 
float sWeights[4], tWeights[4], d1Weights[3], d2Weights[3]; + float sWeights[4], tWeights[4], dsWeights[4], dtWeights[4]; bits.Normalize(s,t); - Spline::GetWeights(s, point ? sWeights : 0, deriv1 ? d1Weights : 0); - Spline::GetWeights(t, point ? tWeights : 0, deriv2 ? d2Weights : 0); + Spline::GetWeights(s, point ? sWeights : 0, derivS ? dsWeights : 0); + Spline::GetWeights(t, point ? tWeights : 0, derivT ? dtWeights : 0); + + int boundary = bits.GetBoundary(); if (point) { - // Compute the tensor product weight corresponding to each control - // vertex - memset(point, 0, 16*sizeof(float)); + // Compute the tensor product weight of the (s,t) basis function + // corresponding to each control vertex: + + AdjustBoundaryWeights(bits, sWeights, tWeights); + for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { - point[rot[4*i+j]] += sWeights[j] * tWeights[i]; + point[4*i+j] = sWeights[j] * tWeights[i]; } } } - if (deriv1 and deriv2) { - // Compute the tangent stencil. This is done by taking the tensor - // product between the quadratic weights computed for s and the cubic - // weights computed for t. The stencil is constructed using - // differences between consecutive vertices in each row (i.e. - // in the s direction). - memset(deriv1, 0, 16*sizeof(float)); - for (int i = 0, k = 0; i < 4; ++i) { - float prevWeight = 0.0f; - for (int j = 0; j < 3; ++j) { - float weight = d1Weights[j]*tWeights[i]; - deriv1[rot[k++]] += prevWeight - weight; - prevWeight = weight; - } - deriv1[rot[k++]]+=prevWeight; - } + if (derivS and derivT) { + // Compute the tensor product weight of the differentiated (s,t) basis + // function corresponding to each control vertex (scaled accordingly): - memset(deriv2, 0, 16*sizeof(float)); -#define FASTER_TENSOR -#ifdef FASTER_TENSOR - // XXXX manuelk this might be slightly more efficient ? 
- float dW[4]; - dW[0] = - d2Weights[0]; - dW[1] = d2Weights[0] - d2Weights[1]; - dW[2] = d2Weights[1] - d2Weights[2]; - dW[3] = d2Weights[2]; - for (int i = 0, k = 0; i < 4; ++i) { + float dScale = (float)(1 << bits.GetDepth()); + + AdjustBoundaryWeights(bits, dsWeights, dtWeights); + + for (int i = 0; i < 4; ++i) { for (int j = 0; j < 4; ++j) { - deriv2[rot[k++]] = sWeights[j] * dW[i]; + derivS[4*i+j] = dsWeights[j] * tWeights[i] * dScale; + derivT[4*i+j] = sWeights[j] * dtWeights[i] * dScale; } } -#else - for (int j = 0; j < 4; ++j) { - float prevWeight = 0.0f; - for (int i = 0; i < 3; ++i) { - float weight = sWeights[j]*d2Weights[i]; - deriv2[rot[4*i+j]]+=prevWeight - weight; - prevWeight = weight; - } - deriv2[rot[12+j]] += prevWeight; - } -#endif - // Scale derivatives up based on level of subdivision - float scale = float(1 << bits.GetDepth()); - for (int k=0; k<16; ++k) { - deriv1[k] *= scale; - deriv2[k] *= scale; - } } } @@ -307,6 +280,96 @@ void GetBSplineWeights(PatchParam::BitField bits, Spline::GetPatchWeights(bits, s, t, point, deriv1, deriv2); } +void GetGregoryWeights(PatchParam::BitField bits, + float s, float t, float point[20], float deriv1[20], float deriv2[20]) { + + // + // P3 e3- e2+ P2 + // 15------17-------11--------10 + // | | | | + // | | | | + // | | f3- | f2+ | + // | 19 13 | + // e3+ 16-----18 14-----12 e2- + // | f3+ f2- | + // | | + // | | + // | f0- f1+ | + // e0- 2------4 8------6 e1+ + // | 3 9 | + // | | f0+ | f1- | + // | | | | + // | | | | + // O--------1--------7--------5 + // P0 e0+ e1- P1 + // + + // Indices of boundary and interior points and their corresponding Bezier points: + // + static int const boundaryGregory[12] = { 0, 1, 7, 5, 2, 6, 16, 12, 15, 17, 11, 10 }; + static int const boundaryBezier[12] = { 0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15 }; + + static int const interiorGregory[8] = { 3, 4, 8, 9, 13, 14, 18, 19 }; + static int const interiorBezier[8] = { 5, 5, 6, 6, 10, 10, 9, 9 }; + + // Rational multipliers of 
the Bezier basis functions (note we need a set of weights + // for each derivative for proper differentiation -- TBD): + // + float sComp = 1.0f - s; + float tComp = 1.0f - t; + + // Use <= here to avoid compiler warnings -- the sums should always be non-negative: + float df0 = s + t; if (df0 <= 0.0f) df0 = 1.0f; + float df1 = sComp + t; if (df1 <= 0.0f) df1 = 1.0f; + float df2 = sComp + tComp; if (df2 <= 0.0f) df2 = 1.0f; + float df3 = s + tComp; if (df3 <= 0.0f) df3 = 1.0f; + + float interiorPointBasis[8] = { s/df0, t/df0, + t/df1, sComp/df1, + sComp/df2, tComp/df2, + tComp/df3, s/df3 }; + + // Weights from a bicubic Bezier patch: + // + float bezierPoint[16], bezierDeriv1[16], bezierDeriv2[16]; + + GetBezierWeights(bits, s, t, bezierPoint, bezierDeriv1, bezierDeriv2); + + // Copy basis functions (weights) for boundary points and scale the interior basis + // functions by their rational multipliers: + // + for (int i = 0; i < 12; ++i) { + point[boundaryGregory[i]] = bezierPoint[boundaryBezier[i]]; + } + for (int i = 0; i < 8; ++i) { + point[interiorGregory[i]] = bezierPoint[interiorBezier[i]] * interiorPointBasis[i]; + } + + if (deriv1 and deriv2) { + // Copy the differentiated basis functions for Bezier to the boundary points: + // + for (int i = 0; i < 12; ++i) { + deriv1[boundaryGregory[i]] = bezierDeriv1[boundaryBezier[i]]; + deriv2[boundaryGregory[i]] = bezierDeriv2[boundaryBezier[i]]; + } + + // XXX barry: NOTE -- THIS IS NOT CORRECT (the correction is pending)... + // + // The basis functions for the interior points are rational and require appropriate + // differentiation wrt each parametric direction. So we will need the Bezier basis + // functions in each of s and t rather than for (s,t) combined. The correction will + // also need to support higher order derivatives and so will be combined with that + // extension. + // + // What follows preserves what has been done with Gregory derivatives to this point. 
+ // + for (int i = 0; i < 8; ++i) { + deriv1[interiorGregory[i]] = bezierDeriv1[interiorBezier[i]] * interiorPointBasis[i]; + deriv2[interiorGregory[i]] = bezierDeriv2[interiorBezier[i]] * interiorPointBasis[i]; + } + } +} + } // end namespace Far } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/far/interpolate.h b/opensubdiv/far/interpolate.h index d3dc420a..ea52f25c 100644 --- a/opensubdiv/far/interpolate.h +++ b/opensubdiv/far/interpolate.h @@ -52,6 +52,9 @@ void GetBezierWeights(PatchParam::BitField bits, void GetBSplineWeights(PatchParam::BitField bits, float s, float t, float point[16], float deriv1[16], float deriv2[16]); +void GetGregoryWeights(PatchParam::BitField bits, + float s, float t, float point[20], float deriv1[20], float deriv2[20]); + /// \brief Interpolate the (s,t) parametric location of a bilinear (quad) /// patch diff --git a/opensubdiv/far/patchDescriptor.cpp b/opensubdiv/far/patchDescriptor.cpp index 3e5826a6..e37418da 100644 --- a/opensubdiv/far/patchDescriptor.cpp +++ b/opensubdiv/far/patchDescriptor.cpp @@ -47,11 +47,7 @@ PatchDescriptor::GetAdaptivePatchDescriptors(Sdc::SchemeType type) { }; static PatchDescriptor _catmarkDescriptors[] = { - - // XXXdyu-patch-drawing PatchDescriptor(REGULAR), - PatchDescriptor(BOUNDARY), - PatchDescriptor(CORNER), PatchDescriptor(GREGORY), PatchDescriptor(GREGORY_BOUNDARY), PatchDescriptor(GREGORY_BASIS), @@ -75,9 +71,8 @@ PatchDescriptor::GetAdaptivePatchDescriptors(Sdc::SchemeType type) { void PatchDescriptor::print() const { static char const * types[13] = { - "NON_PATCH", "POINTS", "LINES", "QUADS", "TRIANGLES", "LOOP", "REGULAR", - "SINGLE_CREASE", "BOUNDARY", "CORNER", "GREGORY", - "GREGORY_BOUNDARY", "GREGORY_BASIS" }; + "NON_PATCH", "POINTS", "LINES", "QUADS", "TRIANGLES", "LOOP", + "REGULAR", "GREGORY", "GREGORY_BOUNDARY", "GREGORY_BASIS" }; printf(" type %s\n", types[_type]); diff --git a/opensubdiv/far/patchDescriptor.h b/opensubdiv/far/patchDescriptor.h index b404af46..27e08aaa 
100644 --- a/opensubdiv/far/patchDescriptor.h +++ b/opensubdiv/far/patchDescriptor.h @@ -45,7 +45,7 @@ namespace Far { /// or TRIANGLES /// /// * Adaptively subdivided meshes contain bicubic patches of types REGULAR, -/// BOUNDARY, CORNER, GREGORY, GREGORY_BOUNDARY, GREGOYR_BASIS. +/// GREGORY, GREGORY_BOUNDARY, GREGORY_BASIS. /// These bicubic patches are also further distinguished by a transition /// pattern as well as a rotational orientation. /// @@ -71,8 +71,6 @@ public: LOOP, ///< Loop patch REGULAR, ///< feature-adaptive bicubic patches - BOUNDARY, - CORNER, GREGORY, GREGORY_BOUNDARY, GREGORY_BASIS @@ -128,12 +126,6 @@ public: /// \brief Number of control vertices of Regular Patches in table. static short GetRegularPatchSize() { return 16; } - /// \brief Number of control vertices of Boundary Patches in table. - static short GetBoundaryPatchSize() { return 16; } - - /// \brief Number of control vertices of Boundary Patches in table. - static short GetCornerPatchSize() { return 16; } - /// \brief Number of control vertices of Gregory (and Gregory Boundary) Patches in table. 
static short GetGregoryPatchSize() { return 4; } @@ -169,8 +161,6 @@ PatchDescriptor::GetNumControlVertices( Type type ) { case GREGORY : case GREGORY_BOUNDARY : return GetGregoryPatchSize(); case GREGORY_BASIS : return GetGregoryBasisPatchSize(); - case BOUNDARY : return GetBoundaryPatchSize(); - case CORNER : return GetCornerPatchSize(); case TRIANGLES : return 3; case LINES : return 2; case POINTS : return 1; @@ -185,8 +175,6 @@ PatchDescriptor::GetNumFVarControlVertices( Type type ) { case REGULAR : return GetRegularPatchSize(); case QUADS : return 4; case TRIANGLES : return 3; - case BOUNDARY : return GetBoundaryPatchSize(); - case CORNER : return GetCornerPatchSize(); case LINES : return 2; case POINTS : return 1; case GREGORY_BASIS : assert(0); return GetGregoryBasisPatchSize(); diff --git a/opensubdiv/far/patchParam.h b/opensubdiv/far/patchParam.h index 5261c76a..f6f671dc 100644 --- a/opensubdiv/far/patchParam.h +++ b/opensubdiv/far/patchParam.h @@ -90,9 +90,6 @@ struct PatchParam { /// \brief Returns the boundary edge encoding for the patch. 
unsigned short GetBoundary() const { return (unsigned short)((field >> 4) & 0xf); } - /// \brief Deprecated XXXdyu-patch-drawing (patches rotated when gathered from refiner) - unsigned char GetRotation() const { return 0; } - /// \brief True if the parent coarse face is a non-quad bool NonQuadRoot() const { return (field >> 3) & 0x1; } diff --git a/opensubdiv/far/patchTables.h b/opensubdiv/far/patchTables.h index ab2cfa24..e1996f78 100644 --- a/opensubdiv/far/patchTables.h +++ b/opensubdiv/far/patchTables.h @@ -470,14 +470,10 @@ PatchTables::Evaluate(PatchHandle const & handle, float s, float t, if (ptype==PatchDescriptor::REGULAR) { - GetBSplineWeights(bits, s, t, Q, Qd1, Qd2); - ConstIndexArray cvs = GetPatchVertices(handle); + GetBSplineWeights(bits, s, t, Q, Qd1, Qd2); InterpolateRegularPatch(cvs.begin(), Q, Qd1, Qd2, src, dst); - // XXXdyu bits InterpolateBoundaryPatch(cvs.begin(), Q, Qd1, Qd2, src, dst); - // XXXdyu bits InterpolateCornerPatch(cvs.begin(), Q, Qd1, Qd2, src, dst); - } else if (ptype==PatchDescriptor::GREGORY_BASIS) { @@ -526,8 +522,6 @@ PatchTables::EvaluateFaceVarying(int channel, PatchHandle const & handle, case PatchDescriptor::REGULAR: GetBSplineWeights(bits, s, t, Q, Qd1, Qd2); InterpolateRegularPatch(cvs.begin(), Q, Qd1, Qd2, src, dst); - // XXXdyu bits InterpolateBoundaryPatch(cvs.begin(), Q, Qd1, Qd2, src, dst); - // XXXdyu bits InterpolateCornerPatch(cvs.begin(), Q, Qd1, Qd2, src, dst); break; default: assert(0); diff --git a/opensubdiv/far/patchTablesFactory.cpp b/opensubdiv/far/patchTablesFactory.cpp index cfae13b0..d7bc642a 100644 --- a/opensubdiv/far/patchTablesFactory.cpp +++ b/opensubdiv/far/patchTablesFactory.cpp @@ -47,8 +47,6 @@ struct PatchTypes { TYPE R, // regular patch - B, // boundary patch (4 rotations) - C, // corner patch (4 rotations) G, // gregory patch GB, // gregory boundary patch GP; // gregory basis patch @@ -59,8 +57,6 @@ struct PatchTypes { TYPE & getValue( Far::PatchDescriptor desc ) { switch (desc.GetType()) 
{ case Far::PatchDescriptor::REGULAR : return R; - case Far::PatchDescriptor::BOUNDARY : return B; - case Far::PatchDescriptor::CORNER : return C; case Far::PatchDescriptor::GREGORY : return G; case Far::PatchDescriptor::GREGORY_BOUNDARY : return GB; case Far::PatchDescriptor::GREGORY_BASIS : return GP; @@ -75,8 +71,6 @@ struct PatchTypes { int getNumPatchArrays() const { int result=0; if (R) ++result; - if (B) ++result; - if (C) ++result; if (G) ++result; if (GB) ++result; if (GP) ++result; @@ -573,10 +567,6 @@ PatchTablesFactory::gatherFVarData(AdaptiveContext & context, int level, // compute the 20 cvs basis) fvarPatchType = context.options.useFVarQuadEndCaps ? PatchDescriptor::QUADS : PatchDescriptor::GREGORY_BASIS; - } else if (fvarPatchTag._boundaryCount > 1) { - fvarPatchType = PatchDescriptor::CORNER; - } else if (fvarPatchTag._boundaryCount == 1) { - fvarPatchType = PatchDescriptor::BOUNDARY; } else if (fvarPatchTag._isSingleCrease) { fvarPatchType = PatchDescriptor::REGULAR; } @@ -596,17 +586,31 @@ PatchTablesFactory::gatherFVarData(AdaptiveContext & context, int level, // revisited... 
int orientationIndex = fvarPatchTag._boundaryIndex; if (fvarPatchType == PatchDescriptor::REGULAR) { - static int const permuteRegular[16] = { 5, 6, 7, 8, 4, 0, 1, 9, 15, 3, 2, 10, 14, 13, 12, 11 }; - vtxLevel.gatherQuadRegularInteriorPatchPoints(faceIndex, patchVerts, orientationIndex, *fvc); - permutation = permuteRegular; - } else if (fvarPatchType == PatchDescriptor::CORNER) { - static int const permuteCorner[9] = { 8, 3, 0, 7, 2, 1, 6, 5, 4 }; - vtxLevel.gatherQuadRegularCornerPatchPoints(faceIndex, patchVerts, orientationIndex, *fvc); - permutation = permuteCorner; - } else if (fvarPatchType == PatchDescriptor::BOUNDARY) { - static int const permuteBoundary[12] = { 11, 3, 0, 4, 10, 2, 1, 5, 9, 8, 7, 6 }; - vtxLevel.gatherQuadRegularBoundaryPatchPoints(faceIndex, patchVerts, orientationIndex, *fvc); - permutation = permuteBoundary; + if (fvarPatchTag._boundaryCount == 0) { + static int const permuteRegular[16] = { 5, 6, 7, 8, 4, 0, 1, 9, 15, 3, 2, 10, 14, 13, 12, 11 }; + permutation = permuteRegular; + vtxLevel.gatherQuadRegularInteriorPatchPoints(faceIndex, patchVerts, orientationIndex, *fvc); + } else if (fvarPatchTag._boundaryCount == 1) { + // Expand boundary patch vertices and rotate to restore correct orientation. + static int const permuteBoundary[4][16] = { + { -1, -1, -1, -1, 11, 3, 0, 4, 10, 2, 1, 5, 9, 8, 7, 6 }, + { 9, 10, 11, -1, 8, 2, 3, -1, 7, 1, 0, -1, 6, 5, 4, -1 }, + { 6, 7, 8, 9, 5, 1, 2, 10, 4, 0, 3, 11, -1, -1, -1, -1 }, + { -1, 4, 5, 6, -1, 0, 1, 7, -1, 3, 2, 8, -1, 11, 10, 9 } }; + permutation = permuteBoundary[orientationIndex]; + vtxLevel.gatherQuadRegularBoundaryPatchPoints(faceIndex, patchVerts, orientationIndex); + } else if (fvarPatchTag._boundaryCount == 2) { + // Expand corner patch vertices and rotate to restore correct orientation. 
+ static int const permuteCorner[4][16] = { + { -1, -1, -1, -1, -1, 0, 1, 4, -1, 3, 2, 5, -1, 8, 7, 6 }, + { -1, -1, -1, -1, 8, 3, 0, -1, 7, 2, 1, -1, 6, 5, 4, -1 }, + { 6, 7, 8, -1, 5, 2, 3, -1, 4, 1, 0, -1, -1, -1, -1, -1 }, + { -1, 4, 5, 6, -1, 1, 2, 7, -1, 0, 3, 8, -1, -1, -1, -1 } }; + permutation = permuteCorner[orientationIndex]; + vtxLevel.gatherQuadRegularCornerPatchPoints(faceIndex, patchVerts, orientationIndex, *fvc); + } else { + assert(fvarPatchTag._boundaryCount >=0 && fvarPatchTag._boundaryCount <= 2); + } } else if (fvarPatchType == PatchDescriptor::QUADS) { vtxLevel.gatherQuadLinearPatchPoints(faceIndex, patchVerts, orientationIndex, *fvc); permutation = 0; diff --git a/opensubdiv/osd/CMakeLists.txt b/opensubdiv/osd/CMakeLists.txt index 943fbda8..88381328 100644 --- a/opensubdiv/osd/CMakeLists.txt +++ b/opensubdiv/osd/CMakeLists.txt @@ -27,20 +27,11 @@ #------------------------------------------------------------------------------- # source & headers set(CPU_SOURCE_FILES + cpuEvaluator.cpp cpuKernel.cpp - cpuComputeController.cpp - cpuComputeContext.cpp - cpuEvalLimitContext.cpp - cpuEvalLimitController.cpp - cpuEvalLimitKernel.cpp - cpuEvalStencilsContext.cpp - cpuEvalStencilsController.cpp - cpuSmoothNormalContext.cpp - cpuSmoothNormalController.cpp cpuVertexBuffer.cpp drawContext.cpp drawRegistry.cpp - evalLimitContext.cpp ) set(GPU_SOURCE_FILES ) @@ -50,91 +41,31 @@ set(INC_FILES ) set(PRIVATE_HEADER_FILES debug.h cpuKernel.h - cpuEvalLimitKernel.h ) set(PUBLIC_HEADER_FILES - computeController.h - cpuComputeContext.h - cpuComputeController.h - cpuEvalLimitContext.h - cpuEvalLimitController.h - cpuEvalStencilsContext.h - cpuEvalStencilsController.h - cpuSmoothNormalContext.h - cpuSmoothNormalController.h + cpuEvaluator.h cpuVertexBuffer.h - evalLimitContext.h mesh.h nonCopyable.h opengl.h drawContext.h drawRegistry.h - vertex.h vertexDescriptor.h ) set(DOXY_HEADER_FILES ${PUBLIC_HEADER_FILES}) 
-#------------------------------------------------------------------------------- -set(GL_PTEX_PUBLIC_HEADERS - glPtexTexture.h - glPtexMipmapTexture.h -) -set(DX_PTEX_PUBLIC_HEADERS - d3d11PtexTexture.h - d3d11PtexMipmapTexture.h -) - -if( PTEX_FOUND ) - list(APPEND PUBLIC_HEADER_FILES - ptexTextureLoader.h - ptexMipmapTextureLoader.h - ) - list(APPEND CPU_SOURCE_FILES - ptexTextureLoader.cpp - ptexMipmapTextureLoader.cpp - ) - if( OPENGL_FOUND ) - list(APPEND GPU_SOURCE_FILES - glPtexTexture.cpp - glPtexMipmapTexture.cpp - ) - list(APPEND PUBLIC_HEADER_FILES - ${GL_PTEX_PUBLIC_HEADERS} - ) - endif() - if( DXSDK_FOUND ) - list(APPEND GPU_SOURCE_FILES - d3d11PtexTexture.cpp - d3d11PtexMipmapTexture.cpp - ) - list(APPEND PUBLIC_HEADER_FILES - ${DX_PTEX_PUBLIC_HEADERS} - ) - endif() - include_directories( "${PTEX_INCLUDE_DIR}" ) - list(APPEND PLATFORM_CPU_LIBRARIES - ${PTEX_LIBRARY} - ) -endif() - -list(APPEND DOXY_HEADER_FILES ${GL_PTEX_PUBLIC_HEADERS} ${DX_PTEX_PUBLIC_HEADERS}) - #------------------------------------------------------------------------------- set(OPENMP_PUBLIC_HEADERS + ompEvaluator.h ompKernel.h - ompComputeController.h - ompEvalStencilsController.h - ompSmoothNormalController.h ) if(OPENMP_FOUND ) list(APPEND CPU_SOURCE_FILES + ompEvaluator.cpp ompKernel.cpp - ompComputeController.cpp - ompEvalStencilsController.cpp - ompSmoothNormalController.cpp ) list(APPEND PUBLIC_HEADER_FILES ${OPENMP_PUBLIC_HEADERS}) @@ -148,20 +79,16 @@ list(APPEND DOXY_HEADER_FILES ${OPENMP_PUBLIC_HEADERS}) #------------------------------------------------------------------------------- set(TBB_PUBLIC_HEADERS + tbbEvaluator.h tbbKernel.h - tbbComputeController.h - tbbEvalStencilsController.h - tbbSmoothNormalController.h ) if( TBB_FOUND ) include_directories("${TBB_INCLUDE_DIR}") list(APPEND CPU_SOURCE_FILES + tbbEvaluator.cpp tbbKernel.cpp - tbbComputeController.cpp - tbbEvalStencilsController.cpp - tbbSmoothNormalController.cpp ) list(APPEND PUBLIC_HEADER_FILES 
${TBB_PUBLIC_HEADERS}) @@ -201,8 +128,6 @@ if( OPENGL_FOUND OR OPENGLES_FOUND ) glslPatchBSpline.glsl glslPatchGregory.glsl glslPatchGregoryBasis.glsl - glslPatchTransition.glsl - glslPtexCommon.glsl ) endif() endif() @@ -213,18 +138,16 @@ list(APPEND DOXY_HEADER_FILES ${GL_PUBLIC_HEADERS}) # OpenGL 4.2 dependencies # note : (GLSL transform feedback kernels require GL 4.2) set(GL_4_2_PUBLIC_HEADERS - glslTransformFeedbackComputeContext.h - glslTransformFeedbackComputeController.h + glXFBEvaluator.h ) if( OPENGL_4_2_FOUND ) list(APPEND GPU_SOURCE_FILES - glslTransformFeedbackComputeContext.cpp - glslTransformFeedbackComputeController.cpp + glXFBEvaluator.cpp ) list(APPEND PUBLIC_HEADER_FILES ${GL_4_2_PUBLIC_HEADERS}) list(APPEND KERNEL_FILES - glslTransformFeedbackKernel.glsl + glslXFBKernel.glsl ) list(APPEND PLATFORM_GPU_LIBRARIES ${GLEW_LIBRARY} @@ -238,14 +161,12 @@ list(APPEND DOXY_HEADER_FILES ${GL_4_2_PUBLIC_HEADERS}) # OpenGL 4.3 dependencies # note : (GLSL compute shader kernels require GL 4.3) set(GL_4_3_PUBLIC_HEADERS - glslComputeContext.h - glslComputeController.h + glComputeEvaluator.h ) if( OPENGL_4_3_FOUND ) list(APPEND GPU_SOURCE_FILES - glslComputeContext.cpp - glslComputeController.cpp + glComputeEvaluator.cpp ) list(APPEND PUBLIC_HEADER_FILES ${GL_4_3_PUBLIC_HEADERS}) list(APPEND KERNEL_FILES @@ -262,8 +183,7 @@ list(APPEND DOXY_HEADER_FILES ${GL_4_3_PUBLIC_HEADERS}) # DX11 code & dependencies set(DXSDK_PUBLIC_HEADERS cpuD3D11VertexBuffer.h - d3d11ComputeContext.h - d3d11ComputeController.h + d3d11ComputeEvaluator.h d3d11DrawContext.h d3d11DrawRegistry.h d3d11VertexBuffer.h @@ -272,8 +192,7 @@ set(DXSDK_PUBLIC_HEADERS if( DXSDK_FOUND ) list(APPEND GPU_SOURCE_FILES cpuD3D11VertexBuffer.cpp - d3d11ComputeContext.cpp - d3d11ComputeController.cpp + d3d11ComputeEvaluator.cpp d3d11DrawContext.cpp d3d11DrawRegistry.cpp d3d11VertexBuffer.cpp @@ -284,8 +203,6 @@ if( DXSDK_FOUND ) hlslPatchCommon.hlsl hlslPatchBSpline.hlsl hlslPatchGregory.hlsl - 
hlslPatchTransition.hlsl - hlslPtexCommon.hlsl ) list(APPEND PLATFORM_GPU_LIBRARIES ${DXSDK_LIBRARIES} @@ -297,16 +214,14 @@ list(APPEND DOXY_HEADER_FILES ${DXSDK_PUBLIC_HEADERS}) #------------------------------------------------------------------------------- # OpenCL code & dependencies set(OPENCL_PUBLIC_HEADERS - clComputeContext.h - clComputeController.h + clEvaluator.h clVertexBuffer.h opencl.h ) if ( OPENCL_FOUND ) list(APPEND GPU_SOURCE_FILES - clComputeContext.cpp - clComputeController.cpp + clEvaluator.cpp clVertexBuffer.cpp ) list(APPEND PUBLIC_HEADER_FILES ${OPENCL_PUBLIC_HEADERS}) @@ -337,15 +252,13 @@ list(APPEND DOXY_HEADER_FILES ${OPENCL_PUBLIC_HEADERS}) #------------------------------------------------------------------------------- # CUDA code & dependencies set(CUDA_PUBLIC_HEADERS - cudaComputeContext.h - cudaComputeController.h + cudaEvaluator.h cudaVertexBuffer.h ) if( CUDA_FOUND ) list(APPEND GPU_SOURCE_FILES - cudaComputeController.cpp - cudaComputeContext.cpp + cudaEvaluator.cpp cudaVertexBuffer.cpp ) list(APPEND PUBLIC_HEADER_FILES ${CUDA_PUBLIC_HEADERS}) diff --git a/opensubdiv/osd/clComputeContext.cpp b/opensubdiv/osd/clComputeContext.cpp deleted file mode 100644 index 674a1a4a..00000000 --- a/opensubdiv/osd/clComputeContext.cpp +++ /dev/null @@ -1,233 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/clComputeContext.h" - -#include - -#include "../far/stencilTables.h" -#include "../far/error.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ----------------------------------------------------------------------------- - -template cl_mem -createCLBuffer(std::vector const & src, cl_context clContext) { - - cl_mem devicePtr = 0; - cl_int errNum = 0; - - devicePtr = clCreateBuffer(clContext, - CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, - src.size()*sizeof(T), - (void*)(&src.at(0)), - &errNum); - - if (errNum != CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", errNum); - } - - return devicePtr; -} - -// ----------------------------------------------------------------------------- - -class CLComputeContext::CLStencilTables { - -public: - CLStencilTables(Far::StencilTables const & stencilTables, - cl_context clContext) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createCLBuffer(stencilTables.GetSizes(), clContext); - _offsets = createCLBuffer(stencilTables.GetOffsets(), clContext); - _indices = createCLBuffer(stencilTables.GetControlIndices(), - clContext); - _weights = createCLBuffer(stencilTables.GetWeights(), clContext); - } else { - _sizes = _offsets = _indices = _weights = NULL; - } - } - - ~CLStencilTables() { - if (_sizes) clReleaseMemObject(_sizes); - if (_offsets) clReleaseMemObject(_offsets); - if (_indices) clReleaseMemObject(_indices); - if (_weights) 
clReleaseMemObject(_weights); - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - cl_mem GetSizes() const { - return _sizes; - } - - cl_mem GetOffsets() const { - return _offsets; - } - - cl_mem GetIndices() const { - return _indices; - } - - cl_mem GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - -private: - cl_mem _sizes, - _offsets, - _indices, - _weights; - int _numStencils; -}; - -// ----------------------------------------------------------------------------- - -CLComputeContext::CLComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new CLStencilTables(*vertexStencilTables, - clContext); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = new CLStencilTables(*varyingStencilTables, - clContext); - - if (_numControlVertices) { - assert(_numControlVertices - == varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -CLComputeContext::~CLComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -CLComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->IsValid() : false; -} - -bool -CLComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -CLComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? 
_vertexStencilTables->GetNumStencils() : 0; -} - -int -CLComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; -} -// ---------------------------------------------------------------------------- - -cl_mem -CLComputeContext::GetVertexStencilTablesSizes() const { - return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0; -} - -cl_mem -CLComputeContext::GetVertexStencilTablesOffsets() const { - return _vertexStencilTables ? _vertexStencilTables->GetOffsets() : 0; -} - -cl_mem -CLComputeContext::GetVertexStencilTablesIndices() const { - return _vertexStencilTables ? _vertexStencilTables->GetIndices() : 0; -} - -cl_mem -CLComputeContext::GetVertexStencilTablesWeights() const { - return _vertexStencilTables ? _vertexStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -cl_mem -CLComputeContext::GetVaryingStencilTablesSizes() const { - return _varyingStencilTables ? _varyingStencilTables->GetSizes() : 0; -} - -cl_mem -CLComputeContext::GetVaryingStencilTablesOffsets() const { - return _varyingStencilTables ? _varyingStencilTables->GetOffsets() : 0; -} - -cl_mem -CLComputeContext::GetVaryingStencilTablesIndices() const { - return _varyingStencilTables ? _varyingStencilTables->GetIndices() : 0; -} - -cl_mem -CLComputeContext::GetVaryingStencilTablesWeights() const { - return _varyingStencilTables ? 
_varyingStencilTables->GetWeights() : 0; -} - - -// ----------------------------------------------------------------------------- - -CLComputeContext * -CLComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext) { - - CLComputeContext *result = - new CLComputeContext( - vertexStencilTables, varyingStencilTables, clContext); - - return result; -} - -// ----------------------------------------------------------------------------- -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/clComputeContext.h b/opensubdiv/osd/clComputeContext.h deleted file mode 100644 index 6ef2f50f..00000000 --- a/opensubdiv/osd/clComputeContext.h +++ /dev/null @@ -1,155 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#ifndef OSD_CL_COMPUTE_CONTEXT_H -#define OSD_CL_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include "../osd/nonCopyable.h" -#include "../osd/opencl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { class StencilTables; } - -namespace Osd { - -/// -/// \brief OpenCL Refine Context -/// -/// The OpenCL-Compute implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. -/// -class CLComputeContext : public NonCopyable { - -public: - /// Creates an CLComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param clContext An active OpenCL compute context - /// - static CLComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext); - - /// Creates an CLComputeContext instance (template version) - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param context A user defined class to provide cl_context. 
- /// must implement GetContext() - /// - template - static CLComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - DEVICE_CONTEXT context) { - return Create(vertexStencilTables, varyingStencilTables, - context->GetContext()); - } - - /// Destructor - virtual ~CLComputeContext(); - - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil sizes - cl_mem GetVertexStencilTablesSizes() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil offsets - cl_mem GetVertexStencilTablesOffsets() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil indices - cl_mem GetVertexStencilTablesIndices() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil weights - cl_mem GetVertexStencilTablesWeights() const; - - - /// Returns the Cuda buffer containing Varying-stencil stencil sizes - cl_mem GetVaryingStencilTablesSizes() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil offsets - cl_mem GetVaryingStencilTablesOffsets() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil indices - cl_mem GetVaryingStencilTablesIndices() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil weights - cl_mem GetVaryingStencilTablesWeights() const; - - -protected: - explicit CLComputeContext(Far::StencilTables const * 
vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - cl_context clContext); - -private: - class CLStencilTables; - - CLStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CL_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/clComputeController.cpp b/opensubdiv/osd/clComputeController.cpp deleted file mode 100644 index 30b6b65f..00000000 --- a/opensubdiv/osd/clComputeController.cpp +++ /dev/null @@ -1,271 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/clComputeController.h" -#include "../far/error.h" - -#if defined(_WIN32) - #include -#endif - -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -static const char *clSource = -#include "clKernel.gen.h" -; - -// ----------------------------------------------------------------------------- - -static cl_kernel buildKernel(cl_program prog, const char * name) { - - cl_int errNum; - cl_kernel k = clCreateKernel(prog, name, &errNum); - - if (errNum != CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel '%s' (%d)\n", name, errNum); - } - return k; -} - -// ----------------------------------------------------------------------------- - -class CLComputeController::KernelBundle : - NonCopyable { - -public: - - bool Compile(cl_context clContext, VertexBufferDescriptor const & desc) { - - cl_int errNum; - - _desc = VertexBufferDescriptor(0, desc.length, desc.stride); - - std::ostringstream defines; - defines << "#define OFFSET " << _desc.offset << "\n" - << "#define LENGTH " << _desc.length << "\n" - << "#define STRIDE " << _desc.stride << "\n"; - std::string defineStr = defines.str(); - - const char *sources[] = { defineStr.c_str(), clSource }; - _program = clCreateProgramWithSource(clContext, 2, sources, 0, &errNum); - if (errNum!=CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "clCreateProgramWithSource (%d)", errNum); - } - - errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL); - if (errNum != CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum); - - cl_int numDevices = 0; - clGetContextInfo(clContext, - CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL); - - cl_device_id *devices = new cl_device_id[numDevices]; - clGetContextInfo(clContext, CL_CONTEXT_DEVICES, - sizeof(cl_device_id)*numDevices, devices, NULL); - - for (int i = 0; i < numDevices; ++i) { - char cBuildLog[10240]; - clGetProgramBuildInfo(_program, 
devices[i], - CL_PROGRAM_BUILD_LOG, sizeof(cBuildLog), cBuildLog, NULL); - Far::Error(Far::FAR_RUNTIME_ERROR, cBuildLog); - } - delete[] devices; - - return false; - } - - // compile all cl compute kernels - _stencilsKernel = buildKernel(_program, "computeStencils"); - - return true; - } - - cl_kernel GetStencilsKernel() const { - return _stencilsKernel; - } - - struct Match { - - Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -private: - - cl_program _program; - - cl_kernel _stencilsKernel; - - VertexBufferDescriptor _desc; -}; - -// ---------------------------------------------------------------------------- - -void -CLComputeController::ApplyStencilTableKernel(ComputeContext const *context) { - - assert(context); - - cl_int errNum; - - size_t globalWorkSize = 0; - - int ncvs = context->GetNumControlVertices(); - - if (context->HasVertexStencilTables()) { - int start = 0; - int end = context->GetNumStencilsInVertexStencilTables(); - globalWorkSize = (size_t)(end - start); - - KernelBundle const * bundle = getKernel(_currentBindState.vertexDesc); - - cl_kernel kernel = bundle->GetStencilsKernel(); - - cl_mem sizes = context->GetVertexStencilTablesSizes(), - offsets = context->GetVertexStencilTablesOffsets(), - indices = context->GetVertexStencilTablesIndices(), - weights = context->GetVertexStencilTablesWeights(); - - clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.vertexBuffer); - - clSetKernelArg(kernel, 1, sizeof(cl_mem), &sizes); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &offsets); - clSetKernelArg(kernel, 3, sizeof(cl_mem), &indices); - clSetKernelArg(kernel, 4, sizeof(cl_mem), &weights); - - clSetKernelArg(kernel, 5, sizeof(int), &start); - clSetKernelArg(kernel, 6, sizeof(int), &end); - - clSetKernelArg(kernel, 7, sizeof(int), 
&_currentBindState.vertexDesc.offset); - clSetKernelArg(kernel, 8, sizeof(int), &ncvs); - - errNum = clEnqueueNDRangeKernel( - _clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); - if (errNum!=CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "ApplyStencilTableKernel (%d) ", errNum); - } - } - - if (context->HasVaryingStencilTables()) { - int start = 0; - int end = context->GetNumStencilsInVaryingStencilTables(); - globalWorkSize = (size_t)(end - start); - - KernelBundle const * bundle = getKernel(_currentBindState.varyingDesc); - - cl_kernel kernel = bundle->GetStencilsKernel(); - - cl_mem sizes = context->GetVaryingStencilTablesSizes(), - offsets = context->GetVaryingStencilTablesOffsets(), - indices = context->GetVaryingStencilTablesIndices(), - weights = context->GetVaryingStencilTablesWeights(); - - clSetKernelArg(kernel, 0, sizeof(cl_mem), &_currentBindState.varyingBuffer); - - clSetKernelArg(kernel, 1, sizeof(cl_mem), &sizes); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &offsets); - clSetKernelArg(kernel, 3, sizeof(cl_mem), &indices); - clSetKernelArg(kernel, 4, sizeof(cl_mem), &weights); - - clSetKernelArg(kernel, 5, sizeof(int), &start); - clSetKernelArg(kernel, 6, sizeof(int), &end); - - clSetKernelArg(kernel, 7, sizeof(int), &_currentBindState.varyingDesc.offset); - clSetKernelArg(kernel, 8, sizeof(int), &ncvs); - - errNum = clEnqueueNDRangeKernel( - _clQueue, kernel, 1, NULL, &globalWorkSize, NULL, 0, NULL, NULL); - if (errNum!=CL_SUCCESS) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "ApplyStencilTableKernel (%d)", errNum); - } - } -} - - -// ---------------------------------------------------------------------------- - -CLComputeController::KernelBundle const * -CLComputeController::getKernel(VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - if (it != _kernelRegistry.end()) { - return *it; - } else { - KernelBundle * 
kernelBundle = new KernelBundle(); - kernelBundle->Compile(_clContext, desc); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -// ---------------------------------------------------------------------------- - -CLComputeController::CLComputeController( - cl_context clContext, cl_command_queue queue) : - _clContext(clContext), _clQueue(queue) { -} - -CLComputeController::~CLComputeController() { - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } -} - -// ---------------------------------------------------------------------------- - -void -CLComputeController::Synchronize() { - - clFinish(_clQueue); -} - - -// ----------------------------------------------------------------------------- - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/clComputeController.h b/opensubdiv/osd/clComputeController.h deleted file mode 100644 index 16b59d11..00000000 --- a/opensubdiv/osd/clComputeController.h +++ /dev/null @@ -1,204 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CL_COMPUTE_CONTROLLER_H -#define OSD_CL_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/clComputeContext.h" -#include "../osd/vertexDescriptor.h" -#include "../osd/opencl.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class CLKernelBundle; - -/// \brief Compute controller for launching OpenCL Compute subdivision kernels. -/// -/// CLComputeController is a compute controller class to launch -/// OpenCL subdivision kernels. It requires CLVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class CLComputeController { -public: - typedef CLComputeContext ComputeContext; - - /// Constructor. - /// - /// @param clContext a valid instanciated OpenCL context - /// - /// @param queue a valid non-zero OpenCL command queue - /// - CLComputeController(cl_context clContext, cl_command_queue queue); - - /// Destructor. - ~CLComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CLContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. 
- /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CLComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CLContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CLComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - - /// Returns CL context - cl_context GetContext() const { return _clContext; } - - /// Returns CL command queue - cl_command_queue GetCommandQueue() const { return _clQueue; } - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context); - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? 
vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - vertexBuffer->BindCLBuffer(_clQueue) : 0; - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCLBuffer(_clQueue) : 0; - } - - void unbind() { - _currentBindState.Reset(); - } - - -private: - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = NULL; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - cl_mem vertexBuffer, - varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - typedef std::vector KernelRegistry; - - KernelRegistry _kernelRegistry; - - cl_context _clContext; - cl_command_queue _clQueue; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/clD3D11VertexBuffer.cpp b/opensubdiv/osd/clD3D11VertexBuffer.cpp old mode 100755 new mode 100644 diff --git a/opensubdiv/osd/clD3D11VertexBuffer.h b/opensubdiv/osd/clD3D11VertexBuffer.h index c28881a6..3d318934 100644 --- a/opensubdiv/osd/clD3D11VertexBuffer.h +++ b/opensubdiv/osd/clD3D11VertexBuffer.h @@ -51,7 +51,7 @@ namespace Osd { /// D3D11VertexBuffer implements CLVertexBufferInterface and /// D3D11VertexBufferInterface. 
/// -/// An instance of this buffer class can be passed to D3D11ComputeController. +/// An instance of this buffer class can be passed to D3D11ComputeEvaluator. /// class CLD3D11VertexBuffer { public: diff --git a/opensubdiv/osd/clEvaluator.cpp b/opensubdiv/osd/clEvaluator.cpp new file mode 100644 index 00000000..edc6fbee --- /dev/null +++ b/opensubdiv/osd/clEvaluator.cpp @@ -0,0 +1,206 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. 
+// + +#include "../osd/clEvaluator.h" + +#include +#include +#include + +#include "../osd/opencl.h" +#include "../far/error.h" +#include "../far/stencilTables.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +static const char *clSource = +#include "clKernel.gen.h" +; + +// ---------------------------------------------------------------------------- + +template cl_mem +createCLBuffer(std::vector const & src, cl_context clContext) { + cl_int errNum = 0; + cl_mem devicePtr = clCreateBuffer(clContext, + CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, + src.size()*sizeof(T), + (void*)(&src.at(0)), + &errNum); + + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "clCreateBuffer: %d", errNum); + } + + return devicePtr; +} + +// ---------------------------------------------------------------------------- + +CLStencilTables::CLStencilTables(Far::StencilTables const *stencilTables, + cl_context clContext) { + _numStencils = stencilTables->GetNumStencils(); + + if (_numStencils > 0) { + _sizes = createCLBuffer(stencilTables->GetSizes(), clContext); + _offsets = createCLBuffer(stencilTables->GetOffsets(), clContext); + _indices = createCLBuffer(stencilTables->GetControlIndices(), + clContext); + _weights = createCLBuffer(stencilTables->GetWeights(), clContext); + } else { + _sizes = _offsets = _indices = _weights = NULL; + } +} + +CLStencilTables::~CLStencilTables() { + if (_sizes) clReleaseMemObject(_sizes); + if (_offsets) clReleaseMemObject(_offsets); + if (_indices) clReleaseMemObject(_indices); + if (_weights) clReleaseMemObject(_weights); +} + +// --------------------------------------------------------------------------- + +CLEvaluator::CLEvaluator(cl_context context, cl_command_queue queue) + : _clContext(context), _clCommandQueue(queue), + _program(NULL), _stencilsKernel(NULL) { +} + +CLEvaluator::~CLEvaluator() { + if (_stencilsKernel) clReleaseKernel(_stencilsKernel); + if (_program) clReleaseProgram(_program); +} + +bool 
+CLEvaluator::Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc) { + if (srcDesc.length > dstDesc.length) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "srcDesc length must be less than or equal to " + "dstDesc length.\n"); + return false; + } + + cl_int errNum; + + std::ostringstream defines; + defines << "#define LENGTH " << srcDesc.length << "\n" + << "#define SRC_STRIDE " << srcDesc.stride << "\n" + << "#define DST_STRIDE " << dstDesc.stride << "\n"; + std::string defineStr = defines.str(); + + const char *sources[] = { defineStr.c_str(), clSource }; + _program = clCreateProgramWithSource(_clContext, 2, sources, 0, &errNum); + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "clCreateProgramWithSource (%d)", errNum); + } + + errNum = clBuildProgram(_program, 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "clBuildProgram (%d) \n", errNum); + + cl_int numDevices = 0; + clGetContextInfo( + _clContext, CL_CONTEXT_NUM_DEVICES, + sizeof(cl_uint), &numDevices, NULL); + + cl_device_id *devices = new cl_device_id[numDevices]; + clGetContextInfo(_clContext, CL_CONTEXT_DEVICES, + sizeof(cl_device_id)*numDevices, devices, NULL); + + for (int i = 0; i < numDevices; ++i) { + char cBuildLog[10240]; + clGetProgramBuildInfo( + _program, devices[i], + CL_PROGRAM_BUILD_LOG, sizeof(cBuildLog), cBuildLog, NULL); + Far::Error(Far::FAR_RUNTIME_ERROR, cBuildLog); + } + delete[] devices; + + return false; + } + + _stencilsKernel = clCreateKernel(_program, "computeStencils", &errNum); + + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, "buildKernel (%d)\n", errNum); + return false; + } + return true; +} + +bool +CLEvaluator::EvalStencils(cl_mem src, + VertexBufferDescriptor const &srcDesc, + cl_mem dst, + VertexBufferDescriptor const &dstDesc, + cl_mem sizes, + cl_mem offsets, + cl_mem indices, + cl_mem weights, + int start, + int end) const { + if (end <= start) return 
true; + + size_t globalWorkSize = (size_t)(end - start); + + clSetKernelArg(_stencilsKernel, 0, sizeof(cl_mem), &src); + clSetKernelArg(_stencilsKernel, 1, sizeof(int), &srcDesc.offset); + clSetKernelArg(_stencilsKernel, 2, sizeof(cl_mem), &dst); + clSetKernelArg(_stencilsKernel, 3, sizeof(int), &dstDesc.offset); + clSetKernelArg(_stencilsKernel, 4, sizeof(cl_mem), &sizes); + clSetKernelArg(_stencilsKernel, 5, sizeof(cl_mem), &offsets); + clSetKernelArg(_stencilsKernel, 6, sizeof(cl_mem), &indices); + clSetKernelArg(_stencilsKernel, 7, sizeof(cl_mem), &weights); + clSetKernelArg(_stencilsKernel, 8, sizeof(int), &start); + clSetKernelArg(_stencilsKernel, 9, sizeof(int), &end); + + cl_int errNum = clEnqueueNDRangeKernel( + _clCommandQueue, _stencilsKernel, 1, NULL, + &globalWorkSize, NULL, 0, NULL, NULL); + + if (errNum != CL_SUCCESS) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "ApplyStencilTableKernel (%d) ", errNum); + return false; + } + + clFinish(_clCommandQueue); + return true; +} + +/* static */ +void +CLEvaluator::Synchronize(cl_command_queue clCommandQueue) { + clFinish(clCommandQueue); +} + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/clEvaluator.h b/opensubdiv/osd/clEvaluator.h new file mode 100644 index 00000000..73fff8ff --- /dev/null +++ b/opensubdiv/osd/clEvaluator.h @@ -0,0 +1,230 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_CL_EVALUATOR_H +#define OPENSUBDIV_OSD_CL_EVALUATOR_H + +#include "../version.h" + +#include "../osd/opencl.h" +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far { + class StencilTables; +} + +namespace Osd { + +/// \brief OpenCL stencil tables +/// +/// This class is an OpenCL buffer representation of Far::StencilTables. +/// +/// CLCompute consumes this table to apply stencils +/// +/// +class CLStencilTables { +public: + template + static CLStencilTables *Create(Far::StencilTables const *stencilTables, + DEVICE_CONTEXT context) { + return new CLStencilTables(stencilTables, context->GetContext()); + } + + CLStencilTables(Far::StencilTables const *stencilTables, + cl_context clContext); + ~CLStencilTables(); + + // interfaces needed for CLComputeKernel + cl_mem GetSizesBuffer() const { return _sizes; } + cl_mem GetOffsetsBuffer() const { return _offsets; } + cl_mem GetIndicesBuffer() const { return _indices; } + cl_mem GetWeightsBuffer() const { return _weights; } + int GetNumStencils() const { return _numStencils; } + +private: + cl_mem _sizes; + cl_mem _offsets; + cl_mem _indices; + cl_mem _weights; + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +/// \brief OpenCL stencil kernel +/// +/// +class CLEvaluator { +public: + typedef bool Instantiatable; + /// Constructor. 
+ CLEvaluator(cl_context context, cl_command_queue queue); + + /// Desctructor. + ~CLEvaluator(); + + /// Generic creator template. + template + static CLEvaluator *Create(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + DEVICE_CONTEXT deviceContext) { + return Create(srcDesc, dstDesc, + deviceContext->GetContext(), + deviceContext->GetCommandQueue()); + } + + static CLEvaluator * Create(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + cl_context clContext, + cl_command_queue clCommandQueue) { + CLEvaluator *kernel = new CLEvaluator(clContext, clCommandQueue); + if (kernel->Compile(srcDesc, dstDesc)) return kernel; + delete kernel; + return NULL; + } + + /// \brief Generic static compute function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCLBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCLBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTables stencil table to be applied. The table must have + /// OpenCL memory interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext client providing context class which supports + /// cL_context GetContext() + /// cl_command_queue GetCommandQueue() + /// methods. 
+ /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + CLEvaluator const *instance, + DEVICE_CONTEXT deviceContext) { + if (instance) { + return instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable); + } else { + // Create an instance on demand (slow) + instance = Create(srcDesc, dstDesc, deviceContext); + if (instance) { + bool r = instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// Generic compute function. + /// Dispatch the CL compute kernel asynchronously. + /// Returns false if the kernel hasn't been compiled yet. + template + bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const { + return EvalStencils(srcVertexBuffer->BindCLBuffer(_clCommandQueue), + srcDesc, + dstVertexBuffer->BindCLBuffer(_clCommandQueue), + dstDesc, + stencilTable->GetSizesBuffer(), + stencilTable->GetOffsetsBuffer(), + stencilTable->GetIndicesBuffer(), + stencilTable->GetWeightsBuffer(), + 0, + stencilTable->GetNumStencils()); + } + + /// Dispatch the CL compute kernel asynchronously. + /// returns false if the kernel hasn't been compiled yet. + bool EvalStencils(cl_mem src, + VertexBufferDescriptor const &srcDesc, + cl_mem dst, + VertexBufferDescriptor const &dstDesc, + cl_mem sizes, + cl_mem offsets, + cl_mem indices, + cl_mem weights, + int start, + int end) const; + + /// Configure OpenCL kernel. + /// Returns false if it fails to compile the kernel. + bool Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc); + + /// Wait the OpenCL kernels finish. 
+ template + static void Synchronize(DEVICE_CONTEXT deviceContext) { + Synchronize(deviceContext->GetCommandQueue()); + } + + static void Synchronize(cl_command_queue queue); + +private: + cl_context _clContext; + cl_command_queue _clCommandQueue; + cl_program _program; + cl_kernel _stencilsKernel; +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_CL_EVALUATOR_H diff --git a/opensubdiv/osd/clKernel.cl b/opensubdiv/osd/clKernel.cl index f95b78d3..de8c42c8 100644 --- a/opensubdiv/osd/clKernel.cl +++ b/opensubdiv/osd/clKernel.cl @@ -36,7 +36,7 @@ static void addWithWeight(struct Vertex *dst, __global float *srcOrigin, int index, float weight) { - __global float *src = srcOrigin + index * STRIDE; + __global float *src = srcOrigin + index * SRC_STRIDE; for (int i = 0; i < LENGTH; ++i) { dst->v[i] += src[i] * weight; } @@ -46,40 +46,42 @@ static void writeVertex(__global float *dstOrigin, int index, struct Vertex *src) { - __global float *dst = dstOrigin + index * STRIDE; + __global float *dst = dstOrigin + index * DST_STRIDE; for (int i = 0; i < LENGTH; ++i) { dst[i] = src->v[i]; } } -__kernel void computeStencils( __global float * vertexBuffer, - __global unsigned char * sizes, - __global int * offsets, - __global int * indices, - __global float * weights, - int batchStart, - int batchEnd, - int primvarOffset, - int numCVs ) { +__kernel void computeStencils(__global float * src, + int srcOffset, + __global float * dst, + int dstOffset, + __global unsigned char * sizes, + __global int * offsets, + __global int * indices, + __global float * weights, + int batchStart, + int batchEnd) { int current = get_global_id(0) + batchStart; - if (current>=batchEnd) { - return; - } - - struct Vertex dst; - clear(&dst); + if (current>=batchEnd) { + return; + } + + struct Vertex v; + clear(&v); int size = (int)sizes[current], offset = offsets[current]; - 
vertexBuffer += primvarOffset; + src += srcOffset; + dst += dstOffset; for (int i=0; i - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ---------------------------------------------------------------------------- - -CpuComputeContext::CpuComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0) { - - // XXXX manuelk we do not own the tables, so use copy-constructor for now - // smart pointers eventually - if (vertexStencilTables) { - _vertexStencilTables = new Far::StencilTables(*vertexStencilTables); - } - - if (varyingStencilTables) { - _varyingStencilTables = new Far::StencilTables(*varyingStencilTables); - } -} - -// ---------------------------------------------------------------------------- - -CpuComputeContext::~CpuComputeContext() { - - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -CpuComputeContext * -CpuComputeContext::Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - return new CpuComputeContext(vertexStencilTables, varyingStencilTables); -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuComputeContext.h b/opensubdiv/osd/cpuComputeContext.h deleted file mode 100644 index 4f024897..00000000 --- a/opensubdiv/osd/cpuComputeContext.h +++ /dev/null @@ -1,102 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_COMPUTE_CONTEXT_H -#define OSD_CPU_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { class StencilTables; } - -namespace Osd { - -/// -/// \brief CPU Compute Context -/// -/// The CPU implementation of the Compute module contextual functionality. -/// -/// The Osd Compute module provides functionality to interpolate primitive -/// variable data according to a subdivision scheme. -/// -/// Contexts provide an interface between the serialized topological data -/// of a geometric primitive and the computation resources of a compute device. 
-/// -class CpuComputeContext : private NonCopyable { -public: - /// Creates an CpuComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext (not used) - /// - static CpuComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~CpuComputeContext(); - - /// Returns the stencils data applied by this context for vertex - /// interpolation - Far::StencilTables const * GetVertexStencilTables() const { - return _vertexStencilTables; - } - - /// Returns the stencils data applied by this context for varying - /// interpolation - Far::StencilTables const * GetVaryingStencilTables() const { - return _varyingStencilTables; - } - -protected: - explicit CpuComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - Far::StencilTables const * _vertexStencilTables, - * _varyingStencilTables; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CPU_COMPUTE_CONTEXT_H - - diff --git a/opensubdiv/osd/cpuComputeController.cpp b/opensubdiv/osd/cpuComputeController.cpp deleted file mode 100644 index c23130d9..00000000 --- a/opensubdiv/osd/cpuComputeController.cpp +++ /dev/null @@ -1,110 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../far/stencilTables.h" -#include "../osd/cpuComputeContext.h" -#include "../osd/cpuComputeController.h" -#include "../osd/cpuKernel.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuComputeController::CpuComputeController() { -} - -CpuComputeController::~CpuComputeController() { -} - -void -CpuComputeController::Synchronize() { -} - -void -CpuComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - Far::StencilTables const * vertexStencils = context->GetVertexStencilTables(); - - if (vertexStencils and _currentBindState.vertexBuffer) { - - VertexBufferDescriptor const & desc = _currentBindState.vertexDesc; - - float const * srcBuffer = _currentBindState.vertexBuffer + desc.offset; - - float * destBuffer = _currentBindState.vertexBuffer + desc.offset + - vertexStencils->GetNumControlVertices() * desc.stride; - - int start = 0; - int end = vertexStencils->GetNumStencils(); - - if (end > start) { - CpuComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - 
&vertexStencils->GetWeights().at(0), - start, - end); - } - } - - Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); - - if (varyingStencils and _currentBindState.varyingBuffer) { - - VertexBufferDescriptor const & desc = _currentBindState.varyingDesc; - - float const * srcBuffer = _currentBindState.varyingBuffer + desc.offset; - - float * destBuffer = _currentBindState.varyingBuffer + desc.offset + - varyingStencils->GetNumControlVertices() * desc.stride; - - int start = 0; - int end = varyingStencils->GetNumStencils(); - - if (end > start) { - CpuComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - start, - end); - } - } -} - - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuComputeController.h b/opensubdiv/osd/cpuComputeController.h deleted file mode 100644 index 2a9edde7..00000000 --- a/opensubdiv/osd/cpuComputeController.h +++ /dev/null @@ -1,179 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_COMPUTE_CONTROLLER_H -#define OSD_CPU_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuComputeContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching CPU subdivision kernels. -/// -/// CpuComputeController is a compute controller class to launch -/// single threaded CPU subdivision kernels. It requires -/// CpuVertexBufferInterface as arguments of the Refine() function. -/// -/// The Osd Compute module provides functionality to interpolate primitive -/// variable data according to a subdivision scheme. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class CpuComputeController { -public: - typedef CpuComputeContext ComputeContext; - - /// Constructor. - CpuComputeController(); - - /// Destructor. - ~CpuComputeController(); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. 
- /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CpuComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CpuComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? 
- vertexBuffer->BindCpuBuffer() : 0; - - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCpuBuffer() : 0; - } - - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = 0; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float * vertexBuffer, - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CPU_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/cpuD3D11VertexBuffer.h b/opensubdiv/osd/cpuD3D11VertexBuffer.h index eade74dd..145ec599 100644 --- a/opensubdiv/osd/cpuD3D11VertexBuffer.h +++ b/opensubdiv/osd/cpuD3D11VertexBuffer.h @@ -43,7 +43,7 @@ namespace Osd { /// CpuD3D11VertexBuffer implements CpuVertexBufferInterface and /// D3D11VertexBufferInterface. /// -/// An instance of this buffer class can be passed to CpuComputeController. +/// An instance of this buffer class can be passed to CpuEvaluator. /// class CpuD3D11VertexBuffer { public: diff --git a/opensubdiv/osd/cpuEvalLimitContext.cpp b/opensubdiv/osd/cpuEvalLimitContext.cpp deleted file mode 100644 index f505ac94..00000000 --- a/opensubdiv/osd/cpuEvalLimitContext.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/cpuEvalLimitContext.h" -#include "../osd/vertexDescriptor.h" - -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuEvalLimitContext * -CpuEvalLimitContext::Create(Far::PatchTables const & patchTables) { - - // there is no limit with uniform subdivision - if (not patchTables.IsFeatureAdaptive()) - return NULL; - - return new CpuEvalLimitContext(patchTables); -} - -CpuEvalLimitContext::CpuEvalLimitContext(Far::PatchTables const & patchTables) : - EvalLimitContext(patchTables), - _patchTables(patchTables), - _patchMap(patchTables) { -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuEvalLimitContext.h b/opensubdiv/osd/cpuEvalLimitContext.h deleted file mode 100644 index 11a2f8b0..00000000 --- a/opensubdiv/osd/cpuEvalLimitContext.h +++ /dev/null @@ -1,80 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. 
is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_EVAL_LIMIT_CONTEXT_H -#define OSD_CPU_EVAL_LIMIT_CONTEXT_H - -#include "../version.h" - -#include "../osd/evalLimitContext.h" -#include "../far/patchTables.h" -#include "../far/patchMap.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class CpuEvalLimitContext : public EvalLimitContext { -public: - - /// \brief Factory - /// Returns an EvalLimitContext from the given far patch tables. - /// Note : the patchtables is expected to be feature-adaptive and have ptex - /// coordinates tables. 
- /// - /// @param patchTables a pointer to an initialized Far::PatchTables - /// - static CpuEvalLimitContext * Create(Far::PatchTables const &patchTables); - - Far::PatchTables const & GetPatchTables() const { - return _patchTables; - } - - Far::PatchMap const & GetPatchMap() const { - return _patchMap; - } - -protected: - - explicit CpuEvalLimitContext(Far::PatchTables const & patchTables); - -private: - - Far::PatchTables const _patchTables; // Patch topology data - Far::PatchMap const _patchMap; // Patch search accelerator -}; - - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* OSD_CPU_EVAL_LIMIT_CONTEXT_H */ diff --git a/opensubdiv/osd/cpuEvalLimitController.cpp b/opensubdiv/osd/cpuEvalLimitController.cpp deleted file mode 100644 index f0498456..00000000 --- a/opensubdiv/osd/cpuEvalLimitController.cpp +++ /dev/null @@ -1,288 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/cpuEvalLimitController.h" -#include "../osd/cpuEvalLimitContext.h" -#include "../osd/cpuEvalLimitKernel.h" -#include "../far/patchTables.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuEvalLimitController::CpuEvalLimitController() { -} - -CpuEvalLimitController::~CpuEvalLimitController() { -} - -// Vertex interpolation of a sample at the limit -int -CpuEvalLimitController::EvalLimitSample( LimitLocation const & coord, - CpuEvalLimitContext * context, - VertexBufferDescriptor const & outDesc, - float * outQ, - float * outDQU, - float * outDQV ) const { - typedef Far::PatchDescriptor Desc; - - float s=coord.s, - t=coord.t; - - Far::PatchMap::Handle const * handle = context->GetPatchMap().FindPatch( coord.ptexIndex, s, t ); - if (not handle) { - return 0; // no handle if there is a hole or 'coord' is incorrect - } - - VertexData const & vertexData = _currentBindState.vertexData; - - if (vertexData.in) { - - Far::PatchTables const & ptables = context->GetPatchTables(); - - Far::PatchParam pparam = ptables.GetPatchParam(*handle); - - Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle); - - Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle); - switch (desc.GetType()) { - case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - 
ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - case Desc::GREGORY_BASIS : { - evalGregoryBasis( pparam.bitField, s, t, - cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - outQ, outDQU, outDQV ); - } break; - case Desc::QUADS : evalBilinear( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - outDesc, - outQ, outDQU, outDQV ); - break; - default: - assert(0); - } - } - assert(0); - return 1; -} - -// Vertex interpolation of samples at the limit -int -CpuEvalLimitController::_EvalLimitSample( LimitLocation const & coords, - CpuEvalLimitContext * context, - unsigned int index ) const { - typedef Far::PatchDescriptor Desc; - - float s=coords.s, - t=coords.t; - - Far::PatchMap::Handle const * handle = context->GetPatchMap().FindPatch( coords.ptexIndex, s, t ); - if (not handle) { - return 0; // no handle if there is a hole or 'coord' is incorrect - } - - VertexData const & vertexData = _currentBindState.vertexData; - - Far::PatchTables const & ptables = context->GetPatchTables(); - - Far::PatchParam pparam = ptables.GetPatchParam(*handle); - - Far::PatchDescriptor desc = ptables.GetPatchDescriptor(*handle); - - Far::ConstIndexArray cvs = ptables.GetPatchVertices(*handle); - - if (vertexData.in) { - - int offset = vertexData.outDesc.stride * index, - doffset = vertexData.outDesc.length * index; - - if (vertexData.out) { - - // note : don't apply outDesc.offset here, it's done inside patch - // evaluation - float * out = vertexData.out+offset, - * outDu = vertexData.outDu ? 
vertexData.outDu+doffset : 0, - * outDv = vertexData.outDv ? vertexData.outDv+doffset : 0; - - switch (desc.GetType()) { - case Desc::REGULAR : evalBSpline( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::BOUNDARY : evalBoundary( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::CORNER : evalCorner( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::GREGORY : evalGregory( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::GREGORY_BOUNDARY : evalGregoryBoundary( pparam.bitField, t, s, cvs.begin(), - &ptables.GetVertexValenceTable()[0], - ptables.GetPatchQuadOffsets(*handle).begin(), - ptables.GetMaxValence(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - case Desc::GREGORY_BASIS : { - evalGregoryBasis( pparam.bitField, s, t, - cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - } break; - case Desc::QUADS : evalBilinear( pparam.bitField, s, t, cvs.begin(), - vertexData.inDesc, - vertexData.in, - vertexData.outDesc, - out, outDu, outDv ); - break; - default: - assert(0); - } - } - } - - VaryingData const & varyingData = _currentBindState.varyingData; - - if (varyingData.in and varyingData.out) { - - static int const zeroRings[6][4] = { {5, 6,10, 9}, // regular - {1, 2, 6, 5}, // boundary / single-crease - {1, 2, 5, 4}, // corner - {0, 1, 2, 3}, // no permutation - {0, 5, 10, 15} }; // gregory basis - - int const * permute = 0; - switch (desc.GetType()) { - case Desc::REGULAR : permute = zeroRings[0]; 
break; - case Desc::BOUNDARY : permute = zeroRings[1]; break; - case Desc::CORNER : permute = zeroRings[2]; break; - case Desc::GREGORY : - case Desc::GREGORY_BOUNDARY : permute = zeroRings[3]; break; - case Desc::GREGORY_BASIS : permute = zeroRings[4]; break; - default: - assert(0); - }; - - int offset = varyingData.outDesc.stride * index; - - Far::Index zeroRing[4] = { cvs[permute[0]], - cvs[permute[1]], - cvs[permute[2]], - cvs[permute[3]] }; - - evalBilinear( pparam.bitField, s, t, zeroRing, - varyingData.inDesc, - varyingData.in, - varyingData.outDesc, - varyingData.out+offset, 0, 0); - - } - - // Note : currently we only support bilinear boundary interpolation rules - // for limit face-varying data. - - FacevaryingData const & facevaryingData = _currentBindState.facevaryingData; - - if (facevaryingData.in and facevaryingData.out) { - - int offset = facevaryingData.outDesc.stride * index; - - static int const zeroRing[4] = {0,1,2,3}; - - // XXXX manuelk this assumes FVar data is ordered with 4 CVs / patch : - // bi-cubic FVar interpolation will require proper topology - // accessors in Far::PatchTables and this code will change - evalBilinear( pparam.bitField, s, t, zeroRing, - facevaryingData.inDesc, - &facevaryingData.in[handle->patchIndex*4*facevaryingData.outDesc.stride], - facevaryingData.outDesc, - facevaryingData.out+offset, 0, 0); - - } - return 1; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuEvalLimitController.h b/opensubdiv/osd/cpuEvalLimitController.h deleted file mode 100644 index 432fe8ba..00000000 --- a/opensubdiv/osd/cpuEvalLimitController.h +++ /dev/null @@ -1,298 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. 
is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CPU_EVAL_LIMIT_CONTROLLER_H -#define OSD_CPU_EVAL_LIMIT_CONTROLLER_H - -#include "../version.h" - -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -struct LimitLocation; -class CpuEvalLimitContext; - -/// \brief CPU controler for limit surface evaluation. -/// -/// A CPU-driven controller that can be called to evaluate samples on the limit -/// surface for a given EvalContext. -/// -/// Warning : this eval controller is re-entrant but it breaks the Osd API pattern -/// by requiring client code to bind and unbind the data buffers to the -/// Controller before calling evaluation methods. -/// -/// Ex : -/// \code -/// evalCtroller->BindVertexBuffers( ... ); -/// evalCtroller->BindVaryingBuffers( ... ); -/// evalCtroller->BindFacevaryingBuffers( ... ); -/// -/// parallel_for( int index=0; iEvalLimitSample( coord, evalCtxt, index ); -/// } -/// -/// evalCtroller->Unbind(); -/// \endcode -/// -class CpuEvalLimitController { - -public: - /// Constructor. - CpuEvalLimitController(); - - /// Destructor. 
- ~CpuEvalLimitController(); - - /// \brief Binds control vertex data buffer - /// - /// @param iDesc data descriptor shared by all input data buffers - /// - /// @param inQ input vertex data - /// - /// @param oDesc data descriptor for the outQ data buffer - /// -- derivative buffers do not have a descriptor and - /// cannot be offset or padded with a stride (yet ?) - /// - /// @param outQ output vertex data - /// - /// @param outdQu output derivative along "u" of the vertex data (optional) - /// - /// @param outdQv output derivative along "v" of the vertex data (optional) - /// - template - void BindVertexBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ, - VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ, - OUTPUT_BUFFER *outdQu=0, - OUTPUT_BUFFER *outdQv=0 ) { - _currentBindState.vertexData.inDesc = iDesc; - _currentBindState.vertexData.in = inQ ? inQ->BindCpuBuffer() : 0; - - _currentBindState.vertexData.outDesc = oDesc; - _currentBindState.vertexData.out = outQ ? outQ->BindCpuBuffer() : 0; - _currentBindState.vertexData.outDu = outdQu ? outdQu->BindCpuBuffer() : 0; - _currentBindState.vertexData.outDv = outdQv ? outdQv->BindCpuBuffer() : 0; - } - - /// \brief Binds the varying-interpolated data streams - /// - /// @param iDesc data descriptor shared by all input data buffers - /// - /// @param inQ input varying data - /// - /// @param oDesc data descriptor for the outQ data buffer - /// - /// @param outQ output varying data - /// - template - void BindVaryingBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ, - VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ ) { - _currentBindState.varyingData.inDesc = iDesc; - _currentBindState.varyingData.in = inQ ? inQ->BindCpuBuffer() : 0; - - _currentBindState.varyingData.outDesc = oDesc; - _currentBindState.varyingData.out = outQ ? 
outQ->BindCpuBuffer() : 0; - } - - /// \brief Binds the face-varying-interpolated data streams - /// - /// Note : currently we only support bilinear boundary interpolation rules - /// for face-varying data. Although Hbr supports 3 addition smooth rule sets, - /// the feature-adaptive patch interpolation code currently does not support - /// them, and neither does this EvalContext - /// - /// @param iDesc data descriptor shared by all input data buffers - /// - /// @param inQ input face-varying data - /// - /// @param oDesc data descriptor for the outQ data buffer - /// - /// @param outQ output face-varying data - /// - template - void BindFacevaryingBuffers( VertexBufferDescriptor const & iDesc, INPUT_BUFFER *inQ, - VertexBufferDescriptor const & oDesc, OUTPUT_BUFFER *outQ ) { - _currentBindState.facevaryingData.inDesc = iDesc; - _currentBindState.facevaryingData.in = inQ ? inQ->BindCpuBuffer() : 0; - - _currentBindState.facevaryingData.outDesc = oDesc; - _currentBindState.facevaryingData.out = outQ ? outQ->BindCpuBuffer() : 0; - } - - /// \brief Vertex interpolation of a single sample at the limit - /// - /// Evaluates "vertex" interpolation of a single sample on the surface limit. - /// - /// This function is re-entrant but does not require binding the - /// output vertex buffers. Pointers to memory where the data is - /// output are explicitly passed to the function. - /// - /// @param coord location on the limit surface to be evaluated - /// - /// @param context the EvalLimitContext that the controller will evaluate - /// - /// @param outDesc data descriptor for the outQ data buffer - /// -- derivative buffers do not have a descriptor and - /// cannot be offset or padded with a stride (yet ?) 
- /// - /// @param outQ output vertex data - /// - /// @param outDQU output derivative along "u" of the vertex data (optional) - /// - /// @param outDQV output derivative along "v" of the vertex data (optional) - /// - /// @return 1 if the sample was found - /// - int EvalLimitSample( LimitLocation const & coord, - CpuEvalLimitContext * context, - VertexBufferDescriptor const & outDesc, - float * outQ, - float * outDQU, - float * outDQV ) const; - - /// \brief Vertex interpolation of samples at the limit - /// - /// Evaluates "vertex" interpolation of a sample on the surface limit. - /// - /// @param coords location on the limit surface to be evaluated - /// - /// @param context the EvalLimitContext that the controller will evaluate - /// - /// @param index the index of the vertex in the output buffers bound to the - /// context - /// - /// @return the number of samples found (0 if the location was tagged as a hole - /// or the coordinate was invalid) - /// - int EvalLimitSample( LimitLocation const & coords, - CpuEvalLimitContext * context, - unsigned int index ) const { - if (not context) - return 0; - - int n = _EvalLimitSample( coords, context, index ); - - return n; - } - - void Unbind() { - _currentBindState.Reset(); - } - -protected: - - - // Vertex interpolated streams - struct VertexData { - - VertexData() : in(0), out(0), outDu(0), outDv(0) { } - - - void Reset() { - in = out = outDu = outDv = NULL; - inDesc.Reset(); - outDesc.Reset(); - } - - VertexBufferDescriptor inDesc, - outDesc; - float * in, - * out, - * outDu, - * outDv; - }; - - // Varying interpolated streams - struct VaryingData { - - VaryingData() : in(0), out(0) { } - - - void Reset() { - in = out = NULL; - inDesc.Reset(); - outDesc.Reset(); - } - - VertexBufferDescriptor inDesc, - outDesc; - float * in, - * out; - }; - - // Facevarying interpolated streams - struct FacevaryingData { - - FacevaryingData() : in(0), out(0) { } - - void Reset() { - in = out = NULL; - inDesc.Reset(); - 
outDesc.Reset(); - } - - VertexBufferDescriptor inDesc, - outDesc; - float * in, - * out; - }; - - -private: - - int _EvalLimitSample( LimitLocation const & coords, - CpuEvalLimitContext * context, - unsigned int index ) const; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. - struct BindState { - - BindState() { } - - void Reset() { - vertexData.Reset(); - varyingData.Reset(); - facevaryingData.Reset(); - } - - VertexData vertexData; // vertex interpolated data descriptor - VaryingData varyingData; // varying interpolated data descriptor - FacevaryingData facevaryingData; // face-varying interpolated data descriptor - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* OSD_CPU_EVAL_LIMIT_CONTROLLER_H */ diff --git a/opensubdiv/osd/cpuEvalLimitKernel.cpp b/opensubdiv/osd/cpuEvalLimitKernel.cpp deleted file mode 100644 index 7f8488dc..00000000 --- a/opensubdiv/osd/cpuEvalLimitKernel.cpp +++ /dev/null @@ -1,1131 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/cpuEvalLimitKernel.h" -#include "../far/interpolate.h" -#include "../far/stencilTables.h" - -#include -#include -#include -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#ifdef TENSOR_PRODUCT_CUBIC_SPLINES - -// manuelk code was refactored to use the matrix formulation of cubic splines -// exposed in Far::PatchTables for consistency. I am keeping these temporarily -// for reference. - -inline void -cubicBezier(float u, float B[4], float BU[3]) { - float u2 = u*u, - w0 = 1.0f - u, - w2 = w0 * w0; - - B[0] = w0*w2; - B[1] = 3.0f * u * w2; - B[2] = 3.0f * u2 * w0; - B[3] = u*u2; - - if (BU) { - BU[0] = w2; - BU[1] = 2.0f * u * w0; - BU[2] = u2; - } -} - -inline void -cubicBSpline(float u, float B[4], float BU[4]) { - float t = u; - float s = 1.0f - u; - - float A0 = s * (0.5f * s); - float A1 = t * (s + 0.5f * t) + s * (0.5f * s + t); - float A2 = t * ( 0.5f * t); - - B[0] = 1.f/3.f * s * A0; - B[1] = (2.f/3.f * s + t) * A0 + (2.f/3.f * s + 1.f/3.f * t) * A1; - B[2] = (1.f/3.f * s + 2.f/3.f * t) * A1 + ( s + 2.f/3.f * t) * A2; - B[3] = 1.f/3.f * t * A2; - - if (BU) { - BU[0] = - A0; - BU[1] = A0 - A1; - BU[2] = A1 - A2; - BU[3] = A2; - } -} - -inline void -univar4x4(float u, float B[4], float D[4]) { - - float t = u; - float s = 1.0f - u; - - float A0 = s * s; - float A1 = 2 * s * t; - float A2 = t * t; - - B[0] = s * A0; - B[1] = t * A0 + s * A1; - B[2] = t * A1 + s * A2; - B[3] = t * A2; - - if 
(D) { - D[0] = - A0; - D[1] = A0 - A1; - D[2] = A1 - A2; - D[3] = A2; - } -} - -#endif - -void -evalBilinear(Far::PatchParam::BitField bits, - float s, float t, - Far::Index const * vertexIndices, - VertexBufferDescriptor const & inDesc, - float const * inQ, - VertexBufferDescriptor const & outDesc, - float * outQ, - float * outDQ1, - float * outDQ2) { - - assert( outQ and inDesc.length <= (outDesc.stride-outDesc.offset) ); - - float Q[4], dQ1[4], dQ2[4]; - Far::GetBilinearWeights(bits, s, t, outQ ? Q:0, outDQ1 ? dQ1:0, outDQ2 ? dQ2:0); - - float const * inOffset = inQ + inDesc.offset; - - outQ += outDesc.offset; - - memset(outQ, 0, inDesc.length*sizeof(float)); - if (outDQ1) { - memset(outDQ1, 0, inDesc.length*sizeof(float)); - } - if (outDQ2) { - memset(outDQ2, 0, inDesc.length*sizeof(float)); - } - - for (int i=0; i<4; ++i) { - - float const * in = inOffset + vertexIndices[i]*inDesc.stride; - - for (int k=0; k= 0) - ? inOffset + vertexIndices[index]*inDesc.stride - : &CP[(-index-1)*inDesc.length]; - - for (int k=0; k2) { - for (int k=0; k0); - float const * diagonal = inOffset + idx_diagonal * inDesc.stride; - - for (int j=0; j0 and idx_diagonal>0 ); - - float const * neighbor = inOffset + idx_neighbor * inDesc.stride; - diagonal = inOffset + idx_diagonal * inDesc.stride; - - for (int j=0; j 2) { - float s1 = 3.0f - 2.0f*csf(n-3,2)-csf(np-3,2), - s2 = 2.0f*csf(n-3,2), - s3 = 3.0f -2.0f*cosf(2.0f*float(M_PI)/float(n)) - cosf(2.0f*float(M_PI)/float(nm)); - - for (int k=0, ofs=vofs; k { - -public: - /// \brief Creates an CpuEvalStencilsContext instance - /// - /// @param stencils a pointer to the Far::StencilTables - /// - static CpuEvalStencilsContext * Create(Far::LimitStencilTables const *stencils); - - /// \brief Returns the Far::StencilTables applied - Far::LimitStencilTables const * GetStencilTables() const { - return _stencils; - } - -protected: - - CpuEvalStencilsContext(Far::LimitStencilTables const *stencils); - -private: - - Far::LimitStencilTables const 
* _stencils; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_CPU_EVALSTENCILS_CONTEXT_H diff --git a/opensubdiv/osd/cpuEvalStencilsController.cpp b/opensubdiv/osd/cpuEvalStencilsController.cpp deleted file mode 100644 index 3ba86929..00000000 --- a/opensubdiv/osd/cpuEvalStencilsController.cpp +++ /dev/null @@ -1,149 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/cpuEvalStencilsController.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -CpuEvalStencilsController::CpuEvalStencilsController() { -} - -CpuEvalStencilsController::~CpuEvalStencilsController() { -} - -int -CpuEvalStencilsController::_UpdateValues( CpuEvalStencilsContext * context ) { - - int result=0; - - Far::LimitStencilTables const * stencils = context->GetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - outDesc = _currentBindState.outputDataDesc; - - // make sure that we have control data to work with - if (not ctrlDesc.CanEval(outDesc)) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - float * out = _currentBindState.outputData + outDesc.offset; - - if ((not ctrl) or (not out)) - return result; - - unsigned char const * sizes = &stencils->GetSizes().at(0); - Far::Index const * index = &stencils->GetControlIndices().at(0); - - float const * weight = &stencils->GetWeights().at(0); - - for (int i=0; iGetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - duDesc = _currentBindState.outputDuDesc, - dvDesc = _currentBindState.outputDvDesc; - - // make sure that we have control data to work with - if (not (ctrlDesc.CanEval(duDesc) and ctrlDesc.CanEval(dvDesc))) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - float * du = _currentBindState.outputUDeriv + duDesc.offset, - * dv = _currentBindState.outputVDeriv + dvDesc.offset; - - if ((not ctrl) or (not du) or (not dv)) - return result; - - unsigned char const * sizes = &stencils->GetSizes().at(0); - Far::Index const * index = &stencils->GetControlIndices().at(0); - - float const * duweight = 
&stencils->GetDuWeights().at(0), - * dvweight = &stencils->GetDvWeights().at(0); - - for (int i=0; i - int UpdateValues( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputData( outputDataDesc, outputData ); - - int n = _UpdateValues( context ); - - unbind(); - - return n; - } - - /// \brief Applies derivative stencil weights to the control vertex data - /// - /// Computes the U and V derivative stencils to the control vertex data at - /// the parametric locations contained in each stencil - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDuDesc vertex buffer descriptor for the U derivative output data - /// - /// @param outputDuData output vertex buffer for the U derivative data - /// - /// @param outputDvDesc vertex buffer descriptor for the V deriv output data - /// - /// @param outputDvData output vertex buffer for the V derivative data - /// - template - int UpdateDerivs( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData, - VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData ); - - int n = _UpdateDerivs( context ); - - unbind(); - - return n; - } - - /// Waits until all running subdivision 
kernels finish. - void Synchronize(); - -protected: - - /// \brief Binds control vertex data buffer - template - void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) { - - _currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0; - _currentBindState.controlDataDesc = controlDataDesc; - - } - - /// \brief Binds output vertex data buffer - template - void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) { - - _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0; - _currentBindState.outputDataDesc = outputDataDesc; - } - - /// \brief Binds output derivative vertex data buffer - template - void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu, - VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) { - - _currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0; - _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0; - _currentBindState.outputDuDesc = outputDuDesc; - _currentBindState.outputDvDesc = outputDvDesc; - } - - /// \brief Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - } - -private: - - int _UpdateValues( CpuEvalStencilsContext * context ); - int _UpdateDerivs( CpuEvalStencilsContext * context ); - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. 
- struct BindState { - - BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { } - - void Reset() { - controlData = outputData = outputUDeriv = outputVDeriv = NULL; - controlDataDesc.Reset(); - outputDataDesc.Reset(); - outputDuDesc.Reset(); - outputDvDesc.Reset(); - } - - // transient mesh data - VertexBufferDescriptor controlDataDesc, - outputDataDesc, - outputDuDesc, - outputDvDesc; - - float * controlData, - * outputData, - * outputUDeriv, - * outputVDeriv; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_CPU_EVALSTENCILS_CONTROLLER_H diff --git a/opensubdiv/osd/cpuEvaluator.cpp b/opensubdiv/osd/cpuEvaluator.cpp new file mode 100644 index 00000000..e42e8a3e --- /dev/null +++ b/opensubdiv/osd/cpuEvaluator.cpp @@ -0,0 +1,148 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/cpuEvaluator.h" +#include "../osd/cpuKernel.h" + +#include + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +/* static */ +bool +CpuEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; + if (srcDesc.length != dstDesc.length) return false; + + // XXX: we can probably expand cpuKernel.cpp to here. + CpuEvalStencils(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights, start, end); + + return true; +} + +/* static */ +bool +CpuEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + float *dstDu, + VertexBufferDescriptor const &dstDuDesc, + float *dstDv, + VertexBufferDescriptor const &dstDvDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, int end) { + if (end <= start) return true; + if (srcDesc.length != dstDesc.length) return false; + if (srcDesc.length != dstDuDesc.length) return false; + if (srcDesc.length != dstDvDesc.length) return false; + + CpuEvalStencils(src, srcDesc, + dst, dstDesc, + dstDu, dstDuDesc, + dstDv, dstDvDesc, + sizes, offsets, indices, + weights, duWeights, dvWeights, + start, end); + + return true; +} + +template +struct BufferAdapter { + BufferAdapter(T *p, int length, int stride) : + _p(p), _length(length), _stride(stride) { } + void Clear() { + for (int i = 0; i < _length; ++i) _p[i] = 0; + } + void AddWithWeight(T const *src, float w, float wu, float wv) { + (void)wu; + (void)wv; + // TODO: derivatives. 
+ for (int i = 0; i < _length; ++i) { + _p[i] += src[i] * w; + } + } + const T *operator[] (int index) const { + return _p + _stride * index; + } + BufferAdapter & operator ++() { + _p += _stride; + return *this; + } + + T *_p; + int _length; + int _stride; +}; + +/* static */ +int +CpuEvaluator::EvalPatches(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + PatchCoordArray const &patchCoords, + Far::PatchTables const *patchTable) { + src += srcDesc.offset; + dst += dstDesc.offset; + int count = 0; + + // XXX: this implementation is temporary. + BufferAdapter srcT(src, srcDesc.length, srcDesc.stride); + BufferAdapter dstT(dst, dstDesc.length, dstDesc.stride); + + for (size_t i = 0; i < patchCoords.size(); ++i) { + PatchCoord const &coords = patchCoords[i]; + + patchTable->Evaluate(coords.handle, coords.s, coords.t, + srcT, dstT); + ++count; + ++dstT; + } + return count; +} + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cpuEvaluator.h b/opensubdiv/osd/cpuEvaluator.h new file mode 100644 index 00000000..13a7ed86 --- /dev/null +++ b/opensubdiv/osd/cpuEvaluator.h @@ -0,0 +1,242 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_CPU_EVALUATOR_H +#define OPENSUBDIV_OSD_CPU_EVALUATOR_H + +#include "../version.h" + +#include +#include +#include "../osd/vertexDescriptor.h" +#include "../far/patchTables.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +/// \brief Coordinates set on a patch table +/// XXX: this is a temporary structure that exists only during the Osd refactoring work. +/// +struct PatchCoord { + /// \brief Constructor + /// + /// @param handle patch handle + /// + /// @param s parametric location on the patch + /// + /// @param t parametric location on the patch + /// + PatchCoord(Far::PatchTables::PatchHandle handle, float s, float t) : + handle(handle), s(s), t(t) { } + + Far::PatchTables::PatchHandle handle; ///< patch handle + float s, t; ///< parametric location on patch +}; + +typedef std::vector PatchCoordArray; + + +class CpuEvaluator { +public: + /// \brief Generic static eval stencils function. This function has the same + /// signature as the other device kernels so that it can be called + /// transparently from the OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. + /// + /// @param instance not used in the cpu kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the cpu kernel + /// + template + static bool EvalStencils(SRC_BUFFER *srcBuffer, + VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + const CpuEvaluator *instance = NULL, + void * deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), + srcDesc, + dstBuffer->BindCpuBuffer(), + dstDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + /// stencil evaluate function. 
+ static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); + + template + static bool EvalStencils(SRC_BUFFER *srcBuffer, + VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + VertexBufferDescriptor const &dstDesc, + DST_BUFFER *dstDuBuffer, + VertexBufferDescriptor const &dstDuDesc, + DST_BUFFER *dstDvBuffer, + VertexBufferDescriptor const &dstDvDesc, + STENCIL_TABLE const *stencilTable, + const CpuEvaluator *evaluator = NULL, + void * deviceContext = NULL) { + (void)evaluator; // unused + (void)deviceContext; // unused + + return EvalStencils(srcBuffer->BindCpuBuffer(), + srcDesc, + dstBuffer->BindCpuBuffer(), + dstDesc, + dstDuBuffer->BindCpuBuffer(), + dstDuDesc, + dstDvBuffer->BindCpuBuffer(), + dstDvDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + &stencilTable->GetDuWeights()[0], + &stencilTable->GetDvWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + float *dstDu, + VertexBufferDescriptor const &dstDuDesc, + float *dstDv, + VertexBufferDescriptor const &dstDvDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + const float * duWeights, + const float * dvWeights, + int start, + int end); + + /// \brief Generic limit eval function. This function has the same + /// signature as the other device kernels so that it can be called + /// transparently. + /// + /// XXX: This interface is still a work in progress. XXX + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param patchCoords array of locations to be evaluated. + /// + /// @param patchTable Far::PatchTables + /// + /// @param instance not used in the cpu evaluator + /// + /// @param deviceContext not used in the cpu evaluator + /// + template + static int EvalPatches(SRC_BUFFER *srcBuffer, + VertexBufferDescriptor const &srcDesc, + DST_BUFFER *dstBuffer, + VertexBufferDescriptor const &dstDesc, + PatchCoordArray const &patchCoords, + Far::PatchTables const *patchTable, + CpuEvaluator const *instance, + void * deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused + + return EvalPatches(srcBuffer->BindCpuBuffer(), + srcDesc, + dstBuffer->BindCpuBuffer(), + dstDesc, + patchCoords, + patchTable); + } + + /// \brief limit eval function. + static int EvalPatches(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + PatchCoordArray const &patchCoords, + Far::PatchTables const *patchTable); + + /// \brief synchronize all asynchronous computation invoked on this device. + static void Synchronize(void * /*deviceContext = NULL*/) { + // nothing. 
+ } +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_CPU_EVALUATOR_H diff --git a/opensubdiv/osd/cpuKernel.cpp b/opensubdiv/osd/cpuKernel.cpp index 87f0abb5..1a7baddf 100644 --- a/opensubdiv/osd/cpuKernel.cpp +++ b/opensubdiv/osd/cpuKernel.cpp @@ -70,14 +70,15 @@ copy(float *dst, int dstIndex, const float *src, } void -CpuComputeStencils(VertexBufferDescriptor const &vertexDesc, - float const * vertexSrc, - float * vertexDst, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end) { +CpuEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end) { assert(start>=0 and start(vertexSrc, vertexDst, + ComputeStencilKernel<4>(src, dst, sizes, indices, weights, start, end); - } else if(vertexDesc.length==8 and vertexDesc.stride==8) { + } else if (srcDesc.length == 8 and dstDesc.length == 8 and + srcDesc.stride == 8 and dstDesc.stride == 8) { // SIMD fast path for aligned primvar data (8 floats) - ComputeStencilKernel<8>(vertexSrc, vertexDst, + ComputeStencilKernel<8>(src, dst, sizes, indices, weights, start, end); - } - else { + } else { // Slow path for non-aligned data - float * result = (float*)alloca(vertexDesc.length * sizeof(float)); + + float * result = (float*)alloca(srcDesc.length * sizeof(float)); int nstencils = end-start; for (int i=0; i 0) { + sizes += start; + indices += offsets[start]; + weights += offsets[start]; + duWeights += offsets[start]; + dvWeights += offsets[start]; + } + + src += srcDesc.offset; + dst += dstDesc.offset; + dstDu += dstDuDesc.offset; + dstDv += dstDvDesc.offset; + + int nOutLength = dstDesc.length + dstDuDesc.length + dstDvDesc.length; + float * result = 
(float*)alloca(nOutLength * sizeof(float)); + float * resultDu = result + dstDesc.length; + float * resultDv = resultDu + dstDuDesc.length; + + int nStencils = end - start; + for (int i = 0; i < nStencils; ++i, ++sizes) { + + // clear + memset(result, 0, nOutLength * sizeof(float)); + + for (int j=0; j<*sizes; ++j) { + addWithWeight(result, src, *indices, *weights++, srcDesc); + addWithWeight(resultDu, src, *indices, *duWeights++, srcDesc); + addWithWeight(resultDv, src, *indices, *dvWeights++, srcDesc); + ++indices; + } + copy(dst, i, result, dstDesc); + copy(dstDu, i, resultDu, dstDuDesc); + copy(dstDv, i, resultDv, dstDvDesc); + } +} + } // end namespace Osd } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/cpuKernel.h b/opensubdiv/osd/cpuKernel.h index e4141b21..9b152a6a 100644 --- a/opensubdiv/osd/cpuKernel.h +++ b/opensubdiv/osd/cpuKernel.h @@ -22,31 +22,46 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_CPU_KERNEL_H -#define OSD_CPU_KERNEL_H +#ifndef OPENSUBDIV_OSD_CPU_KERNEL_H +#define OPENSUBDIV_OSD_CPU_KERNEL_H #include "../version.h" - -#include "../osd/vertexDescriptor.h" +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { -struct VertexDescriptor; - - +struct VertexBufferDescriptor; void -CpuComputeStencils(VertexBufferDescriptor const &vertexDesc, - float const * vertexSrc, - float * vertexDst, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end); +CpuEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end); + +void +CpuEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + float * dstDu, + VertexBufferDescriptor const 
&dstDuDesc, + float * dstDv, + VertexBufferDescriptor const &dstDvDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + float const * duWeights, + float const * dvWeights, + int start, int end); // // SIMD ICC optimization of the stencil kernel diff --git a/opensubdiv/osd/cpuSmoothNormalContext.h b/opensubdiv/osd/cpuSmoothNormalContext.h index da2d19c4..9b717aff 100644 --- a/opensubdiv/osd/cpuSmoothNormalContext.h +++ b/opensubdiv/osd/cpuSmoothNormalContext.h @@ -29,7 +29,6 @@ #include "../osd/nonCopyable.h" #include "../osd/vertexDescriptor.h" -#include "../osd/vertex.h" #include "../far/types.h" #include diff --git a/opensubdiv/osd/cpuVertexBuffer.h b/opensubdiv/osd/cpuVertexBuffer.h index 20b9a9ed..bc85f51e 100644 --- a/opensubdiv/osd/cpuVertexBuffer.h +++ b/opensubdiv/osd/cpuVertexBuffer.h @@ -37,7 +37,7 @@ namespace Osd { /// \brief Concrete vertex buffer class for cpu subvision. /// /// CpuVertexBuffer implements the VertexBufferInterface. An instance -/// of this buffer class can be passed to CpuComputeController +/// of this buffer class can be passed to CpuEvaluator /// class CpuVertexBuffer { public: diff --git a/opensubdiv/osd/cudaComputeContext.cpp b/opensubdiv/osd/cudaComputeContext.cpp deleted file mode 100644 index cdd94d4f..00000000 --- a/opensubdiv/osd/cudaComputeContext.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../far/stencilTables.h" - -#include "../osd/cudaComputeContext.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ---------------------------------------------------------------------------- - -template void * -createCudaBuffer(std::vector const & src) { - - void * devicePtr = 0; - - size_t size = src.size()*sizeof(T); - - cudaError_t err = cudaMalloc(&devicePtr, size); - if (err != cudaSuccess) { - return devicePtr; - } - - err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice); - if (err != cudaSuccess) { - cudaFree(devicePtr); - return 0; - } - return devicePtr; -} - -// ---------------------------------------------------------------------------- - -class CudaComputeContext::CudaStencilTables { - -public: - explicit CudaStencilTables(Far::StencilTables const & stencilTables) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createCudaBuffer(stencilTables.GetSizes()); - _offsets = createCudaBuffer(stencilTables.GetOffsets()); - _indices = createCudaBuffer(stencilTables.GetControlIndices()); - _weights = createCudaBuffer(stencilTables.GetWeights()); - } else { - _sizes = _offsets = _indices = _weights = 
NULL; - } - } - - ~CudaStencilTables() { - if (_sizes) { cudaFree(_sizes); } - if (_offsets) { cudaFree(_offsets); } - if (_indices) { cudaFree(_indices); } - if (_weights) { cudaFree(_weights); } - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - void * GetSizes() const { - return _sizes; - } - - void * GetOffsets() const { - return _offsets; - } - - void * GetIndices() const { - return _indices; - } - - void * GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - -private: - void * _sizes, - * _offsets, - * _indices, - * _weights; - int _numStencils; -}; - -// ---------------------------------------------------------------------------- - -CudaComputeContext::CudaComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new CudaStencilTables(*vertexStencilTables); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = new CudaStencilTables(*varyingStencilTables); - - if (_numControlVertices) { - assert(_numControlVertices == - varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -CudaComputeContext::~CudaComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -CudaComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->IsValid() : false; -} - -bool -CudaComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? 
_varyingStencilTables->IsValid() : false; -} - -int -CudaComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; -} - -int -CudaComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; -} - -// ---------------------------------------------------------------------------- - -void * -CudaComputeContext::GetVertexStencilTablesSizes() const { - return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0; -} - -void * -CudaComputeContext::GetVertexStencilTablesOffsets() const { - return _vertexStencilTables ? _vertexStencilTables->GetOffsets() : 0; -} - -void * -CudaComputeContext::GetVertexStencilTablesIndices() const { - return _vertexStencilTables ? _vertexStencilTables->GetIndices() : 0; -} - -void * -CudaComputeContext::GetVertexStencilTablesWeights() const { - return _vertexStencilTables ? _vertexStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -void * -CudaComputeContext::GetVaryingStencilTablesSizes() const { - return _varyingStencilTables ? _varyingStencilTables->GetSizes() : 0; -} - -void * -CudaComputeContext::GetVaryingStencilTablesOffsets() const { - return _varyingStencilTables ? _varyingStencilTables->GetOffsets() : 0; -} - -void * -CudaComputeContext::GetVaryingStencilTablesIndices() const { - return _varyingStencilTables ? _varyingStencilTables->GetIndices() : 0; -} - -void * -CudaComputeContext::GetVaryingStencilTablesWeights() const { - return _varyingStencilTables ? 
_varyingStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -CudaComputeContext * -CudaComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - CudaComputeContext *result = - new CudaComputeContext(vertexStencilTables, varyingStencilTables); - - return result; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cudaComputeContext.h b/opensubdiv/osd/cudaComputeContext.h deleted file mode 100644 index 61ca7808..00000000 --- a/opensubdiv/osd/cudaComputeContext.h +++ /dev/null @@ -1,134 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#ifndef OSD_CUDA_COMPUTE_CONTEXT_H -#define OSD_CUDA_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far { class StencilTables; } - -namespace Osd { - -/// -/// \brief CUDA Refine Context -/// -/// The CUDA implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. -/// -class CudaComputeContext : public NonCopyable { - -public: - /// Creates an CudaComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext (not used) - /// - static CudaComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~CudaComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil tables - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil tables - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil sizes - void * GetVertexStencilTablesSizes() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil offsets - void * GetVertexStencilTablesOffsets() const; - - /// Returns the Cuda 
buffer containing vertex-stencil stencil indices - void * GetVertexStencilTablesIndices() const; - - /// Returns the Cuda buffer containing vertex-stencil stencil weights - void * GetVertexStencilTablesWeights() const; - - - /// Returns the Cuda buffer containing Varying-stencil stencil sizes - void * GetVaryingStencilTablesSizes() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil offsets - void * GetVaryingStencilTablesOffsets() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil indices - void * GetVaryingStencilTablesIndices() const; - - /// Returns the Cuda buffer containing Varying-stencil stencil weights - void * GetVaryingStencilTablesWeights() const; - - -protected: - explicit CudaComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - class CudaStencilTables; - - CudaStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CUDA_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/cudaComputeController.cpp b/opensubdiv/osd/cudaComputeController.cpp deleted file mode 100644 index b6a7760e..00000000 --- a/opensubdiv/osd/cudaComputeController.cpp +++ /dev/null @@ -1,118 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/cudaComputeController.h" - -#include -#include -#include - -extern "C" { - - void CudaComputeStencils(float const *src, float * dst, - int length, int stride, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end); - -} - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -void -CudaComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - if (context->HasVertexStencilTables()) { - - int length = _currentBindState.vertexDesc.length, - stride = _currentBindState.vertexDesc.stride; - - int start = 0; - int end = context->GetNumStencilsInVertexStencilTables(); - - float const * src = _currentBindState.GetVertexBufferAtOffset(); - - float * dst = const_cast(src) + - context->GetNumControlVertices() * stride; - - if (end > start) { - CudaComputeStencils(src, dst, length, stride, - (unsigned char const *)context->GetVertexStencilTablesSizes(), - (int const *)context->GetVertexStencilTablesOffsets(), - (int const *)context->GetVertexStencilTablesIndices(), - (float const *)context->GetVertexStencilTablesWeights(), - start, - end); - } - } - - if 
(context->HasVaryingStencilTables()) { - - int length = _currentBindState.varyingDesc.length, - stride = _currentBindState.varyingDesc.stride; - - int start = 0; - int end = context->GetNumStencilsInVaryingStencilTables(); - - float const * src = _currentBindState.GetVaryingBufferAtOffset(); - - float * dst = const_cast(src) + - context->GetNumControlVertices() * stride; - - if (end > start) { - CudaComputeStencils(src, dst, length, stride, - (unsigned char const *)context->GetVaryingStencilTablesSizes(), - (int const *)context->GetVaryingStencilTablesOffsets(), - (int const *)context->GetVaryingStencilTablesIndices(), - (float const *)context->GetVaryingStencilTablesWeights(), - start, - end); - } - } -} - -CudaComputeController::CudaComputeController() { -} - -CudaComputeController::~CudaComputeController() { -} - -void -CudaComputeController::Synchronize() { - - cudaThreadSynchronize(); -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cudaComputeController.h b/opensubdiv/osd/cudaComputeController.h deleted file mode 100644 index 3309760d..00000000 --- a/opensubdiv/osd/cudaComputeController.h +++ /dev/null @@ -1,188 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_CUDA_COMPUTE_CONTROLLER_H -#define OSD_CUDA_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cudaComputeContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching CUDA subdivision kernels. -/// -/// CudaComputeController is a compute controller class to launch -/// Cuda subdivision kernels. It requires CudaVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class CudaComputeController { -public: - typedef CudaComputeContext ComputeContext; - - /// Constructor. - CudaComputeController(); - - /// Destructor. - ~CudaComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CudaContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. 
- /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CudaComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CudaContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CudaComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? 
varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - static_cast(vertexBuffer->BindCudaBuffer()) : 0; - _currentBindState.varyingBuffer = varyingBuffer ? - static_cast(varyingBuffer->BindCudaBuffer()) : 0; - } - - /// Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : vertexBuffer(NULL), varyingBuffer(NULL) {} - - void Reset() { - vertexBuffer = varyingBuffer = NULL; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float *GetVertexBufferAtOffset() const { - return vertexBuffer ? vertexBuffer + vertexDesc.offset : 0; - } - - float *GetVaryingBufferAtOffset() const { - return varyingBuffer ? varyingBuffer + varyingDesc.offset : 0; - } - - float * vertexBuffer, // cuda buffers - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_CUDA_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/cudaEvaluator.cpp b/opensubdiv/osd/cudaEvaluator.cpp new file mode 100644 index 00000000..4ae450e2 --- /dev/null +++ b/opensubdiv/osd/cudaEvaluator.cpp @@ -0,0 +1,124 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. 
This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/cudaEvaluator.h" + +#include +#include + +#include "../far/stencilTables.h" + +extern "C" { + void CudaEvalStencils(const float *src, + float *dst, + int length, + int srcStride, + int dstStride, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); +} + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +template void * +createCudaBuffer(std::vector const & src) { + void * devicePtr = 0; + + size_t size = src.size()*sizeof(T); + + cudaError_t err = cudaMalloc(&devicePtr, size); + if (err != cudaSuccess) { + return devicePtr; + } + + err = cudaMemcpy(devicePtr, &src.at(0), size, cudaMemcpyHostToDevice); + if (err != cudaSuccess) { + cudaFree(devicePtr); + return 0; + } + return devicePtr; +} + +// ---------------------------------------------------------------------------- + +CudaStencilTables::CudaStencilTables(Far::StencilTables const *stencilTables) { + _numStencils = stencilTables->GetNumStencils(); + if (_numStencils > 0) { + _sizes = createCudaBuffer(stencilTables->GetSizes()); + _offsets = createCudaBuffer(stencilTables->GetOffsets()); + _indices = createCudaBuffer(stencilTables->GetControlIndices()); 
+ _weights = createCudaBuffer(stencilTables->GetWeights()); + } else { + _sizes = _offsets = _indices = _weights = NULL; + } +} + +CudaStencilTables::~CudaStencilTables() { + if (_sizes) cudaFree(_sizes); + if (_offsets) cudaFree(_offsets); + if (_indices) cudaFree(_indices); + if (_weights) cudaFree(_weights); +} + +// --------------------------------------------------------------------------- + +/* static */ +bool +CudaEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end) { + CudaEvalStencils(src + srcDesc.offset, + dst + dstDesc.offset, + srcDesc.length, + srcDesc.stride, + dstDesc.stride, + sizes, offsets, indices, weights, + start, end); + return true; +} + +/* static */ +void +CudaEvaluator::Synchronize(void * /*deviceContext*/) { + cudaThreadSynchronize(); +} + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/cudaEvaluator.h b/opensubdiv/osd/cudaEvaluator.h new file mode 100644 index 00000000..c0df1bba --- /dev/null +++ b/opensubdiv/osd/cudaEvaluator.h @@ -0,0 +1,148 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_CUDA_EVALUATOR_H +#define OPENSUBDIV_OSD_CUDA_EVALUATOR_H + +#include "../version.h" + +#include +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far { + class StencilTables; +} + +namespace Osd { + +/// \brief CUDA stencil tables +/// +/// This class is a cuda buffer representation of Far::StencilTables. +/// +/// CudaComputeKernel consumes this table to apply stencils +/// +/// +class CudaStencilTables { +public: + static CudaStencilTables *Create(Far::StencilTables const *stencilTables, + void *deviceContext = NULL) { + (void)deviceContext; // unused + return new CudaStencilTables(stencilTables); + } + + explicit CudaStencilTables(Far::StencilTables const *stencilTables); + ~CudaStencilTables(); + + // interfaces needed for CudaCompute + void *GetSizesBuffer() const { return _sizes; } + void *GetOffsetsBuffer() const { return _offsets; } + void *GetIndicesBuffer() const { return _indices; } + void *GetWeightsBuffer() const { return _weights; } + int GetNumStencils() const { return _numStencils; } + +private: + void * _sizes, + * _offsets, + * _indices, + * _weights; + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +class CudaEvaluator { +public: + /// \brief Generic static compute function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. 
+ /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCudaBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCudaBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTables stencil table to be applied. The table must have + /// Cuda memory interfaces. + /// + /// @param instance not used in the CudaEvaluator + /// + /// @param deviceContext not used in the CudaEvaluator + /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + const void *instance = NULL, + void * deviceContext = NULL) { + + (void)instance; // unused + (void)deviceContext; // unused + return EvalStencils(srcVertexBuffer->BindCudaBuffer(), + srcDesc, + dstVertexBuffer->BindCudaBuffer(), + dstDesc, + (unsigned char const *)stencilTable->GetSizesBuffer(), + (int const *)stencilTable->GetOffsetsBuffer(), + (int const *)stencilTable->GetIndicesBuffer(), + (float const *)stencilTable->GetWeightsBuffer(), + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); + + static void Synchronize(void *deviceContext = NULL); +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_CUDA_EVALUATOR_H diff --git a/opensubdiv/osd/cudaKernel.cu b/opensubdiv/osd/cudaKernel.cu index 
d6f893c3..58a8bc5f 100644 --- a/opensubdiv/osd/cudaKernel.cu +++ b/opensubdiv/osd/cudaKernel.cu @@ -98,12 +98,14 @@ computeStencils(float const * cvs, float * vbuffer, __global__ void computeStencils(float const * cvs, float * dst, - int length, int stride, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end) { + int length, + int srcStride, + int dstStride, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end) { int first = start + threadIdx.x + blockIdx.x*blockDim.x; @@ -112,12 +114,12 @@ computeStencils(float const * cvs, float * dst, int const * lindices = indices + offsets[i]; float const * lweights = weights + offsets[i]; - float * dstVert = dst + i*stride; + float * dstVert = dst + i*dstStride; clear(dstVert, length); for (int j=0; j(cvs)[indices[j]]; + float4 tmp = reinterpret_cast(cvs)[indices[j]]; x.x += w*tmp.x; x.y += w*tmp.y; x.z += w*tmp.z; @@ -239,14 +241,14 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs, #include "../version.h" #define OPT_KERNEL(NUM_ELEMENTS, KERNEL, X, Y, ARG) \ - if (length==NUM_ELEMENTS && stride==length) { \ + if (length==NUM_ELEMENTS && srcStride==length && dstStride==length) { \ KERNEL<<>>ARG; \ return; \ } #ifdef USE_NVIDIA_OPTIMIZATION #define OPT_KERNEL_NVIDIA(NUM_ELEMENTS, KERNEL, X, Y, ARG) \ - if (length==NUM_ELEMENTS && stride==length) { \ + if (length==NUM_ELEMENTS && srcStride==length && dstStride==length) { \ int gridDim = min(X, (end-start+Y-1)/Y); \ KERNEL<<>>ARG; \ return; \ @@ -255,35 +257,45 @@ __global__ void computeStencilsNv_v4(float const *__restrict cvs, extern "C" { -void -CudaComputeStencils(float const *cvs, float * dst, - int length, int stride, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end) +void CudaEvalStencils(const float *src, + float *dst, + int length, + int 
srcStride, + int dstStride, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end) { - assert(cvs and dst and sizes and offsets and indices and weights and (end>=start)); +// assert(cvs and dst and sizes and offsets and indices and weights and (end>=start)); - if (length==0 or stride==0) { + if (length == 0 or srcStride == 0 or dstStride == 0 or (end <= start)) { return; } #ifdef USE_NVIDIA_OPTIMIZATION - OPT_KERNEL_NVIDIA(3, computeStencilsNv, 2048, 256, (cvs, dst, sizes, offsets, indices, weights, start, end)); - //OPT_KERNEL_NVIDIA(4, computeStencilsNv, 2048, 256, (cvs, dst, sizes, offsets, indices, weights, start, end)); - if( length==4 && stride==length ) { + OPT_KERNEL_NVIDIA(3, computeStencilsNv, 2048, 256, + (src, dst, sizes, offsets, indices, weights, start, end)); + //OPT_KERNEL_NVIDIA(4, computeStencilsNv, 2048, 256, + // (cvs, dst, sizes, offsets, indices, weights, start, end)); + if (length == 4 && srcStride == length && dstStride == length) { int gridDim = min(2048, (end-start+256-1)/256); - computeStencilsNv_v4<256><<>>(cvs, dst, sizes, offsets, indices, weights, start, end); + computeStencilsNv_v4<256><<>>( + src, dst, sizes, offsets, indices, weights, start, end); return; } #else - OPT_KERNEL(3, computeStencils, 512, 32, (cvs, dst, sizes, offsets, indices, weights, start, end)); - OPT_KERNEL(4, computeStencils, 512, 32, (cvs, dst, sizes, offsets, indices, weights, start, end)); + OPT_KERNEL(3, computeStencils, 512, 32, + (src, dst, sizes, offsets, indices, weights, start, end)); + OPT_KERNEL(4, computeStencils, 512, 32, + (src, dst, sizes, offsets, indices, weights, start, end)); #endif - computeStencils <<<512, 32>>>(cvs, dst, length, stride, + // generic case (slow) + computeStencils <<<512, 32>>>( + src, dst, length, srcStride, dstStride, sizes, offsets, indices, weights, start, end); } diff --git a/opensubdiv/osd/cudaVertexBuffer.h b/opensubdiv/osd/cudaVertexBuffer.h index 
4b6ea2e4..c073939d 100644 --- a/opensubdiv/osd/cudaVertexBuffer.h +++ b/opensubdiv/osd/cudaVertexBuffer.h @@ -35,7 +35,7 @@ namespace Osd { /// \brief Concrete vertex buffer class for Cuda subvision. /// /// CudaVertexBuffer implements CudaVertexBufferInterface. -/// An instance of this buffer class can be passed to CudaComputeController +/// An instance of this buffer class can be passed to CudaEvaluator /// class CudaVertexBuffer { diff --git a/opensubdiv/osd/d3d11ComputeContext.cpp b/opensubdiv/osd/d3d11ComputeContext.cpp deleted file mode 100644 index 541e5ac1..00000000 --- a/opensubdiv/osd/d3d11ComputeContext.cpp +++ /dev/null @@ -1,284 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../far/stencilTables.h" - -#include "../osd/d3d11ComputeContext.h" -#include "../far/error.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } - -// ---------------------------------------------------------------------------- - -struct D3D11Table { - - D3D11Table() : buffer(0), srv(0) { } - - ~D3D11Table() { - SAFE_RELEASE(buffer); - SAFE_RELEASE(srv); - } - - bool IsValid() const { - return (buffer and srv); - } - - template void initialize(std::vector const & src, - DXGI_FORMAT format, ID3D11DeviceContext *deviceContext) { - - size_t size = src.size()*sizeof(T); - - if (size==0) { - buffer = 0; - srv = 0; - return; - } - - ID3D11Device *device = 0; - deviceContext->GetDevice(&device); - assert(device); - - D3D11_BUFFER_DESC bd; - bd.ByteWidth = (unsigned int)size; - bd.Usage = D3D11_USAGE_IMMUTABLE; - bd.BindFlags = D3D11_BIND_SHADER_RESOURCE; - bd.CPUAccessFlags = 0; - bd.MiscFlags = 0; - bd.StructureByteStride = 0; - - D3D11_SUBRESOURCE_DATA initData; - initData.pSysMem = &src.at(0); - - HRESULT hr = device->CreateBuffer(&bd, &initData, &buffer); - if (FAILED(hr)) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "Error creating compute table buffer\n"); - return; - } - - D3D11_SHADER_RESOURCE_VIEW_DESC srvd; - ZeroMemory(&srvd, sizeof(srvd)); - srvd.Format = format; - srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; - srvd.Buffer.FirstElement = 0; - srvd.Buffer.NumElements = (unsigned int)src.size(); - - hr = device->CreateShaderResourceView(buffer, &srvd, &srv); - if (FAILED(hr)) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "Error creating compute table shader resource view\n"); - return; - } - } - - ID3D11Buffer * buffer; - ID3D11ShaderResourceView * srv; -}; - - -// ---------------------------------------------------------------------------- - -class D3D11ComputeContext::D3D11StencilTables { - -public: - - 
D3D11StencilTables(Far::StencilTables const & stencilTables, - ID3D11DeviceContext *deviceContext) { - - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - // convert unsigned char sizes buffer to ints - // (HLSL does not have uint8 type) - std::vector const sizes(stencilTables.GetSizes().begin(), - stencilTables.GetSizes().end()); - - _sizes.initialize(sizes, - DXGI_FORMAT_R32_SINT, - deviceContext); - _offsets.initialize(stencilTables.GetOffsets(), - DXGI_FORMAT_R32_SINT, - deviceContext); - _indices.initialize(stencilTables.GetControlIndices(), - DXGI_FORMAT_R32_SINT, - deviceContext); - _weights.initialize(stencilTables.GetWeights(), - DXGI_FORMAT_R32_FLOAT, - deviceContext); - } - } - - bool IsValid() const { - return _sizes.IsValid() and _offsets.IsValid() and - _indices.IsValid() and _weights.IsValid(); - } - - D3D11Table const & GetSizes() const { - return _sizes; - } - - D3D11Table const & GetOffsets() const { - return _offsets; - } - - D3D11Table const & GetIndices() const { - return _indices; - } - - D3D11Table const & GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - - void Bind(ID3D11DeviceContext * deviceContext) const { - ID3D11ShaderResourceView *SRViews[] = { - _sizes.srv, - _offsets.srv, - _indices.srv, - _weights.srv - }; - deviceContext->CSSetShaderResources(1, 4, SRViews); // t1-t4 - } - - static void Unbind(ID3D11DeviceContext * deviceContext) { - ID3D11ShaderResourceView *SRViews[] = { 0, 0, 0, 0 }; - deviceContext->CSSetShaderResources(1, 4, SRViews); - } - - -private: - - D3D11Table _sizes, - _offsets, - _indices, - _weights; - - int _numStencils; -}; - -// ---------------------------------------------------------------------------- - -D3D11ComputeContext::D3D11ComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext) : - _vertexStencilTables(0), 
_varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = - new D3D11StencilTables(*vertexStencilTables, deviceContext); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = - new D3D11StencilTables(*varyingStencilTables, deviceContext); - - if (_numControlVertices) { - assert(_numControlVertices==varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -D3D11ComputeContext::~D3D11ComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - - -// ---------------------------------------------------------------------------- - -bool -D3D11ComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->IsValid() : false; -} - -bool -D3D11ComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -D3D11ComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; -} - -int -D3D11ComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? 
_varyingStencilTables->GetNumStencils() : 0; -} - -// ---------------------------------------------------------------------------- - -void -D3D11ComputeContext::BindVertexStencilTables(ID3D11DeviceContext *deviceContext) const { - if (_vertexStencilTables) { - _vertexStencilTables->Bind(deviceContext); - } -} - -void -D3D11ComputeContext::BindVaryingStencilTables(ID3D11DeviceContext *deviceContext) const { - if (_varyingStencilTables) { - _varyingStencilTables->Bind(deviceContext); - } -} - -void -D3D11ComputeContext::UnbindStencilTables(ID3D11DeviceContext *deviceContext) const { - D3D11StencilTables::Unbind(deviceContext); -} - - -// ---------------------------------------------------------------------------- - -D3D11ComputeContext * -D3D11ComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext) { - - D3D11ComputeContext *result = - new D3D11ComputeContext(vertexStencilTables, varyingStencilTables, - deviceContext); - - return result; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/d3d11ComputeContext.h b/opensubdiv/osd/d3d11ComputeContext.h deleted file mode 100644 index 7d3dcd4f..00000000 --- a/opensubdiv/osd/d3d11ComputeContext.h +++ /dev/null @@ -1,128 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_D3D11_COMPUTE_CONTEXT_H -#define OSD_D3D11_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include "../osd/nonCopyable.h" - -struct ID3D11DeviceContext; - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far{ class StencilTables; } - -namespace Osd { - -/// -/// \brief D3D Refine Context -/// -/// The D3D implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. 
-/// -class D3D11ComputeContext : public NonCopyable { -public: - - /// Creates an D3D11ComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext The D3D device - /// - static D3D11ComputeContext * Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext); - - /// Destructor - virtual ~D3D11ComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Binds D3D11 buffers containing stencils for 'vertex' interpolation - /// - /// @param deviceContext The D3D device - /// - void BindVertexStencilTables(ID3D11DeviceContext *deviceContext) const; - - /// Binds D3D11 buffers containing stencils for 'varying' interpolation - /// - /// @param deviceContext The D3D device - /// - void BindVaryingStencilTables(ID3D11DeviceContext *deviceContext) const; - - /// Unbinds D3D11 stencil buffers - /// - /// @param deviceContext The D3D device - /// - void UnbindStencilTables(ID3D11DeviceContext *deviceContext) const; - -protected: - - explicit D3D11ComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - ID3D11DeviceContext *deviceContext); - -private: - - class D3D11StencilTables; - - D3D11StencilTables 
* _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_D3D11_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/d3d11ComputeController.cpp b/opensubdiv/osd/d3d11ComputeController.cpp deleted file mode 100644 index e74ab021..00000000 --- a/opensubdiv/osd/d3d11ComputeController.cpp +++ /dev/null @@ -1,338 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/d3d11ComputeController.h" -#include "../far/error.h" -#include "../osd/vertexDescriptor.h" - -#define INITGUID // for IID_ID3D11ShaderReflection -#include -#include -#include - -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } - -static const char *shaderSource = -#include "../osd/hlslComputeKernel.gen.h" -; - -// ---------------------------------------------------------------------------- - -// must match constant buffer declaration in hlslComputeKernel.hlsl -__declspec(align(16)) - -struct KernelUniformArgs { - - int uniformStart, // batch - uniformEnd, - - uniformOffset, // primvar buffer descriptor - uniformNumCVs; // number of const control vertices padded at -}; - -// ---------------------------------------------------------------------------- - -class D3D11ComputeController::KernelBundle : - NonCopyable { - -public: - - KernelBundle() : - _computeShader(0), - _classLinkage(0), - _subStencilKernel(0), - _uniformArgs(0), - _workGroupSize(64) { } - - ~KernelBundle() { - SAFE_RELEASE(_computeShader); - SAFE_RELEASE(_classLinkage); - SAFE_RELEASE(_subStencilKernel); - SAFE_RELEASE(_uniformArgs); - } - - - bool Compile(ID3D11DeviceContext *deviceContext, - VertexBufferDescriptor const &desc) { - - _desc = VertexBufferDescriptor(0, desc.length, desc.stride); - - DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS; - #ifdef _DEBUG - dwShaderFlags |= D3DCOMPILE_DEBUG; - #endif - - std::ostringstream ss; - - ss << _desc.offset; std::string offsetValue(ss.str()); ss.str(""); - ss << _desc.length; std::string lengthValue(ss.str()); ss.str(""); - ss << _desc.stride; std::string strideValue(ss.str()); ss.str(""); - ss << _workGroupSize; std::string workgroupSizeValue(ss.str()); ss.str(""); - - D3D_SHADER_MACRO defines[] = - { "OFFSET", offsetValue.c_str(), - "LENGTH", lengthValue.c_str(), - "STRIDE", strideValue.c_str(), - 
"WORK_GROUP_SIZE", workgroupSizeValue.c_str(), - 0, 0 }; - - ID3DBlob * computeShaderBuffer = NULL; - ID3DBlob * errorBuffer = NULL; - - HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource), - NULL, &defines[0], NULL, - "cs_main", "cs_5_0", - dwShaderFlags, 0, - &computeShaderBuffer, &errorBuffer); - if (FAILED(hr)) { - if (errorBuffer != NULL) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "Error compiling HLSL shader: %s\n", - (CHAR*)errorBuffer->GetBufferPointer()); - errorBuffer->Release(); - return false; - } - } - - ID3D11Device *device = NULL; - deviceContext->GetDevice(&device); - assert(device); - - device->CreateClassLinkage(&_classLinkage); - assert(_classLinkage); - - device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), - computeShaderBuffer->GetBufferSize(), - _classLinkage, - &_computeShader); - assert(_computeShader); - - ID3D11ShaderReflection *reflector; - D3DReflect(computeShaderBuffer->GetBufferPointer(), - computeShaderBuffer->GetBufferSize(), - IID_ID3D11ShaderReflection, (void**) &reflector); - assert(reflector); - - assert(reflector->GetNumInterfaceSlots() == 1); - reflector->Release(); - - computeShaderBuffer->Release(); - - _classLinkage->GetClassInstance("computeStencil", 0, &_subStencilKernel); - assert(_subStencilKernel); - - return true; - } - - void ApplyStencilTableKernel(ID3D11DeviceContext *deviceContext, - int offset, int numCVs, int start, int end) { - - KernelUniformArgs args; - args.uniformStart = start; - args.uniformEnd = end; - args.uniformOffset = offset; - args.uniformNumCVs = numCVs; - - dispatchCompute(deviceContext, _subStencilKernel, args); - } - - - struct Match { - - Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -private: - - void dispatchCompute(ID3D11DeviceContext *deviceContext, - ID3D11ClassInstance * kernel, 
KernelUniformArgs const & args) { - - assert(deviceContext); - - int count = args.uniformEnd - args.uniformStart; - if (count <= 0) return; - - if (not _uniformArgs) { - ID3D11Device *device = NULL; - deviceContext->GetDevice(&device); - assert(device); - - D3D11_BUFFER_DESC cbDesc; - ZeroMemory(&cbDesc, sizeof(cbDesc)); - cbDesc.Usage = D3D11_USAGE_DYNAMIC; - cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - cbDesc.MiscFlags = 0; - cbDesc.ByteWidth = sizeof(KernelUniformArgs); - device->CreateBuffer(&cbDesc, NULL, &_uniformArgs); - } - assert(_uniformArgs); - - D3D11_MAPPED_SUBRESOURCE mappedResource; - deviceContext->Map(_uniformArgs, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); - CopyMemory(mappedResource.pData, &args, sizeof(KernelUniformArgs)); - - deviceContext->Unmap(_uniformArgs, 0); - deviceContext->CSSetConstantBuffers(0, 1, &_uniformArgs); // b0 - - deviceContext->CSSetShader(_computeShader, &kernel, 1); - deviceContext->Dispatch((count + _workGroupSize - 1) / _workGroupSize, 1, 1); - } - - -private: - - - ID3D11ComputeShader * _computeShader; - - ID3D11ClassLinkage * _classLinkage; - - ID3D11ClassInstance * _subStencilKernel; // stencil compute kernel HLSL subroutine - - ID3D11Buffer * _uniformArgs; // uniform paramaeters for kernels - - VertexBufferDescriptor _desc; // primvar buffer descriptor - - int _workGroupSize; -}; - -// ---------------------------------------------------------------------------- -void -D3D11ComputeController::Synchronize() { - - if (not _query) { - ID3D11Device *device = NULL; - _deviceContext->GetDevice(&device); - assert(device); - - D3D11_QUERY_DESC desc; - desc.Query = D3D11_QUERY_EVENT; - desc.MiscFlags = 0; - device->CreateQuery(&desc, &_query); - } - _deviceContext->Flush(); - _deviceContext->End(_query); - while (S_OK != _deviceContext->GetData(_query, NULL, 0, 0)); -} - -// ---------------------------------------------------------------------------- - 
-D3D11ComputeController::KernelBundle const * -D3D11ComputeController::getKernel(VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - if (it != _kernelRegistry.end()) { - return *it; - } else { - assert(_deviceContext); - KernelBundle * kernelBundle = new KernelBundle(); - kernelBundle->Compile(_deviceContext, desc); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -void -D3D11ComputeController::bindBuffer() { - - // Unbind the vertexBuffer from the input assembler - ID3D11Buffer *NULLBuffer = 0; - UINT voffset = 0, vstride = 0; - _deviceContext->IASetVertexBuffers(0, 1, &NULLBuffer, &voffset, &vstride); - - // Unbind the vertexBuffer from the vertex shader - ID3D11ShaderResourceView *NULLSRV = 0; - _deviceContext->VSSetShaderResources(0, 1, &NULLSRV); - - if (_currentBindState.buffer) - _deviceContext->CSSetUnorderedAccessViews(0, 1, &_currentBindState.buffer, 0); // u0 -} - -void -D3D11ComputeController::unbindBuffer() { - assert(_deviceContext); - ID3D11UnorderedAccessView *UAViews[] = { 0 }; - _deviceContext->CSSetUnorderedAccessViews(0, 1, UAViews, 0); // u0 -} - -// ---------------------------------------------------------------------------- - -void -D3D11ComputeController::ApplyStencilTableKernel( - D3D11ComputeContext const *context, int numStencils) const { - - assert(context); - - // XXXX manuelk messy const drop forced by D3D API - could use better solution - D3D11ComputeController::KernelBundle * bundle = - const_cast(_currentBindState.kernelBundle); - - bundle->ApplyStencilTableKernel( - _deviceContext, - _currentBindState.desc.offset, - context->GetNumControlVertices(), - 0, - numStencils); -} - - -// ---------------------------------------------------------------------------- - -D3D11ComputeController::D3D11ComputeController( - ID3D11DeviceContext *deviceContext) - : _deviceContext(deviceContext), _query(0) { 
-} - -D3D11ComputeController::~D3D11ComputeController() { - - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } - SAFE_RELEASE(_query); -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/d3d11ComputeController.h b/opensubdiv/osd/d3d11ComputeController.h deleted file mode 100644 index c40c2e6a..00000000 --- a/opensubdiv/osd/d3d11ComputeController.h +++ /dev/null @@ -1,213 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#ifndef OSD_D3D11_COMPUTE_CONTROLLER_H -#define OSD_D3D11_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/d3d11ComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#include - -struct ID3D11DeviceContext; -struct ID3D11Query; -struct ID3D11UnorderedAccessView; - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching D3D11 Compute subdivision kernels. -/// -/// D3D11ComputeController is a compute controller class to launch -/// D3D11Compute transfrom feedback subdivision kernels. It requires -/// GLVertexBufferInterface as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class D3D11ComputeController { -public: - typedef D3D11ComputeContext ComputeContext; - - /// Constructor. - /// - /// @param deviceContext a valid instanciated D3D11 device context - /// - D3D11ComputeController(ID3D11DeviceContext *deviceContext); - - /// Destructor. - ~D3D11ComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The D3D11Context to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. 
- /// - template - void Compute( D3D11ComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - if (vertexBuffer) { - bind(vertexBuffer, vertexDesc); - - context->BindVertexStencilTables(_deviceContext); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVertexStencilTables()); - } - - if (varyingBuffer) { - bind(varyingBuffer, varyingDesc); - - context->BindVaryingStencilTables(_deviceContext); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVaryingStencilTables()); - } - - context->UnbindStencilTables(_deviceContext); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The D3D11Context to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(D3D11ComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context, - int numStencils) const; - - template - void bind( BUFFER * buffer, - VertexBufferDescriptor const * desc ) { - - assert(buffer); - - // if the vertex buffer descriptor is specified, use it - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (desc) { - _currentBindState.desc = *desc; - } else { - int numElements = buffer ? buffer->GetNumElements() : 0; - _currentBindState.desc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.buffer = buffer->BindD3D11UAV(_deviceContext); - - _currentBindState.kernelBundle = getKernel(_currentBindState.desc); - - bindBuffer(); - } - - - // Unbinds any previously bound vertex and varying data buffers. 
- void unbind() { - _currentBindState.Reset(); - unbindBuffer(); - } - - // binds the primvar data buffer - void bindBuffer(); - - // unbinds the primvar data buffer - void unbindBuffer(); - - -private: - - ID3D11DeviceContext *_deviceContext; - ID3D11Query *_query; - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. - struct BindState { - - BindState() : buffer(0), kernelBundle(0) { } - - void Reset() { - buffer = 0; - desc.Reset(); - kernelBundle = 0; - } - - ID3D11UnorderedAccessView * buffer; - - VertexBufferDescriptor desc; - - KernelBundle const * kernelBundle; - }; - - BindState _currentBindState; - - typedef std::vector KernelRegistry; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - KernelRegistry _kernelRegistry; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_D3D11_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/d3d11ComputeEvaluator.cpp b/opensubdiv/osd/d3d11ComputeEvaluator.cpp new file mode 100644 index 00000000..7f34306f --- /dev/null +++ b/opensubdiv/osd/d3d11ComputeEvaluator.cpp @@ -0,0 +1,375 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/d3d11ComputeEvaluator.h" + +#include +#include +#include +#include + +#define INITGUID // for IID_ID3D11ShaderReflection +#include +#include +#include + +#include "../far/error.h" +#include "../far/stencilTables.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } } + +static const char *shaderSource = +#include "../osd/hlslComputeKernel.gen.h" +; + +// ---------------------------------------------------------------------------- + +// must match constant buffer declaration in hlslComputeKernel.hlsl +__declspec(align(16)) + +struct KernelUniformArgs { + + int start; // batch + int end; + + int srcOffset; + int dstOffset; +}; + +// ---------------------------------------------------------------------------- + +template +static ID3D11Buffer *createBuffer(std::vector const &src, + ID3D11Device *device) { + + size_t size = src.size()*sizeof(T); + + ID3D11Buffer *buffer = NULL; + D3D11_BUFFER_DESC bd; + bd.ByteWidth = (unsigned int)size; + bd.Usage = D3D11_USAGE_IMMUTABLE; + bd.BindFlags = D3D11_BIND_SHADER_RESOURCE; + bd.CPUAccessFlags = 0; + bd.MiscFlags = 0; + bd.StructureByteStride = 0; + + D3D11_SUBRESOURCE_DATA initData; + initData.pSysMem = &src.at(0); + + HRESULT hr = device->CreateBuffer(&bd, &initData, &buffer); + if (FAILED(hr)) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "Error creating compute table buffer\n"); + return NULL; + } + return buffer; +} + +static 
ID3D11ShaderResourceView *createSRV(ID3D11Buffer *buffer, + DXGI_FORMAT format, + ID3D11Device *device, + size_t size) { + ID3D11ShaderResourceView *srv = NULL; + D3D11_SHADER_RESOURCE_VIEW_DESC srvd; + ZeroMemory(&srvd, sizeof(srvd)); + srvd.Format = format; + srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvd.Buffer.FirstElement = 0; + srvd.Buffer.NumElements = (unsigned int)size; + + HRESULT hr = device->CreateShaderResourceView(buffer, &srvd, &srv); + if (FAILED(hr)) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "Error creating compute table shader resource view\n"); + return NULL; + } + return srv; +} + +D3D11StencilTables::D3D11StencilTables(Far::StencilTables const *stencilTables, + ID3D11DeviceContext *deviceContext) + { + ID3D11Device *device = NULL; + deviceContext->GetDevice(&device); + assert(device); + + _numStencils = stencilTables->GetNumStencils(); + if (_numStencils > 0) { + // convert unsigned char sizes buffer to ints + // (HLSL does not have uint8 type) + std::vector const sizes(stencilTables->GetSizes().begin(), + stencilTables->GetSizes().end()); + + _sizesBuffer = createBuffer(sizes, device); + _offsetsBuffer = createBuffer(stencilTables->GetOffsets(), device); + _indicesBuffer = createBuffer(stencilTables->GetControlIndices(), device); + _weightsBuffer = createBuffer(stencilTables->GetWeights(), device); + + _sizes = createSRV(_sizesBuffer, DXGI_FORMAT_R32_SINT, device, + stencilTables->GetSizes().size()); + _offsets = createSRV(_offsetsBuffer, DXGI_FORMAT_R32_SINT, device, + stencilTables->GetOffsets().size()); + _indices = createSRV(_indicesBuffer, DXGI_FORMAT_R32_SINT, device, + stencilTables->GetControlIndices().size()); + _weights= createSRV(_weightsBuffer, DXGI_FORMAT_R32_FLOAT, device, + stencilTables->GetWeights().size()); + } else { + _sizes = _offsets = _indices = _weights = NULL; + _sizesBuffer = _offsetsBuffer = _indicesBuffer = _weightsBuffer = NULL; + } +} + +D3D11StencilTables::~D3D11StencilTables() { + SAFE_RELEASE(_sizes); + 
SAFE_RELEASE(_sizesBuffer); + SAFE_RELEASE(_offsets); + SAFE_RELEASE(_offsetsBuffer); + SAFE_RELEASE(_indices); + SAFE_RELEASE(_indicesBuffer); + SAFE_RELEASE(_weights); + SAFE_RELEASE(_weightsBuffer); +} + +// --------------------------------------------------------------------------- + + +D3D11ComputeEvaluator::D3D11ComputeEvaluator() : + _computeShader(NULL), + _classLinkage(NULL), + _singleBufferKernel(NULL), + _separateBufferKernel(NULL), + _uniformArgs(NULL), + _workGroupSize(64) { + +} + +D3D11ComputeEvaluator * +D3D11ComputeEvaluator::Create(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + ID3D11DeviceContext *deviceContext) { + (void)deviceContext; // not used + D3D11ComputeEvaluator *instance = new D3D11ComputeEvaluator(); + if (instance->Compile(srcDesc, dstDesc, deviceContext)) return instance; + delete instance; + return NULL; +} + +D3D11ComputeEvaluator::~D3D11ComputeEvaluator() { + SAFE_RELEASE(_computeShader); + SAFE_RELEASE(_classLinkage); + SAFE_RELEASE(_singleBufferKernel); + SAFE_RELEASE(_separateBufferKernel); + SAFE_RELEASE(_uniformArgs); +} + +bool +D3D11ComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + ID3D11DeviceContext *deviceContext) { + + if (srcDesc.length > dstDesc.length) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "srcDesc length must be less than or equal to " + "dstDesc length.\n"); + return false; + } + + DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS + | D3D10_SHADER_RESOURCES_MAY_ALIAS; +#ifdef _DEBUG + dwShaderFlags |= D3DCOMPILE_DEBUG; +#endif + + std::ostringstream ss; + ss << srcDesc.length; std::string lengthValue(ss.str()); ss.str(""); + ss << srcDesc.stride; std::string srcStrideValue(ss.str()); ss.str(""); + ss << dstDesc.stride; std::string dstStrideValue(ss.str()); ss.str(""); + ss << _workGroupSize; std::string workgroupSizeValue(ss.str()); ss.str(""); + + D3D_SHADER_MACRO defines[] = + { "LENGTH", lengthValue.c_str(), + 
"SRC_STRIDE", srcStrideValue.c_str(), + "DST_STRIDE", dstStrideValue.c_str(), + "WORK_GROUP_SIZE", workgroupSizeValue.c_str(), + 0, 0 }; + + ID3DBlob * computeShaderBuffer = NULL; + ID3DBlob * errorBuffer = NULL; + + HRESULT hr = D3DCompile(shaderSource, strlen(shaderSource), + NULL, &defines[0], NULL, + "cs_main", "cs_5_0", + dwShaderFlags, 0, + &computeShaderBuffer, &errorBuffer); + if (FAILED(hr)) { + if (errorBuffer != NULL) { + Far::Error(Far::FAR_RUNTIME_ERROR, + "Error compiling HLSL shader: %s\n", + (CHAR*)errorBuffer->GetBufferPointer()); + errorBuffer->Release(); + return false; + } + } + + ID3D11Device *device = NULL; + deviceContext->GetDevice(&device); + assert(device); + + device->CreateClassLinkage(&_classLinkage); + assert(_classLinkage); + + device->CreateComputeShader(computeShaderBuffer->GetBufferPointer(), + computeShaderBuffer->GetBufferSize(), + _classLinkage, + &_computeShader); + assert(_computeShader); + + ID3D11ShaderReflection *reflector; + D3DReflect(computeShaderBuffer->GetBufferPointer(), + computeShaderBuffer->GetBufferSize(), + IID_ID3D11ShaderReflection, (void**) &reflector); + assert(reflector); + + assert(reflector->GetNumInterfaceSlots() == 1); + reflector->Release(); + + computeShaderBuffer->Release(); + + _classLinkage->GetClassInstance("singleBufferCompute", 0, &_singleBufferKernel); + assert(_singleBufferKernel); + _classLinkage->GetClassInstance("separateBufferCompute", 0, &_separateBufferKernel); + assert(_separateBufferKernel); + + D3D11_BUFFER_DESC cbDesc; + ZeroMemory(&cbDesc, sizeof(cbDesc)); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + cbDesc.MiscFlags = 0; + cbDesc.ByteWidth = sizeof(KernelUniformArgs); + device->CreateBuffer(&cbDesc, NULL, &_uniformArgs); + + return true; +} + +/* static */ +void +D3D11ComputeEvaluator::Synchronize(ID3D11DeviceContext *deviceContext) { + // XXX: this is currently just for the performance 
measuring purpose. + + // XXXFIXME! + ID3D11Query *query = NULL; + + ID3D11Device *device = NULL; + deviceContext->GetDevice(&device); + assert(device); + + D3D11_QUERY_DESC desc; + desc.Query = D3D11_QUERY_EVENT; + desc.MiscFlags = 0; + device->CreateQuery(&desc, &query); + + deviceContext->Flush(); + deviceContext->End(query); + while (S_OK != deviceContext->GetData(query, NULL, 0, 0)); + + SAFE_RELEASE(query); +} + +bool +D3D11ComputeEvaluator::EvalStencils(ID3D11UnorderedAccessView *srcUAV, + VertexBufferDescriptor const &srcDesc, + ID3D11UnorderedAccessView *dstUAV, + VertexBufferDescriptor const &dstDesc, + ID3D11ShaderResourceView *sizesSRV, + ID3D11ShaderResourceView *offsetsSRV, + ID3D11ShaderResourceView *indicesSRV, + ID3D11ShaderResourceView *weightsSRV, + int start, + int end, + ID3D11DeviceContext *deviceContext) const { + assert(deviceContext); + + int count = end - start; + if (count <= 0) return true; + + KernelUniformArgs args; + args.start = start; + args.end = end; + args.srcOffset = srcDesc.offset; + args.dstOffset = dstDesc.offset; + + D3D11_MAPPED_SUBRESOURCE mappedResource; + deviceContext->Map(_uniformArgs, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); + CopyMemory(mappedResource.pData, &args, sizeof(KernelUniformArgs)); + + deviceContext->Unmap(_uniformArgs, 0); + deviceContext->CSSetConstantBuffers(0, 1, &_uniformArgs); // b0 + + // Unbind the vertexBuffer from the input assembler + ID3D11Buffer *NULLBuffer = 0; + UINT voffset = 0, vstride = 0; + deviceContext->IASetVertexBuffers(0, 1, &NULLBuffer, &voffset, &vstride); + ID3D11ShaderResourceView *NULLSRV = 0; + deviceContext->VSSetShaderResources(0, 1, &NULLSRV); + + // bind UAV + ID3D11UnorderedAccessView *UAViews[] = { srcUAV, dstUAV }; + ID3D11ShaderResourceView *SRViews[] = { + sizesSRV, offsetsSRV, indicesSRV, weightsSRV }; + + // bind source vertex and stencil tables + deviceContext->CSSetShaderResources(1, 4, SRViews); // t1-t4 + + if (srcUAV == dstUAV) { + 
deviceContext->CSSetUnorderedAccessViews(0, 1, UAViews, 0); // u0 + // Dispatch src == dst buffer + deviceContext->CSSetShader(_computeShader, &_singleBufferKernel, 1); + deviceContext->Dispatch((count + _workGroupSize - 1) / _workGroupSize, 1, 1); + } else { + deviceContext->CSSetUnorderedAccessViews(0, 2, UAViews, 0); // u0, u1 + // Dispatch src != dst buffer + deviceContext->CSSetShader(_computeShader, &_separateBufferKernel, 1); + deviceContext->Dispatch((count + _workGroupSize - 1) / _workGroupSize, 1, 1); + } + + // unbind stencil tables and vertexbuffers + SRViews[0] = SRViews[1] = SRViews[2] = SRViews[3] = NULL; + deviceContext->CSSetShaderResources(1, 4, SRViews); + + UAViews[0] = UAViews[1] = NULL; + deviceContext->CSSetUnorderedAccessViews(0, 2, UAViews, 0); + + return true; +} + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/d3d11ComputeEvaluator.h b/opensubdiv/osd/d3d11ComputeEvaluator.h new file mode 100644 index 00000000..91eb7596 --- /dev/null +++ b/opensubdiv/osd/d3d11ComputeEvaluator.h @@ -0,0 +1,227 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. 
+// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H +#define OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H + +#include "../version.h" + +struct ID3D11DeviceContext; +struct ID3D11Buffer; +struct ID3D11ComputeShader; +struct ID3D11ClassLinkage; +struct ID3D11ClassInstance; +struct ID3D11ShaderResourceView; +struct ID3D11UnorderedAccessView; + +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far { + class StencilTables; +} + +namespace Osd { + +/// \brief D3D11 stencil tables +/// +/// This class is a D3D11 Shader Resource View representation of +/// Far::StencilTables. 
+/// +/// D3D11ComputeEvaluator consumes this table to apply stencils +/// +class D3D11StencilTables { +public: + template + static D3D11StencilTables *Create(Far::StencilTables const *stencilTables, + DEVICE_CONTEXT context) { + return new D3D11StencilTables(stencilTables, context->GetDeviceContext()); + } + + static D3D11StencilTables *Create(Far::StencilTables const *stencilTables, + ID3D11DeviceContext *deviceContext) { + return new D3D11StencilTables(stencilTables, deviceContext); + } + + D3D11StencilTables(Far::StencilTables const *stencilTables, + ID3D11DeviceContext *deviceContext); + + ~D3D11StencilTables(); + + // interfaces needed for D3D11ComputeEvaluator + ID3D11ShaderResourceView *GetSizesSRV() const { return _sizes; } + ID3D11ShaderResourceView *GetOffsetsSRV() const { return _offsets; } + ID3D11ShaderResourceView *GetIndicesSRV() const { return _indices; } + ID3D11ShaderResourceView *GetWeightsSRV() const { return _weights; } + int GetNumStencils() const { return _numStencils; } + +private: + ID3D11ShaderResourceView *_sizes; + ID3D11ShaderResourceView *_offsets; + ID3D11ShaderResourceView *_indices; + ID3D11ShaderResourceView *_weights; + ID3D11Buffer *_sizesBuffer; + ID3D11Buffer *_offsetsBuffer; + ID3D11Buffer *_indicesBuffer; + ID3D11Buffer *_weightsBuffer; + + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +class D3D11ComputeEvaluator { +public: + typedef bool Instantiatable; + static D3D11ComputeEvaluator * Create(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + ID3D11DeviceContext *deviceContext); + + /// Constructor. + D3D11ComputeEvaluator(); + + /// Destructor. + ~D3D11ComputeEvaluator(); + + /// \brief Generic static compute function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. 
+ /// must have BindVBO() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTables stencil table to be applied. The table must have + /// SSBO interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext ID3D11DeviceContext. + /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + D3D11ComputeEvaluator const *instance, + ID3D11DeviceContext * deviceContext) { + if (instance) { + return instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable, + deviceContext); + } else { + // Create an instace on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc, deviceContext); + if (instance) { + bool r = instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable, + deviceContext); + delete instance; + return r; + } + return false; + } + } + + /// Dispatch the DX compute kernel on GPU asynchronously. + /// returns false if the kernel hasn't been compiled yet. 
+ template + bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + ID3D11DeviceContext *deviceContext) const { + return EvalStencils(srcVertexBuffer->BindD3D11UAV(deviceContext), + srcDesc, + dstVertexBuffer->BindD3D11UAV(deviceContext), + dstDesc, + stencilTable->GetSizesSRV(), + stencilTable->GetOffsetsSRV(), + stencilTable->GetIndicesSRV(), + stencilTable->GetWeightsSRV(), + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils(), + deviceContext); + } + + /// Dispatch the DX compute kernel on GPU asynchronously. + /// returns false if the kernel hasn't been compiled yet. + bool EvalStencils(ID3D11UnorderedAccessView *srcSRV, + VertexBufferDescriptor const &srcDesc, + ID3D11UnorderedAccessView *dstUAV, + VertexBufferDescriptor const &dstDesc, + ID3D11ShaderResourceView *sizesSRV, + ID3D11ShaderResourceView *offsetsSRV, + ID3D11ShaderResourceView *indicesSRV, + ID3D11ShaderResourceView *weightsSRV, + int start, + int end, + ID3D11DeviceContext *deviceContext) const; + + /// Configure DX kernel. Returns false if it fails to compile the kernel. + bool Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + ID3D11DeviceContext *deviceContext); + + /// Wait the dispatched kernel finishes. 
+ static void Synchronize(ID3D11DeviceContext *deviceContext); + +private: + ID3D11ComputeShader * _computeShader; + ID3D11ClassLinkage * _classLinkage; + ID3D11ClassInstance * _singleBufferKernel; + ID3D11ClassInstance * _separateBufferKernel; + ID3D11Buffer * _uniformArgs; // uniform paramaeters for kernels + + int _workGroupSize; +}; + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_D3D11_COMPUTE_EVALUATOR_H diff --git a/opensubdiv/osd/d3d11DrawContext.cpp b/opensubdiv/osd/d3d11DrawContext.cpp index 638d19fe..d657147f 100644 --- a/opensubdiv/osd/d3d11DrawContext.cpp +++ b/opensubdiv/osd/d3d11DrawContext.cpp @@ -33,10 +33,11 @@ namespace OPENSUBDIV_VERSION { namespace Osd { -D3D11DrawContext::D3D11DrawContext() : +D3D11DrawContext::D3D11DrawContext(int maxValence) : + DrawContext(maxValence), patchIndexBuffer(NULL), - ptexCoordinateBuffer(NULL), - ptexCoordinateBufferSRV(NULL), + patchParamBuffer(NULL), + patchParamBufferSRV(NULL), fvarDataBuffer(NULL), fvarDataBufferSRV(NULL), vertexBufferSRV(NULL), @@ -50,8 +51,8 @@ D3D11DrawContext::D3D11DrawContext() : D3D11DrawContext::~D3D11DrawContext() { if (patchIndexBuffer) patchIndexBuffer->Release(); - if (ptexCoordinateBuffer) ptexCoordinateBuffer->Release(); - if (ptexCoordinateBufferSRV) ptexCoordinateBufferSRV->Release(); + if (patchParamBuffer) patchParamBuffer->Release(); + if (patchParamBufferSRV) patchParamBufferSRV->Release(); if (fvarDataBuffer) fvarDataBuffer->Release(); if (fvarDataBufferSRV) fvarDataBufferSRV->Release(); if (vertexBufferSRV) vertexBufferSRV->Release(); @@ -63,11 +64,11 @@ D3D11DrawContext::~D3D11DrawContext() D3D11DrawContext * D3D11DrawContext::Create(Far::PatchTables const *patchTables, - int numVertexElements, ID3D11DeviceContext *pd3d11DeviceContext) { - D3D11DrawContext * result = new D3D11DrawContext(); - if (result->create(*patchTables, numVertexElements, 
pd3d11DeviceContext)) + int maxValence = patchTables->GetMaxValence(); + D3D11DrawContext * result = new D3D11DrawContext(maxValence); + if (result->create(*patchTables, pd3d11DeviceContext)) return result; delete result; @@ -76,7 +77,6 @@ D3D11DrawContext::Create(Far::PatchTables const *patchTables, bool D3D11DrawContext::create(Far::PatchTables const &patchTables, - int numVertexElements, ID3D11DeviceContext *pd3d11DeviceContext) { // adaptive patches @@ -111,12 +111,11 @@ D3D11DrawContext::create(Far::PatchTables const &patchTables, pd3d11DeviceContext->Unmap(patchIndexBuffer, 0); - DrawContext::ConvertPatchArrays(patchTables, _patchArrays, - patchTables.GetMaxValence(), numVertexElements); + DrawContext::ConvertPatchArrays(patchTables, _patchArrays); // allocate and initialize additional buffer data - // create ptex coordinate buffer + // create patch param buffer Far::PatchParamTable const & patchParamTables = patchTables.GetPatchParamTable(); @@ -144,7 +143,7 @@ D3D11DrawContext::create(Far::PatchTables const &patchTables, bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; bd.MiscFlags = 0; bd.StructureByteStride = sizeof(unsigned int); - hr = pd3d11Device->CreateBuffer(&bd, NULL, &ptexCoordinateBuffer); + hr = pd3d11Device->CreateBuffer(&bd, NULL, &patchParamBuffer); if (FAILED(hr)) { return false; } @@ -155,17 +154,18 @@ D3D11DrawContext::create(Far::PatchTables const &patchTables, srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; srvd.Buffer.FirstElement = 0; srvd.Buffer.NumElements = numElements; - hr = pd3d11Device->CreateShaderResourceView(ptexCoordinateBuffer, &srvd, &ptexCoordinateBufferSRV); + hr = pd3d11Device->CreateShaderResourceView( + patchParamBuffer, &srvd, &patchParamBufferSRV); if (FAILED(hr)) { return false; } - hr = pd3d11DeviceContext->Map(ptexCoordinateBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); + hr = pd3d11DeviceContext->Map(patchParamBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedResource); if (FAILED(hr)) { return false; } - 
unsigned int * ptexBuffer = (unsigned int *) mappedResource.pData; - memcpy(ptexBuffer, values, numElements * elementSize); - pd3d11DeviceContext->Unmap(ptexCoordinateBuffer, 0); + unsigned int *dst = (unsigned int *) mappedResource.pData; + memcpy(dst, values, numElements * elementSize); + pd3d11DeviceContext->Unmap(patchParamBuffer, 0); } // create vertex valence buffer and vertex texture @@ -277,9 +277,9 @@ D3D11DrawContext::SetFVarDataTexture(Far::PatchTables const & patchTables, void D3D11DrawContext::updateVertexTexture(ID3D11Buffer *vbo, - ID3D11DeviceContext *pd3d11DeviceContext, - int numVertices, - int numVertexElements) + ID3D11DeviceContext *pd3d11DeviceContext, + int numVertices, + int numVertexElements) { ID3D11Device *pd3d11Device = NULL; pd3d11DeviceContext->GetDevice(&pd3d11Device); diff --git a/opensubdiv/osd/d3d11DrawContext.h b/opensubdiv/osd/d3d11DrawContext.h index cecdb933..a88bdd13 100644 --- a/opensubdiv/osd/d3d11DrawContext.h +++ b/opensubdiv/osd/d3d11DrawContext.h @@ -29,7 +29,6 @@ #include "../far/patchTables.h" #include "../osd/drawContext.h" -#include "../osd/vertex.h" #include @@ -72,18 +71,14 @@ public: /// /// @param pd3d11DeviceContext A device context /// - /// @param numVertexElements The number of vertex elements - /// static D3D11DrawContext *Create(Far::PatchTables const *patchTables, - int numVertexElements, ID3D11DeviceContext *pd3d11DeviceContext); /// template version for custom context (OpenCL) used by OsdMesh template static D3D11DrawContext *Create(Far::PatchTables const *patchtables, - int numVertexElements, DEVICE_CONTEXT context) { - return Create(patchtables, numVertexElements, context->GetDeviceContext()); + return Create(patchtables, context->GetDeviceContext()); } /// Set vbo as a vertex texture (for gregory patch drawing) @@ -109,8 +104,8 @@ public: ID3D11Buffer *patchIndexBuffer; - ID3D11Buffer *ptexCoordinateBuffer; - ID3D11ShaderResourceView *ptexCoordinateBufferSRV; + ID3D11Buffer *patchParamBuffer; + 
ID3D11ShaderResourceView *patchParamBufferSRV; ID3D11Buffer *fvarDataBuffer; ID3D11ShaderResourceView *fvarDataBufferSRV; @@ -147,12 +142,11 @@ public: } private: - D3D11DrawContext(); + D3D11DrawContext(int maxValence); // allocate buffers from patchTables bool create(Far::PatchTables const &patchTables, - int numVertexElements, ID3D11DeviceContext *pd3d11DeviceContext); void updateVertexTexture(ID3D11Buffer *vbo, diff --git a/opensubdiv/osd/d3d11DrawRegistry.cpp b/opensubdiv/osd/d3d11DrawRegistry.cpp index b5e50247..da060ccf 100644 --- a/opensubdiv/osd/d3d11DrawRegistry.cpp +++ b/opensubdiv/osd/d3d11DrawRegistry.cpp @@ -47,58 +47,34 @@ D3D11DrawConfig::~D3D11DrawConfig() static const char *commonShaderSource = #include "hlslPatchCommon.gen.h" ; -static const char *ptexShaderSource = -#include "hlslPtexCommon.gen.h" -; static const char *bsplineShaderSource = #include "hlslPatchBSpline.gen.h" ; static const char *gregoryShaderSource = #include "hlslPatchGregory.gen.h" ; -static const char *transitionShaderSource = -#include "hlslPatchTransition.gen.h" -; D3D11DrawRegistryBase::~D3D11DrawRegistryBase() {} D3D11DrawSourceConfig * D3D11DrawRegistryBase::_CreateDrawSourceConfig( - DrawContext::PatchDescriptor const & desc, ID3D11Device * pd3dDevice) + Far::PatchDescriptor const & desc, ID3D11Device * pd3dDevice) { D3D11DrawSourceConfig * sconfig = _NewDrawSourceConfig(); sconfig->commonShader.source = commonShaderSource; - if (IsPtexEnabled()) { - sconfig->commonShader.source += ptexShaderSource; - } - - { - std::ostringstream ss; - ss << (int)desc.GetMaxValence(); - sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); - ss.str(""); - ss << (int)desc.GetNumElements(); - sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); - } - switch (desc.GetType()) { case Far::PatchDescriptor::REGULAR: - case Far::PatchDescriptor::BOUNDARY: - case Far::PatchDescriptor::CORNER: sconfig->commonShader.AddDefine("OSD_PATCH_BSPLINE"); 
sconfig->commonShader.AddDefine("OSD_PATCH_ENABLE_SINGLE_CREASE"); - sconfig->vertexShader.source = - std::string(transitionShaderSource) + bsplineShaderSource; + sconfig->vertexShader.source = bsplineShaderSource; sconfig->vertexShader.target = "vs_5_0"; sconfig->vertexShader.entry = "vs_main_patches"; - sconfig->hullShader.source = - std::string(transitionShaderSource) + bsplineShaderSource; + sconfig->hullShader.source = bsplineShaderSource; sconfig->hullShader.target = "hs_5_0"; sconfig->hullShader.entry = "hs_main_patches"; - sconfig->domainShader.source = - std::string(transitionShaderSource) + bsplineShaderSource; + sconfig->domainShader.source = bsplineShaderSource; sconfig->domainShader.target = "ds_5_0"; sconfig->domainShader.entry = "ds_main_patches"; break; diff --git a/opensubdiv/osd/d3d11DrawRegistry.h b/opensubdiv/osd/d3d11DrawRegistry.h index 6fdd158f..46613bde 100644 --- a/opensubdiv/osd/d3d11DrawRegistry.h +++ b/opensubdiv/osd/d3d11DrawRegistry.h @@ -29,7 +29,6 @@ #include "../far/patchTables.h" #include "../osd/drawRegistry.h" -#include "../osd/vertex.h" #include @@ -81,22 +80,14 @@ struct D3D11DrawSourceConfig { class D3D11DrawRegistryBase { public: - typedef DrawContext::PatchDescriptor DescType; + typedef Far::PatchDescriptor DescType; typedef D3D11DrawConfig ConfigType; typedef D3D11DrawSourceConfig SourceConfigType; - D3D11DrawRegistryBase(bool enablePtex=false) : _enablePtex(enablePtex) { } + D3D11DrawRegistryBase() { } virtual ~D3D11DrawRegistryBase(); - bool IsPtexEnabled() const { - return _enablePtex; - } - - void SetPtexEnabled(bool b) { - _enablePtex=b; - } - protected: virtual ConfigType * _NewDrawConfig() { return new ConfigType(); } virtual ConfigType * @@ -110,14 +101,11 @@ protected: virtual SourceConfigType * _NewDrawSourceConfig() { return new SourceConfigType(); } virtual SourceConfigType * _CreateDrawSourceConfig(DescType const & desc, ID3D11Device * pd3dDevice); - -private: - bool _enablePtex; }; 
//------------------------------------------------------------------------------ -template class D3D11DrawRegistry : public D3D11DrawRegistryBase { diff --git a/opensubdiv/osd/d3d11PtexTexture.cpp b/opensubdiv/osd/d3d11PtexTexture.cpp deleted file mode 100644 index 78a02dac..00000000 --- a/opensubdiv/osd/d3d11PtexTexture.cpp +++ /dev/null @@ -1,213 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/d3d11PtexTexture.h" -#include "../osd/ptexTextureLoader.h" -#include "../far/error.h" - -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -D3D11PtexTexture::D3D11PtexTexture() - : _width(0), _height(0), _depth(0), _pages(0), _layout(0), _texels(0) { -} - -D3D11PtexTexture::~D3D11PtexTexture() { - - // delete pages lookup --------------------------------- - if (_pages) _pages->Release(); - - // delete layout lookup -------------------------------- - if (_layout) _layout->Release(); - - // delete textures lookup ------------------------------ - if (_texels) _texels->Release(); -} - -static ID3D11Buffer * -genTextureBuffer(ID3D11DeviceContext *deviceContext, int size, void const * data) { - - D3D11_BUFFER_DESC hBufferDesc; - hBufferDesc.ByteWidth = size; - hBufferDesc.Usage = D3D11_USAGE_DYNAMIC; - hBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER | D3D11_BIND_SHADER_RESOURCE; - hBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - hBufferDesc.MiscFlags = 0; - hBufferDesc.StructureByteStride = sizeof(float); - - HRESULT hr; - ID3D11Buffer *buffer; - ID3D11Device *device; - deviceContext->GetDevice(&device); - hr = device->CreateBuffer(&hBufferDesc, NULL, &buffer); - if (FAILED(hr)) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "Fail in CreateBuffer\n"); - return 0; - } - - D3D11_MAPPED_SUBRESOURCE resource; - hr = deviceContext->Map(buffer, 0, - D3D11_MAP_WRITE_DISCARD, 0, &resource); - if (FAILED(hr)) { - Far::Error(Far::FAR_RUNTIME_ERROR, - "Fail in Map buffer\n"); - buffer->Release(); - return 0; - } - memcpy(resource.pData, data, size); - deviceContext->Unmap(buffer, 0); - - return buffer; -} - -D3D11PtexTexture * -D3D11PtexTexture::Create(ID3D11DeviceContext *deviceContext, - PtexTexture * reader, - unsigned long int targetMemory, - int gutterWidth, - int pageMargin) { - - D3D11PtexTexture * result = NULL; - - // Read the ptex data and pack the texels - PtexTextureLoader ldr(reader, 
gutterWidth, pageMargin); - - unsigned long int nativeSize = ldr.GetNativeUncompressedSize(), - targetSize = targetMemory; - - if (targetSize != 0 && targetSize != nativeSize) - ldr.OptimizeResolution(targetSize); - - int maxnumpages = D3D10_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION; - ldr.OptimizePacking(maxnumpages); - - if (!ldr.GenerateBuffers()) - return result; - - // Setup GPU memory - unsigned long int nfaces = ldr.GetNumBlocks(); - - ID3D11Buffer *pages = genTextureBuffer(deviceContext, - nfaces * sizeof(int), - ldr.GetIndexBuffer()); - - ID3D11Buffer *layout = genTextureBuffer(deviceContext, - nfaces * 4 * sizeof(float), - ldr.GetLayoutBuffer()); - - DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; - int bpp = 0; - int numChannels = reader->numChannels(); - switch (reader->dataType()) { - case Ptex::dt_uint16: - switch (numChannels) { - case 1: format = DXGI_FORMAT_R16_UINT; break; - case 2: format = DXGI_FORMAT_R16G16_UINT; break; - case 3: assert(false); break; - case 4: format = DXGI_FORMAT_R16G16B16A16_UINT; break; - } - bpp = numChannels * 2; - break; - case Ptex::dt_float: - switch (numChannels) { - case 1: format = DXGI_FORMAT_R32_FLOAT; break; - case 2: format = DXGI_FORMAT_R32G32_FLOAT; break; - case 3: format = DXGI_FORMAT_R32G32B32_FLOAT; break; - case 4: format = DXGI_FORMAT_R32G32B32A32_FLOAT; break; - } - bpp = numChannels * 4; - break; - case Ptex::dt_half: - switch (numChannels) { - case 1: format = DXGI_FORMAT_R16_FLOAT; break; - case 2: format = DXGI_FORMAT_R16G16_FLOAT; break; - case 3:assert(false); break; - case 4: format = DXGI_FORMAT_R16G16B16A16_FLOAT; break; - } - bpp = numChannels * 2; - break; - default: - switch (numChannels) { - case 1: format = DXGI_FORMAT_R8_UINT; break; - case 2: format = DXGI_FORMAT_R8G8_UINT; break; - case 3: assert(false); break; - case 4: format = DXGI_FORMAT_R8G8B8A8_UINT; break; - } - bpp = numChannels; - break; - } - - // actual texels texture array - D3D11_TEXTURE2D_DESC desc; - desc.Width = ldr.GetPageSize(); - 
desc.Height = ldr.GetPageSize(); - desc.MipLevels = 1; - desc.ArraySize = ldr.GetNumPages(); - desc.Format = format; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = 0; - desc.MiscFlags = 0; - - D3D11_SUBRESOURCE_DATA initData; - initData.pSysMem = ldr.GetTexelBuffer(); - initData.SysMemPitch = ldr.GetPageSize() * bpp; - initData.SysMemSlicePitch = ldr.GetPageSize() * ldr.GetPageSize() * bpp; - - ID3D11Device *device; - ID3D11Texture2D *texels; - deviceContext->GetDevice(&device); - HRESULT hr = device->CreateTexture2D(&desc, &initData, &texels); - - ldr.ClearBuffers(); - - // Return the Osd PtexTexture object - result = new D3D11PtexTexture; - - result->_width = ldr.GetPageSize(); - result->_height = ldr.GetPageSize(); - result->_depth = ldr.GetNumPages(); - - result->_format = format; - - result->_pages = pages; - result->_layout = layout; - result->_texels = texels; - - return result; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/d3d11PtexTexture.h b/opensubdiv/osd/d3d11PtexTexture.h deleted file mode 100644 index b9d2422d..00000000 --- a/opensubdiv/osd/d3d11PtexTexture.h +++ /dev/null @@ -1,104 +0,0 @@ -// -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_D3D11_PTEX_TEXTURE_H -#define OSD_D3D11_PTEX_TEXTURE_H - -#include "../version.h" - -#include "../osd/nonCopyable.h" - -class PtexTexture; -struct ID3D11Buffer; -struct ID3D11Texture2D; -struct ID3D11DeviceContext; - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// D3D11PtexTexture : implements simple support for ptex textures -/// -/// The current implementation declares _texels as a TEXTURE2D_ARRAY of -/// n pages of a resolution that matches that of the largest face in the PTex file. -/// -/// Two TEXTURE_BUFFER constructs are used -/// as lookup tables : -/// * _pages stores the array index in which a given face is located -/// * _layout stores 4 float coordinates : top-left corner and width/height for each face -/// -/// GLSL fragments use SV_PrimitiveID and SV_DomainLocation to access the _pages and _layout -/// indirection tables, which provide then texture coordinates for the texels stored in -/// the _texels texture array. -/// -/// Hbr provides per-face support for a ptex face indexing scheme. D3D11DrawContext -/// class provides ptex face index lookup table as a texture buffer object that -/// can be accessed by HLSL shaders. 
-/// -class D3D11PtexTexture : NonCopyable { -public: - static D3D11PtexTexture * Create(ID3D11DeviceContext *deviceContext, - PtexTexture * reader, - unsigned long int targetMemory = 0, - int gutterWidth = 0, - int pageMargin = 0); - - /// Returns the texture buffer containing the lookup table associate each ptex - /// face index with its 3D texture page in the texels texture array. - ID3D11Buffer *GetPagesTextureBuffer() const { return _pages; } - - /// Returns the texture buffer containing the layout of the ptex faces in the - /// texels texture array. - ID3D11Buffer *GetLayoutTextureBuffer() const { return _layout; } - - /// Returns the texels texture array. - ID3D11Texture2D *GetTexelsTexture() const { return _texels; } - - ~D3D11PtexTexture(); - -private: - D3D11PtexTexture(); - - int _width, // widht / height / depth of the 3D texel buffer - _height, - _depth; - - int _format; // texel color format - - ID3D11Buffer *_pages, // per-face page indices into the texel array - *_layout; // per-face lookup table - // (vec4 : top-left corner & width / height) - ID3D11Texture2D *_texels; // texel data -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_D3D11_PTEX_TEXTURE_H diff --git a/opensubdiv/osd/d3d11VertexBuffer.h b/opensubdiv/osd/d3d11VertexBuffer.h index c9066c64..0f727801 100644 --- a/opensubdiv/osd/d3d11VertexBuffer.h +++ b/opensubdiv/osd/d3d11VertexBuffer.h @@ -41,7 +41,7 @@ namespace Osd { /// \brief Concrete vertex buffer class for DirectX subvision and DirectX drawing. /// /// D3D11VertexBuffer implements D3D11VertexBufferInterface. An instance -/// of this buffer class can be passed to D3D11ComputeController. +/// of this buffer class can be passed to D3D11ComputeEvaluator. 
/// class D3D11VertexBuffer { public: diff --git a/opensubdiv/osd/drawContext.cpp b/opensubdiv/osd/drawContext.cpp index 934b9c6a..d274a39e 100644 --- a/opensubdiv/osd/drawContext.cpp +++ b/opensubdiv/osd/drawContext.cpp @@ -37,7 +37,7 @@ DrawContext::~DrawContext() {} void DrawContext::ConvertPatchArrays(Far::PatchTables const &patchTables, - PatchArrayVector &osdPatchArrays, int maxValence, int numElements) { + PatchArrayVector &osdPatchArrays) { int narrays = patchTables.GetNumPatchArrays(); @@ -47,17 +47,16 @@ DrawContext::ConvertPatchArrays(Far::PatchTables const &patchTables, for (int array=0, pidx=0, vidx=0, qidx=0; array FVarData; + protected: static void packPatchVerts(Far::PatchTables const & patchTables, @@ -232,28 +183,10 @@ protected: PatchArrayVector _patchArrays; bool _isAdaptive; + + int _maxValence; }; -// Allows ordering of patches by type -inline bool -DrawContext::PatchDescriptor::operator < ( PatchDescriptor const other ) const -{ - return _farDesc < other._farDesc or (_farDesc == other._farDesc and - (_maxValence < other._maxValence or ((_maxValence == other._maxValence) and - (_numElements < other._numElements)))); -} - -// True if the descriptors are identical -inline bool -DrawContext::PatchDescriptor::operator == ( PatchDescriptor const other ) const -{ - return _farDesc == other._farDesc and - _maxValence == other._maxValence and - _numElements == other._numElements; -} - - - } // end namespace Osd } // end namespace OPENSUBDIV_VERSION diff --git a/opensubdiv/osd/evalLimitContext.cpp b/opensubdiv/osd/evalLimitContext.cpp deleted file mode 100644 index 5e133171..00000000 --- a/opensubdiv/osd/evalLimitContext.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. 
is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/evalLimitContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -EvalLimitContext::EvalLimitContext(Far::PatchTables const & patchTables) { - - _adaptive = patchTables.IsFeatureAdaptive(); -} - -EvalLimitContext::~EvalLimitContext() { -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/evalLimitContext.h b/opensubdiv/osd/evalLimitContext.h deleted file mode 100644 index 0e2c90f5..00000000 --- a/opensubdiv/osd/evalLimitContext.h +++ /dev/null @@ -1,101 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_EVAL_LIMIT_CONTEXT_H -#define OSD_EVAL_LIMIT_CONTEXT_H - -#include "../version.h" - -#include "../far/patchTables.h" - -#include "../osd/nonCopyable.h" -#include "../osd/vertex.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - - -/// \brief Coordinates set on a limit surface -/// -struct LimitLocation { - - LimitLocation() { } - - /// \brief Constructor - /// - /// @param f Ptex face id - /// - /// @param x parametric location on face - /// - /// @param y parametric location on face - /// - LimitLocation(int f, float x, float y) : ptexIndex(f), s(x), t(y) { } - - int ptexIndex; ///< ptex face index - - float s, t; ///< parametric location on face -}; - -class LimitLocationsArray { - -public: - - /// \brief Constructor - LimitLocationsArray() : ptexIndex(-1), numLocations(0), s(0), t(0) { } - - int ptexIndex, ///< ptex face index - numLocations; ///< number of (u,v) coordinates in the array - - float const * s, ///< array of u coordinates - * t; ///< array of v coordinates -}; - - -/// \brief LimitEval Context -/// -/// A stub class to derive LimitEval context classes. -/// -class EvalLimitContext : private NonCopyable { - -public: - /// \brief Destructor. 
- virtual ~EvalLimitContext(); - -protected: - explicit EvalLimitContext(Far::PatchTables const & patchTables); - -private: - bool _adaptive; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif /* OSD_EVAL_LIMIT_CONTEXT_H */ diff --git a/opensubdiv/osd/glComputeEvaluator.cpp b/opensubdiv/osd/glComputeEvaluator.cpp new file mode 100644 index 00000000..387c84fb --- /dev/null +++ b/opensubdiv/osd/glComputeEvaluator.cpp @@ -0,0 +1,224 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. 
+//
+
+#include "../osd/glComputeEvaluator.h"
+
+#include <cassert>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "../far/error.h"
+#include "../far/stencilTables.h"
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+
+namespace Osd {
+
+static const char *shaderSource =
+#include "../osd/glslComputeKernel.gen.h"
+;
+
+template <class T> GLuint
+createSSBO(std::vector<T> const & src) {  // uploads src into a new GL buffer; src.at(0) throws if src is empty
+    GLuint devicePtr = 0;
+    glGenBuffers(1, &devicePtr);
+
+#if defined(GL_EXT_direct_state_access)
+    if (glNamedBufferDataEXT) {
+        glNamedBufferDataEXT(devicePtr, src.size()*sizeof(T),
+                             &src.at(0), GL_STATIC_DRAW);
+    } else {
+#else
+    {
+#endif
+        GLint prev = 0;  // remember the current SSBO binding so it can be restored below
+        glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev);
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr);
+        glBufferData(GL_SHADER_STORAGE_BUFFER, src.size()*sizeof(T),
+                     &src.at(0), GL_STATIC_DRAW);
+        glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev);
+    }
+
+    return devicePtr;
+}
+
+GLStencilTablesSSBO::GLStencilTablesSSBO(
+    Far::StencilTables const *stencilTables) {
+    _numStencils = stencilTables->GetNumStencils();
+    if (_numStencils > 0) {  // createSSBO() cannot take empty tables
+        _sizes   = createSSBO(stencilTables->GetSizes());
+        _offsets = createSSBO(stencilTables->GetOffsets());
+        _indices = createSSBO(stencilTables->GetControlIndices());
+        _weights = createSSBO(stencilTables->GetWeights());
+    } else {
+        _sizes = _offsets = _indices = _weights = 0;
+    }
+}
+
+GLStencilTablesSSBO::~GLStencilTablesSSBO() {
+    if (_sizes)   glDeleteBuffers(1, &_sizes);
+    if (_offsets) glDeleteBuffers(1, &_offsets);
+    if (_weights) glDeleteBuffers(1, &_weights);
+    if (_indices) glDeleteBuffers(1, &_indices);
+}
+
+// ---------------------------------------------------------------------------
+
+
+GLComputeEvaluator::GLComputeEvaluator() :
+    _program(0), _workGroupSize(64) {
+}
+
+GLComputeEvaluator::~GLComputeEvaluator() {
+    if (_program) {
+        glDeleteProgram(_program);
+    }
+}
+
+bool
+GLComputeEvaluator::Compile(VertexBufferDescriptor const &srcDesc,
+                            VertexBufferDescriptor const &dstDesc) {
+    if (srcDesc.length > dstDesc.length) {
+        Far::Error(Far::FAR_RUNTIME_ERROR,
+                   "srcDesc length must be less than or equal to "
+                   "dstDesc length.\n");
+        return false;
+    }
+
+    if (_program) {  // recompiling: drop the previously built program first
+        glDeleteProgram(_program);
+        _program = 0;
+    }
+    _program = glCreateProgram();
+
+    GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
+
+    std::ostringstream defines;  // descriptor layout is baked into the kernel as #defines
+    defines << "#define LENGTH "           << srcDesc.length << "\n"
+            << "#define SRC_STRIDE "       << srcDesc.stride << "\n"
+            << "#define DST_STRIDE "       << dstDesc.stride << "\n"
+            << "#define WORK_GROUP_SIZE "  << _workGroupSize << "\n";
+    std::string defineStr = defines.str();
+
+    const char *shaderSources[3] = {"#version 430\n", 0, 0};
+    shaderSources[1] = defineStr.c_str();
+    shaderSources[2] = shaderSource;
+    glShaderSource(shader, 3, shaderSources, NULL);
+    glCompileShader(shader);
+    glAttachShader(_program, shader);
+    // Flag the shader for deletion now; GL frees it when the program detaches it, so no path leaks it.
+    glDeleteShader(shader);
+
+    GLint linked = 0;
+    glLinkProgram(_program);
+    glGetProgramiv(_program, GL_LINK_STATUS, &linked);
+
+    if (linked == GL_FALSE) {
+        char buffer[1024];
+        glGetShaderInfoLog(shader, 1024, NULL, buffer);  // shader is only flagged, still queryable
+        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);  // never use the log itself as the format string
+
+        glGetProgramInfoLog(_program, 1024, NULL, buffer);
+        Far::Error(Far::FAR_RUNTIME_ERROR, "%s", buffer);
+
+        glDeleteProgram(_program);  // also releases the attached, deletion-flagged shader
+        _program = 0;
+        return false;
+    }
+
+    // store uniform locations for the compute kernel program.
+    _uniformSizes   = glGetUniformLocation(_program, "stencilSizes");
+    _uniformOffsets = glGetUniformLocation(_program, "stencilOffsets");
+    _uniformIndices = glGetUniformLocation(_program, "stencilIndices");
+    _uniformWeights = glGetUniformLocation(_program, "stencilIWeights");  // NOTE(review): name looks misspelled - confirm against the kernel source
+
+    _uniformStart   = glGetUniformLocation(_program, "batchStart");
+    _uniformEnd     = glGetUniformLocation(_program, "batchEnd");
+
+    _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset");
+    _uniformDstOffset = glGetUniformLocation(_program, "dstOffset");
+
+    return true;
+}
+
+/* static */
+void
+GLComputeEvaluator::Synchronize(void * /*kernel*/) {
+    // XXX: this is currently just for the performance measuring purpose.
+    // need to be reimplemented by fence and sync.
+    glFinish();
+}
+
+bool
+GLComputeEvaluator::EvalStencils(GLuint srcBuffer,
+                                 VertexBufferDescriptor const &srcDesc,
+                                 GLuint dstBuffer,
+                                 VertexBufferDescriptor const &dstDesc,
+                                 GLuint sizesBuffer,
+                                 GLuint offsetsBuffer,
+                                 GLuint indicesBuffer,
+                                 GLuint weightsBuffer,
+                                 int start,
+                                 int end) const {
+    if (!_program) return false;  // Compile() has not succeeded yet
+    int count = end - start;
+    if (count <= 0) {  // empty range: nothing to dispatch, not an error
+        return true;
+    }
+
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, srcBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, dstBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, sizesBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, offsetsBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, indicesBuffer);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, weightsBuffer);
+
+    glUseProgram(_program);
+
+    glUniform1i(_uniformStart,     start);
+    glUniform1i(_uniformEnd,       end);
+    glUniform1i(_uniformSrcOffset, srcDesc.offset);
+    glUniform1i(_uniformDstOffset, dstDesc.offset);
+
+    glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1);  // ceil(count / _workGroupSize) groups
+
+    glUseProgram(0);
+
+    glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0);
+    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0);
+
+    return true;
+}
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+}  // end namespace OpenSubdiv
diff --git a/opensubdiv/osd/glComputeEvaluator.h b/opensubdiv/osd/glComputeEvaluator.h
new file mode 100644
index 00000000..bea48d4c
--- /dev/null
+++ b/opensubdiv/osd/glComputeEvaluator.h
@@ -0,0 +1,215 @@
+//
+//   Copyright 2015 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+//
+
+#ifndef OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
+#define OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
+
+#include "../version.h"
+
+#include "../osd/opengl.h"
+#include "../osd/vertexDescriptor.h"
+
+namespace OpenSubdiv {
+namespace OPENSUBDIV_VERSION {
+
+namespace Far {
+    class StencilTables;
+}
+
+namespace Osd {
+
+/// \brief GL stencil tables (Shader Storage buffer)
+///
+/// This class is a GLSL SSBO representation of Far::StencilTables.
+///
+/// GLComputeEvaluator consumes this table to apply stencils
+///
+class GLStencilTablesSSBO {
+public:
+    static GLStencilTablesSSBO *Create(Far::StencilTables const *stencilTables,
+                                       void *deviceContext = NULL) {
+        (void)deviceContext;  // unused
+        return new GLStencilTablesSSBO(stencilTables);
+    }
+
+    explicit GLStencilTablesSSBO(Far::StencilTables const *stencilTables);
+    ~GLStencilTablesSSBO();
+
+    // interfaces needed for GLComputeEvaluator::EvalStencils
+    GLuint GetSizesBuffer() const { return _sizes; }
+    GLuint GetOffsetsBuffer() const { return _offsets; }
+    GLuint GetIndicesBuffer() const { return _indices; }
+    GLuint GetWeightsBuffer() const { return _weights; }
+    int GetNumStencils() const { return _numStencils; }
+
+private:
+    GLuint _sizes;
+    GLuint _offsets;
+    GLuint _indices;
+    GLuint _weights;
+    int _numStencils;
+};
+
+// ---------------------------------------------------------------------------
+
+class GLComputeEvaluator {
+public:
+    typedef bool Instantiatable;
+    static GLComputeEvaluator * Create(VertexBufferDescriptor const &srcDesc,
+                                       VertexBufferDescriptor const &dstDesc,
+                                       void * deviceContext = NULL) {
+        (void)deviceContext;  // not used
+        GLComputeEvaluator *instance = new GLComputeEvaluator();
+        if (instance->Compile(srcDesc, dstDesc)) return instance;
+        delete instance;
+        return NULL;
+    }
+
+    /// Constructor.
+    GLComputeEvaluator();
+
+    /// Destructor. note that the GL context must be made current.
+    ~GLComputeEvaluator();
+
+    /// \brief Generic static compute function. This function has the same
+    ///        signature as other device kernels have so that it can be called
+    ///        transparently from OsdMesh template interface.
+    ///
+    /// @param srcVertexBuffer  Input primvar buffer.
+    ///                         must have BindVBO() method returning the
+    ///                         GL buffer object name to read from
+    ///
+    /// @param srcDesc          vertex buffer descriptor for the input buffer
+    ///
+    /// @param dstVertexBuffer  Output primvar buffer
+    ///                         must have BindVBO() method returning the
+    ///                         GL buffer object name to write to
+    ///
+    /// @param dstDesc          vertex buffer descriptor for the output buffer
+    ///
+    /// @param stencilTable     stencil table to be applied. The table must have
+    ///                         SSBO interfaces.
+    ///
+    /// @param instance         cached compiled instance. Clients are supposed to
+    ///                         pre-compile an instance of this class and provide
+    ///                         it to this function. If it's null the kernel is
+    ///                         still computed by instantiating one on demand,
+    ///                         although it may cause a performance problem.
+    ///
+    /// @param deviceContext    not used in the GLSL kernel
+    ///
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                             VertexBufferDescriptor const &srcDesc,
+                             VERTEX_BUFFER *dstVertexBuffer,
+                             VertexBufferDescriptor const &dstDesc,
+                             STENCIL_TABLE const *stencilTable,
+                             GLComputeEvaluator const *instance,
+                             void * deviceContext = NULL) {
+        if (instance) {
+            return instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                          dstVertexBuffer, dstDesc,
+                                          stencilTable);
+        } else {
+            // Create a kernel on demand (slow)
+            (void)deviceContext;  // unused
+            instance = Create(srcDesc, dstDesc);
+            if (instance) {
+                bool r = instance->EvalStencils(srcVertexBuffer, srcDesc,
+                                                dstVertexBuffer, dstDesc,
+                                                stencilTable);
+                delete instance;
+                return r;
+            }
+            return false;
+        }
+    }
+
+    /// Dispatch the GLSL compute kernel on GPU asynchronously.
+    /// returns false if the kernel hasn't been compiled yet.
+    template <typename VERTEX_BUFFER, typename STENCIL_TABLE>
+    bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer,
+                      VertexBufferDescriptor const &srcDesc,
+                      VERTEX_BUFFER *dstVertexBuffer,
+                      VertexBufferDescriptor const &dstDesc,
+                      STENCIL_TABLE const *stencilTable) const {
+        return EvalStencils(srcVertexBuffer->BindVBO(),
+                            srcDesc,
+                            dstVertexBuffer->BindVBO(),
+                            dstDesc,
+                            stencilTable->GetSizesBuffer(),
+                            stencilTable->GetOffsetsBuffer(),
+                            stencilTable->GetIndicesBuffer(),
+                            stencilTable->GetWeightsBuffer(),
+                            /* start = */ 0,
+                            /* end   = */ stencilTable->GetNumStencils());
+    }
+
+    /// Dispatch the GLSL compute kernel on GPU asynchronously.
+    /// returns false if the kernel hasn't been compiled yet.
+    bool EvalStencils(GLuint srcBuffer,
+                      VertexBufferDescriptor const &srcDesc,
+                      GLuint dstBuffer,
+                      VertexBufferDescriptor const &dstDesc,
+                      GLuint sizesBuffer,
+                      GLuint offsetsBuffer,
+                      GLuint indicesBuffer,
+                      GLuint weightsBuffer,
+                      int start,
+                      int end) const;
+
+    /// Configure GLSL kernel. A valid GL context must be made current before
+    /// calling this function. Returns false if it fails to compile the kernel.
+    bool Compile(VertexBufferDescriptor const &srcDesc,
+                 VertexBufferDescriptor const &dstDesc);
+
+    /// Wait until the dispatched kernel finishes.
+    static void Synchronize(void *deviceContext);
+
+private:
+    GLuint _program;
+
+    GLint _uniformSizes,     // stencil tables (GLint: glGetUniformLocation yields -1 when absent)
+          _uniformOffsets,
+          _uniformIndices,
+          _uniformWeights,
+
+          _uniformStart,     // range
+          _uniformEnd,
+
+          _uniformSrcOffset, // src buffer offset (in elements)
+          _uniformDstOffset; // dst buffer offset (in elements)
+
+    int _workGroupSize;
+};
+
+}  // end namespace Osd
+
+}  // end namespace OPENSUBDIV_VERSION
+using namespace OPENSUBDIV_VERSION;
+
+}  // end namespace OpenSubdiv
+
+
+#endif  // OPENSUBDIV_OSD_GL_COMPUTE_EVALUATOR_H
diff --git a/opensubdiv/osd/glDrawContext.cpp b/opensubdiv/osd/glDrawContext.cpp index cff149de..e637b340 100644 --- a/opensubdiv/osd/glDrawContext.cpp +++ b/opensubdiv/osd/glDrawContext.cpp @@ -32,7 +32,8 @@ namespace OPENSUBDIV_VERSION { namespace Osd { -GLDrawContext::GLDrawContext() : +GLDrawContext::GLDrawContext(int maxValence) : + DrawContext(maxValence), _patchIndexBuffer(0), _patchParamTextureBuffer(0), _fvarDataTextureBuffer(0), _vertexTextureBuffer(0), _vertexValenceTextureBuffer(0), _quadOffsetsTextureBuffer(0) { @@ -98,14 +99,14 @@ createTextureBuffer(T const &data, GLint format, int offset=0) } GLDrawContext * -GLDrawContext::Create(Far::PatchTables const * patchTables, - int numVertexElements, void * /*deviceContext*/) { +GLDrawContext::Create(Far::PatchTables const * patchTables, void * /*deviceContext*/) { if (patchTables) { - GLDrawContext * result = new GLDrawContext(); + int maxValence = patchTables->GetMaxValence(); + GLDrawContext * result = new GLDrawContext(maxValence); - if (result->create(*patchTables, numVertexElements)) { + if (result->create(*patchTables)) { return result; } else { delete result; @@ -115,7 +116,7 @@ GLDrawContext::Create(Far::PatchTables const * patchTables, } bool -GLDrawContext::create(Far::PatchTables const & patchTables, int numVertexElements) { +GLDrawContext::create(Far::PatchTables const & patchTables) { _isAdaptive = patchTables.IsFeatureAdaptive(); @@ -138,8 +139,7
@@ GLDrawContext::create(Far::PatchTables const & patchTables, int numVertexElement glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); } - DrawContext::ConvertPatchArrays(patchTables, _patchArrays, - patchTables.GetMaxValence(), numVertexElements); + DrawContext::ConvertPatchArrays(patchTables, _patchArrays); // allocate and initialize additional buffer data diff --git a/opensubdiv/osd/glDrawContext.h b/opensubdiv/osd/glDrawContext.h index e580107d..0ed7e4e6 100644 --- a/opensubdiv/osd/glDrawContext.h +++ b/opensubdiv/osd/glDrawContext.h @@ -58,10 +58,9 @@ public: /// /// @param patchTables a valid set of Far::PatchTables /// - /// @param numVertexElements the number of vertex elements + /// @param deviceContext not used in GLDrawContext /// static GLDrawContext * Create(Far::PatchTables const * patchTables, - int numVertexElements, void *deviceContext = NULL); /// Set vbo as a vertex texture (for gregory patch drawing) @@ -137,10 +136,10 @@ protected: GLuint _vertexValenceTextureBuffer; GLuint _quadOffsetsTextureBuffer; - GLDrawContext(); + GLDrawContext(int maxValence); // allocate buffers from patchTables - bool create(Far::PatchTables const & patchTables, int numElements); + bool create(Far::PatchTables const & patchTables); void updateVertexTexture(GLuint vbo); }; diff --git a/opensubdiv/osd/glDrawRegistry.cpp b/opensubdiv/osd/glDrawRegistry.cpp index 6e5d19ff..7ab446b2 100644 --- a/opensubdiv/osd/glDrawRegistry.cpp +++ b/opensubdiv/osd/glDrawRegistry.cpp @@ -43,9 +43,6 @@ GLDrawConfig::~GLDrawConfig() static const char *commonShaderSource = #include "glslPatchCommon.gen.h" ; -static const char *ptexShaderSource = -#include "glslPtexCommon.gen.h" -; static const char *bsplineShaderSource = #include "glslPatchBSpline.gen.h" ; @@ -55,50 +52,29 @@ static const char *gregoryShaderSource = static const char *gregoryBasisShaderSource = #include "glslPatchGregoryBasis.gen.h" ; -static const char *transitionShaderSource = -#include "glslPatchTransition.gen.h" -; #endif 
GLDrawRegistryBase::~GLDrawRegistryBase() {} #if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0) GLDrawSourceConfig * -GLDrawRegistryBase::_CreateDrawSourceConfig( - DrawContext::PatchDescriptor const & desc) +GLDrawRegistryBase::_CreateDrawSourceConfig(Far::PatchDescriptor const & desc) { GLDrawSourceConfig * sconfig = _NewDrawSourceConfig(); sconfig->commonShader.source = commonShaderSource; - if (IsPtexEnabled()) { - sconfig->commonShader.source += ptexShaderSource; - } - - { - std::ostringstream ss; - ss << (int)desc.GetMaxValence(); - sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str()); - ss.str(""); - ss << (int)desc.GetNumElements(); - sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str()); - } - switch (desc.GetType()) { case Far::PatchDescriptor::REGULAR: - case Far::PatchDescriptor::BOUNDARY: - case Far::PatchDescriptor::CORNER: sconfig->commonShader.AddDefine("OSD_PATCH_BSPLINE"); sconfig->commonShader.AddDefine("OSD_PATCH_ENABLE_SINGLE_CREASE"); sconfig->vertexShader.source = bsplineShaderSource; sconfig->vertexShader.version = "#version 410\n"; sconfig->vertexShader.AddDefine("OSD_PATCH_VERTEX_BSPLINE_SHADER"); - sconfig->tessControlShader.source = - std::string(transitionShaderSource) + bsplineShaderSource; + sconfig->tessControlShader.source = bsplineShaderSource; sconfig->tessControlShader.version = "#version 410\n"; sconfig->tessControlShader.AddDefine("OSD_PATCH_TESS_CONTROL_BSPLINE_SHADER"); - sconfig->tessEvalShader.source = - std::string(transitionShaderSource) + bsplineShaderSource; + sconfig->tessEvalShader.source = bsplineShaderSource; sconfig->tessEvalShader.version = "#version 410\n"; sconfig->tessEvalShader.AddDefine("OSD_PATCH_TESS_EVAL_BSPLINE_SHADER"); break; @@ -148,8 +124,7 @@ GLDrawRegistryBase::_CreateDrawSourceConfig( } #else GLDrawSourceConfig * -GLDrawRegistryBase::_CreateDrawSourceConfig( - DrawContext::PatchDescriptor const &) +GLDrawRegistryBase::_CreateDrawSourceConfig(Far::PatchDescriptor 
const &) { return _NewDrawSourceConfig(); } @@ -202,7 +177,7 @@ _CompileShader( GLDrawConfig * GLDrawRegistryBase::_CreateDrawConfig( - DrawContext::PatchDescriptor const & /* desc */, + Far::PatchDescriptor const & /* desc */, GLDrawSourceConfig const * sconfig) { assert(sconfig); diff --git a/opensubdiv/osd/glDrawRegistry.h b/opensubdiv/osd/glDrawRegistry.h index 3e1857c0..452e2d17 100644 --- a/opensubdiv/osd/glDrawRegistry.h +++ b/opensubdiv/osd/glDrawRegistry.h @@ -28,8 +28,6 @@ #include "../version.h" #include "../osd/drawRegistry.h" -#include "../osd/vertex.h" - #include "../osd/opengl.h" #include @@ -65,22 +63,14 @@ struct GLDrawSourceConfig : public DrawSourceConfig { class GLDrawRegistryBase { public: - typedef DrawContext::PatchDescriptor DescType; + typedef Far::PatchDescriptor DescType; typedef GLDrawConfig ConfigType; typedef GLDrawSourceConfig SourceConfigType; - GLDrawRegistryBase(bool enablePtex=false) : _enablePtex(enablePtex) { } + GLDrawRegistryBase() { } virtual ~GLDrawRegistryBase(); - bool IsPtexEnabled() const { - return _enablePtex; - } - - void SetPtexEnabled(bool b) { - _enablePtex=b; - } - protected: virtual ConfigType * _NewDrawConfig() { return new ConfigType(); @@ -92,16 +82,13 @@ protected: virtual SourceConfigType * _NewDrawSourceConfig() { return new SourceConfigType(); } - - virtual SourceConfigType * _CreateDrawSourceConfig(DescType const & desc); -private: - bool _enablePtex; + virtual SourceConfigType * _CreateDrawSourceConfig(DescType const & desc); }; //------------------------------------------------------------------------------ -template diff --git a/opensubdiv/osd/glPtexTexture.cpp b/opensubdiv/osd/glPtexTexture.cpp deleted file mode 100644 index 0e0df35e..00000000 --- a/opensubdiv/osd/glPtexTexture.cpp +++ /dev/null @@ -1,173 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// 
compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/glPtexTexture.h" -#include "../osd/ptexTextureLoader.h" - -#include "../osd/opengl.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -GLPtexTexture::GLPtexTexture() - : _width(0), _height(0), _depth(0), _pages(0), _layout(0), _texels(0) { -} - -GLPtexTexture::~GLPtexTexture() { - - // delete pages lookup --------------------------------- - if (glIsTexture(_pages)) - glDeleteTextures(1, &_pages); - - // delete layout lookup -------------------------------- - if (glIsTexture(_layout)) - glDeleteTextures(1, &_layout); - - // delete textures lookup ------------------------------ - if (glIsTexture(_texels)) - glDeleteTextures(1, &_texels); -} - -static GLuint -genTextureBuffer(GLenum format, GLsizeiptr size, GLvoid const * data) { - - GLuint buffer, result; - glGenBuffers(1, &buffer); - glBindBuffer(GL_TEXTURE_BUFFER, buffer); - glBufferData(GL_TEXTURE_BUFFER, size, data, GL_STATIC_DRAW); - - glGenTextures(1, & result); - glBindTexture(GL_TEXTURE_BUFFER, result); - glTexBuffer(GL_TEXTURE_BUFFER, format, buffer); - - 
// need to reset texture binding before deleting the source buffer. - glBindTexture(GL_TEXTURE_BUFFER, 0); - glDeleteBuffers(1, &buffer); - - return result; -} - -GLPtexTexture * -GLPtexTexture::Create(PtexTexture * reader, - unsigned long int targetMemory, - int gutterWidth, - int pageMargin) { - - GLPtexTexture * result = NULL; - - // Read the ptexture data and pack the texels - PtexTextureLoader ldr(reader, gutterWidth, pageMargin); - - unsigned long int nativeSize = ldr.GetNativeUncompressedSize(), - targetSize = targetMemory; - - if (targetSize != 0 && targetSize != nativeSize) - ldr.OptimizeResolution(targetSize); - - GLint maxnumpages = 0; - glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxnumpages); - - ldr.OptimizePacking(maxnumpages); - - if (!ldr.GenerateBuffers()) - return result; - - // Setup GPU memory - unsigned long int nfaces = ldr.GetNumBlocks(); - - GLuint pages = genTextureBuffer(GL_R32I, - nfaces * sizeof(GLint), - ldr.GetIndexBuffer()); - - GLuint layout = genTextureBuffer(GL_RGBA32F, - nfaces * 4 * sizeof(GLfloat), - ldr.GetLayoutBuffer()); - - GLenum format, type; - switch (reader->dataType()) { - case Ptex::dt_uint16 : type = GL_UNSIGNED_SHORT; break; - case Ptex::dt_float : type = GL_FLOAT; break; - case Ptex::dt_half : type = GL_HALF_FLOAT; break; - default : type = GL_UNSIGNED_BYTE; break; - } - - switch (reader->numChannels()) { - case 1 : format = GL_RED; break; - case 2 : format = GL_RG; break; - case 3 : format = GL_RGB; break; - case 4 : format = GL_RGBA; break; - default: format = GL_RED; break; - } - - // actual texels texture array - GLuint texels; - glGenTextures(1, &texels); - glBindTexture(GL_TEXTURE_2D_ARRAY, texels); - - // XXXX for the time being, filtering is off - once cross-patch filtering - // is in place, we will use glGenSamplers to dynamically access these settings. 
- if (gutterWidth > 0) { - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } else { - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, - (type == GL_FLOAT) ? GL_RGBA32F : GL_RGBA, - ldr.GetPageSize(), - ldr.GetPageSize(), - ldr.GetNumPages(), - 0, format, type, - ldr.GetTexelBuffer()); - - ldr.ClearBuffers(); - - // Return the Osd Ptexture object - result = new GLPtexTexture; - - result->_width = ldr.GetPageSize(); - result->_height = ldr.GetPageSize(); - result->_depth = ldr.GetNumPages(); - - result->_format = format; - - result->_pages = pages; - result->_layout = layout; - result->_texels = texels; - - return result; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glPtexTexture.h b/opensubdiv/osd/glPtexTexture.h deleted file mode 100644 index 6ed21949..00000000 --- a/opensubdiv/osd/glPtexTexture.h +++ /dev/null @@ -1,100 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GL_PTEX_TEXTURE_H -#define OSD_GL_PTEX_TEXTURE_H - -#include "../version.h" - -#include "../osd/nonCopyable.h" - -#include "../osd/opengl.h" - -class PtexTexture; - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// GLPTexture : implements simple support for ptex textures -/// -/// The current implementation declares _texels as a GL_TEXTURE_2D_ARRAY of -/// n pages of a resolution that matches that of the largest face in the PTex file. -/// -/// Two GL_TEXTURE_BUFFER constructs are used -/// as lookup tables : -/// * _pages stores the array index in which a given face is located -/// * _layout stores 4 float coordinates : top-left corner and width/height for each face -/// -/// GLSL fragments use gl_PrimitiveID and gl_TessCoords to access the _pages and _layout -/// indirection tables, which provide then texture coordinates for the texels stored in -/// the _texels texture array. -/// -/// Hbr provides per-face support for a ptex face indexing scheme. OsdGLDrawContext -/// class provides ptex face index lookup table as a texture buffer object that -/// can be accessed by GLSL shaders. -/// -class GLPtexTexture : NonCopyable { -public: - static GLPtexTexture * Create(PtexTexture * reader, - unsigned long int targetMemory = 0, - int gutterWidth = 0, - int pageMargin = 0); - - /// Returns the texture buffer containing the lookup table associate each ptex - /// face index with its 3D texture page in the texels texture array. 
- GLuint GetPagesTextureBuffer() const { return _pages; } - - /// Returns the texture buffer containing the layout of the ptex faces in the - /// texels texture array. - GLuint GetLayoutTextureBuffer() const { return _layout; } - - /// Returns the texels texture array. - GLuint GetTexelsTexture() const { return _texels; } - - ~GLPtexTexture(); - -private: - GLPtexTexture(); - - GLsizei _width, // widht / height / depth of the 3D texel buffer - _height, - _depth; - - GLint _format; // texel color format - - GLuint _pages, // per-face page indices into the texel array - _layout, // per-face lookup table (vec4 : top-left corner & width / height) - _texels; // texel data -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GL_PTEX_TEXTURE_H diff --git a/opensubdiv/osd/glVertexBuffer.h b/opensubdiv/osd/glVertexBuffer.h index 67709a60..37ee8917 100644 --- a/opensubdiv/osd/glVertexBuffer.h +++ b/opensubdiv/osd/glVertexBuffer.h @@ -39,8 +39,7 @@ namespace Osd { /// \brief Concrete vertex buffer class for GLSL subvision and OpenGL drawing. /// /// GLVertexBuffer implements GLVertexBufferInterface. An instance -/// of this buffer class can be passed to OsdGLComputeController -/// and OsdGLDrawController +/// of this buffer class can be passed to OsdGLComputeEvaluator. /// class GLVertexBuffer { public: diff --git a/opensubdiv/osd/glXFBEvaluator.cpp b/opensubdiv/osd/glXFBEvaluator.cpp new file mode 100644 index 00000000..14ef745b --- /dev/null +++ b/opensubdiv/osd/glXFBEvaluator.cpp @@ -0,0 +1,353 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. 
This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#include "../osd/glXFBEvaluator.h" + +#include +#include +#include +#include + +#include "../far/error.h" +#include "../far/stencilTables.h" + +#if _MSC_VER + #define snprintf _snprintf +#endif + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +static const char *shaderSource = +#include "../osd/glslXFBKernel.gen.h" +; + +template GLuint +createGLTextureBuffer(std::vector const & src, GLenum type) { + GLint size = static_cast(src.size()*sizeof(T)); + void const * ptr = &src.at(0); + + GLuint buffer; + glGenBuffers(1, &buffer); + + GLuint devicePtr; + glGenTextures(1, &devicePtr); + +#if defined(GL_EXT_direct_state_access) + if (glNamedBufferDataEXT && glTextureBufferEXT) { + glNamedBufferDataEXT(buffer, size, ptr, GL_STATIC_DRAW); + glTextureBufferEXT(devicePtr, GL_TEXTURE_BUFFER, type, buffer); + } else { +#else + { +#endif + GLint prev = 0; + + glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &prev); + glBindBuffer(GL_ARRAY_BUFFER, buffer); + glBufferData(GL_ARRAY_BUFFER, size, ptr, GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, prev); + + glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, &prev); + glBindTexture(GL_TEXTURE_BUFFER, devicePtr); + glTexBuffer(GL_TEXTURE_BUFFER, type, buffer); + 
glBindTexture(GL_TEXTURE_BUFFER, prev); + } + + glDeleteBuffers(1, &buffer); + + return devicePtr; +} + +GLStencilTablesTBO::GLStencilTablesTBO( + Far::StencilTables const *stencilTables) { + + _numStencils = stencilTables->GetNumStencils(); + if (_numStencils > 0) { + _sizes = createGLTextureBuffer(stencilTables->GetSizes(), GL_R8UI); + _offsets = createGLTextureBuffer( + stencilTables->GetOffsets(), GL_R32I); + _indices = createGLTextureBuffer( + stencilTables->GetControlIndices(), GL_R32I); + _weights = createGLTextureBuffer(stencilTables->GetWeights(), GL_R32F); + } else { + _sizes = _offsets = _indices = _weights = 0; + } +} + +GLStencilTablesTBO::~GLStencilTablesTBO() { + if (_sizes) glDeleteTextures(1, &_sizes); + if (_offsets) glDeleteTextures(1, &_offsets); + if (_weights) glDeleteTextures(1, &_weights); + if (_indices) glDeleteTextures(1, &_indices); +} + +// --------------------------------------------------------------------------- + + +GLXFBEvaluator::GLXFBEvaluator() : + _program(0), _srcBufferTexture(0), + _uniformSrcBufferTexture(0), _uniformSizesTexture(0), + _uniformOffsetsTexture(0), _uniformIndicesTexture(0), + _uniformWeightsTexture(0), _uniformStart(0), _uniformEnd(0), + _uniformSrcOffset(0) { +} + +GLXFBEvaluator::~GLXFBEvaluator() { + if (_program) { + glDeleteProgram(_program); + } + if (_srcBufferTexture) { + glDeleteTextures(1, &_srcBufferTexture); + } +} + +bool +GLXFBEvaluator::Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc) { + if (_program) { + glDeleteProgram(_program); + _program = 0; + } + _program = glCreateProgram(); + + GLuint shader = glCreateShader(GL_VERTEX_SHADER); + + std::ostringstream defines; + defines << "#define LENGTH " << srcDesc.length << "\n" + << "#define SRC_STRIDE " << srcDesc.stride << "\n"; + std::string defineStr = defines.str(); + + const char *shaderSources[3] = {"#version 410\n", NULL, NULL}; + + shaderSources[1] = defineStr.c_str(); + shaderSources[2] = 
shaderSource; + glShaderSource(shader, 3, shaderSources, NULL); + glCompileShader(shader); + glAttachShader(_program, shader); + + std::vector outputs; + std::vector pOutputs; + { + // vertex data (may include custom vertex data) and varying data + // are stored into the same buffer, interleaved. + // + // (gl_SkipComponents1) + // outVertexData[0] + // outVertexData[1] + // outVertexData[2] + // (gl_SkipComponents1) + // + // note that "primvarOffset" in shader is still needed to read + // interleaved components even if gl_SkipComponents is used. + // + char attrName[32]; + int primvarOffset = (dstDesc.offset % dstDesc.stride); + for (int i = 0; i < primvarOffset; ++i) { + outputs.push_back("gl_SkipComponents1"); + } + for (int i = 0; i < dstDesc.length; ++i) { + snprintf(attrName, sizeof(attrName), "outVertexBuffer[%d]", i); + outputs.push_back(attrName); + } + for (int i = primvarOffset + dstDesc.length; i < dstDesc.stride; ++i) { + outputs.push_back("gl_SkipComponents1"); + } + + // convert to char* array + for (size_t i = 0; i < outputs.size(); ++i) { + pOutputs.push_back(&outputs[i][0]); + } + } + + glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(), + &pOutputs[0], GL_INTERLEAVED_ATTRIBS); + + GLint linked = 0; + glLinkProgram(_program); + glGetProgramiv(_program, GL_LINK_STATUS, &linked); + + if (linked == GL_FALSE) { + char buffer[1024]; + glGetShaderInfoLog(shader, 1024, NULL, buffer); + Far::Error(Far::FAR_RUNTIME_ERROR, buffer); + + glGetProgramInfoLog(_program, 1024, NULL, buffer); + Far::Error(Far::FAR_RUNTIME_ERROR, buffer); + + glDeleteProgram(_program); + _program = 0; + return false; + } + + glDeleteShader(shader); + + // set uniform locations for compute kernels + _uniformSrcBufferTexture = glGetUniformLocation(_program, "vertexBuffer"); + + _uniformSizesTexture = glGetUniformLocation(_program, "sizes"); + _uniformOffsetsTexture = glGetUniformLocation(_program, "offsets"); + _uniformIndicesTexture = glGetUniformLocation(_program, 
"indices"); + _uniformWeightsTexture = glGetUniformLocation(_program, "weights"); + + _uniformStart = glGetUniformLocation(_program, "batchStart"); + _uniformEnd = glGetUniformLocation(_program, "batchEnd"); + + _uniformSrcOffset = glGetUniformLocation(_program, "srcOffset"); + + // create a texture for input buffer + if (!_srcBufferTexture) { + glGenTextures(1, &_srcBufferTexture); + } + return true; +} + +/* static */ +void +GLXFBEvaluator::Synchronize(void * /*kernel*/) { + // XXX: this is currently just for the test purpose. + // need to be reimplemented by fence and sync. + glFinish(); +} + +static void +bindTexture(GLint sampler, GLuint texture, int unit) { + if (sampler == -1) { + return; + } + glUniform1i(sampler, unit); + glActiveTexture(GL_TEXTURE0 + unit); + glBindTexture(GL_TEXTURE_BUFFER, texture); + glActiveTexture(GL_TEXTURE0); +} + +bool +GLXFBEvaluator::EvalStencils(GLuint srcBuffer, + VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, + VertexBufferDescriptor const &dstDesc, + GLuint sizesTexture, + GLuint offsetsTexture, + GLuint indicesTexture, + GLuint weightsTexture, + int start, + int end) const { + if (!_program) return false; + int count = end - start; + if (count <= 0) { + return true; + } + + // bind vertex array + // always create new one, to be safe with multiple contexts (slow though) + GLuint vao = 0; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + + glEnable(GL_RASTERIZER_DISCARD); + glUseProgram(_program); + + // Set input VBO as a texture buffer. + glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture); + glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer); + glBindTexture(GL_TEXTURE_BUFFER, 0); + + bindTexture(_uniformSrcBufferTexture, _srcBufferTexture, 0); + + // bind stencil tables textures. 
+ bindTexture(_uniformSizesTexture, sizesTexture, 1); + bindTexture(_uniformOffsetsTexture, offsetsTexture, 2); + bindTexture(_uniformIndicesTexture, indicesTexture, 3); + bindTexture(_uniformWeightsTexture, weightsTexture, 4); + + // set batch range + glUniform1i(_uniformStart, start); + glUniform1i(_uniformEnd, end); + glUniform1i(_uniformSrcOffset, srcDesc.offset); + + // The destination buffer is bound at vertex boundary. + // + // Example: When we have a batched and interleaved vertex buffer + // + // Obj X | Obj Y | + // -----------+-------------------------------------------+------- + // | vtx 0 | vtx 1 | | + // -----------+---------------+---------------+-----------+------- + // | x y z r g b a | x y z r g b a | .... | + // -----------+---------------+---------------+-----------+------- + // ^ + // srcDesc.offset for Obj Y color + // + // ^-------------------------------------------^ + // XFB destination buffer range + // S S S * * * * + // k k k + // i i i + // p p p + // + // We use gl_SkipComponents to skip the first 3 XYZ so the + // buffer itself needs to be bound for entire section of ObjY. + // + // Note that for the source buffer (texture) we bind the whole + // buffer (all VBO range) and use srcOffset=srcDesc.offset for + // indexing. 
+ // + int dstBufferBindOffset = + dstDesc.offset - (dstDesc.offset % dstDesc.stride); + + // bind destination buffer + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, + 0, dstBuffer, + dstBufferBindOffset * sizeof(float), + count * dstDesc.stride * sizeof(float)); + + glBeginTransformFeedback(GL_POINTS); + glDrawArrays(GL_POINTS, 0, count); + glEndTransformFeedback(); + + glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); + + for (int i = 0; i < 5; ++i) { + glActiveTexture(GL_TEXTURE0 + i); + glBindTexture(GL_TEXTURE_BUFFER, 0); + } + + glDisable(GL_RASTERIZER_DISCARD); + glUseProgram(0); + glActiveTexture(GL_TEXTURE0); + + // revert vao + glBindVertexArray(0); + glDeleteVertexArrays(1, &vao); + + + return true; +} + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glXFBEvaluator.h b/opensubdiv/osd/glXFBEvaluator.h new file mode 100644 index 00000000..c987eba1 --- /dev/null +++ b/opensubdiv/osd/glXFBEvaluator.h @@ -0,0 +1,215 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H +#define OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H + +#include "../version.h" + +#include "../osd/opengl.h" +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Far { + class StencilTables; +} + +namespace Osd { + +/// \brief GL TextureBuffer stencil tables +/// +/// This class is a GL Texture Buffer representation of Far::StencilTables. +/// +/// GLSLTransformFeedback consumes this table to apply stencils +/// +/// +class GLStencilTablesTBO { +public: + static GLStencilTablesTBO *Create( + Far::StencilTables const *stencilTables, void *deviceContext = NULL) { + (void)deviceContext; // unused + return new GLStencilTablesTBO(stencilTables); + } + + explicit GLStencilTablesTBO(Far::StencilTables const *stencilTables); + ~GLStencilTablesTBO(); + + // interfaces needed for GLSLTransformFeedbackKernel + GLuint GetSizesTexture() const { return _sizes; } + GLuint GetOffsetsTexture() const { return _offsets; } + GLuint GetIndicesTexture() const { return _indices; } + GLuint GetWeightsTexture() const { return _weights; } + int GetNumStencils() const { return _numStencils; } + +private: + GLuint _sizes; + GLuint _offsets; + GLuint _indices; + GLuint _weights; + int _numStencils; +}; + +// --------------------------------------------------------------------------- + +class GLXFBEvaluator { +public: + typedef bool Instantiatable; + static GLXFBEvaluator * Create(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + void * deviceContext = NULL) { + (void)deviceContext; // not used + GLXFBEvaluator *instance = new GLXFBEvaluator(); + if (instance->Compile(srcDesc, dstDesc)) return instance; + delete instance; + return NULL; + } + + /// Constructor. + GLXFBEvaluator(); + + /// Destructor. note that the GL context must be made current. 
+ ~GLXFBEvaluator(); + + /// \brief Generic static stencil function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindVBO() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindVBO() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. The table must have + /// Texture Buffer Object interfaces. + /// + /// @param instance cached compiled instance. Clients are supposed to + /// pre-compile an instance of this class and provide + /// to this function. If it's null the kernel still + /// compute by instantiating on-demand kernel although + /// it may cause a performance problem. + /// + /// @param deviceContext not used in the GLSLTransformFeedback kernel + /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + GLXFBEvaluator const *instance, + void * deviceContext = NULL) { + if (instance) { + return instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable); + } else { + // Create an instance on demand (slow) + (void)deviceContext; // unused + instance = Create(srcDesc, dstDesc); + if (instance) { + bool r = instance->EvalStencils(srcVertexBuffer, srcDesc, + dstVertexBuffer, dstDesc, + stencilTable); + delete instance; + return r; + } + return false; + } + } + + /// Dispatch the GLSL compute kernel on GPU asynchronously. + /// returns false if the kernel hasn't been compiled yet. 
+ template + bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable) const { + return EvalStencils(srcVertexBuffer->BindVBO(), + srcDesc, + dstVertexBuffer->BindVBO(), + dstDesc, + stencilTable->GetSizesTexture(), + stencilTable->GetOffsetsTexture(), + stencilTable->GetIndicesTexture(), + stencilTable->GetWeightsTexture(), + /* start = */ 0, + /* end = */ stencilTable->GetNumStencils()); + } + + /// Dispatch the GLSL compute kernel on GPU asynchronously. + /// returns false if the kernel hasn't been compiled yet. + bool EvalStencils(GLuint srcBuffer, + VertexBufferDescriptor const &srcDesc, + GLuint dstBuffer, + VertexBufferDescriptor const &dstDesc, + GLuint sizesBuffer, + GLuint offsetsBuffer, + GLuint indicesBuffer, + GLuint weightsBuffer, + int start, + int end) const; + + /// Configure GLSL kernel. A valid GL context must be made current before + /// calling this function. Returns false if it fails to compile the kernel. + bool Compile(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc); + + /// Wait the dispatched kernel finishes. 
+ static void Synchronize(void *kernel); + +private: + GLuint _program; + + GLuint _srcBufferTexture; + + GLuint _uniformSrcBufferTexture; + GLuint _uniformSizesTexture; + GLuint _uniformOffsetsTexture; + GLuint _uniformIndicesTexture; + GLuint _uniformWeightsTexture; + + GLuint _uniformStart; // range + GLuint _uniformEnd; + GLuint _uniformSrcOffset; // src buffer offset (in elements) +}; + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_GL_XFB_EVALUATOR_H diff --git a/opensubdiv/osd/glslComputeContext.cpp b/opensubdiv/osd/glslComputeContext.cpp deleted file mode 100644 index 61d0d3f2..00000000 --- a/opensubdiv/osd/glslComputeContext.cpp +++ /dev/null @@ -1,229 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../far/stencilTables.h" - -//#include "../osd/debug.h" -#include "../osd/glslComputeContext.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ----------------------------------------------------------------------------- - -template GLuint -createGLSLBuffer(std::vector const & src) { - - GLuint devicePtr=0; - - glGenBuffers(1, &devicePtr); - -#if defined(GL_EXT_direct_state_access) - if (glNamedBufferDataEXT) { - glNamedBufferDataEXT(devicePtr, src.size()*sizeof(T), &src.at(0), GL_STATIC_DRAW); - } else { -#else - { -#endif - GLint prev = 0; - glGetIntegerv(GL_SHADER_STORAGE_BUFFER_BINDING, &prev); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, devicePtr); - glBufferData(GL_SHADER_STORAGE_BUFFER, src.size()*sizeof(T), &src.at(0), GL_STATIC_DRAW); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, prev); - } - - //OSD_DEBUG_CHECK_GL_ERROR("createGLSLBuffer size %ld", src.size()); - return devicePtr; -} - -// ----------------------------------------------------------------------------- - -class GLSLComputeContext::GLSLStencilTables { - -public: - - GLSLStencilTables(Far::StencilTables const & stencilTables) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createGLSLBuffer(stencilTables.GetSizes()); - _offsets = createGLSLBuffer(stencilTables.GetOffsets()); - _indices = createGLSLBuffer(stencilTables.GetControlIndices()); - _weights = createGLSLBuffer(stencilTables.GetWeights()); - } else { - _sizes = _offsets = _indices = _weights = 0; - } - } - - ~GLSLStencilTables() { - if (_sizes) glDeleteBuffers(1, &_sizes); - if (_offsets) glDeleteBuffers(1, &_offsets); - if (_weights) glDeleteBuffers(1, &_weights); - if (_indices) glDeleteBuffers(1, &_indices); - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - GLuint GetSizes() const { - return _sizes; - } - - GLuint GetOffsets() const { - return _offsets; - } - - GLuint 
GetIndices() const { - return _indices; - } - - GLuint GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - - void Bind() const { - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, _sizes); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, _offsets); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, _indices); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, _weights); - } - - static void Unbind() { - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0); - - glUseProgram(0); - } - -private: - - GLuint _sizes, - _offsets, - _indices, - _weights; - int _numStencils; -}; - -// ----------------------------------------------------------------------------- - -GLSLComputeContext::GLSLComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0), - _numStencils(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new GLSLStencilTables(*vertexStencilTables); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if (varyingStencilTables) { - _varyingStencilTables = new GLSLStencilTables(*varyingStencilTables); - - if (_numControlVertices) { - assert(_numControlVertices==varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -GLSLComputeContext::~GLSLComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -GLSLComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? 
_vertexStencilTables->IsValid() : false; -} - -bool -GLSLComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -GLSLComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : false; -} - -int -GLSLComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : false; -} - -// ---------------------------------------------------------------------------- - -void -GLSLComputeContext::BindVertexStencilTables() const { - if (_vertexStencilTables) { - _vertexStencilTables->Bind(); - } -} - -void -GLSLComputeContext::BindVaryingStencilTables() const { - if (_varyingStencilTables) { - _varyingStencilTables->Bind(); - } -} - -void -GLSLComputeContext::UnbindStencilTables() const { - GLSLStencilTables::Unbind(); -} - - -// ----------------------------------------------------------------------------- - -GLSLComputeContext * -GLSLComputeContext::Create(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - GLSLComputeContext *result = - new GLSLComputeContext(vertexStencilTables, varyingStencilTables); - - return result; -} - -// ----------------------------------------------------------------------------- - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslComputeContext.h b/opensubdiv/osd/glslComputeContext.h deleted file mode 100644 index 7f21869d..00000000 --- a/opensubdiv/osd/glslComputeContext.h +++ /dev/null @@ -1,126 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 
6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_COMPUTE_CONTEXT_H -#define OSD_GLSL_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far{ class StencilTables; } - -namespace Osd { - -/// -/// \brief GLSL-Compute Refine Context -/// -/// The GLSL-Compute implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. 
-/// -class GLSLComputeContext { - -public: - /// Creates an GLSLComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - /// @param deviceContext (not used) - /// - static GLSLComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~GLSLComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the GL buffer containing vertex-stencil stencil sizes - GLuint GetVertexStencilTablesSizes() const; - - /// Returns the GL buffer containing vertex-stencil stencil offsets - GLuint GetVertexStencilTablesOffsets() const; - - /// Binds GL buffers containing stencils for 'vertex' interpolation - void BindVertexStencilTables() const; - - /// Binds GL buffers containing stencils for 'varying' interpolation - void BindVaryingStencilTables() const; - - /// Unbinds GL stencil buffers - void UnbindStencilTables() const; - -protected: - explicit GLSLComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - class GLSLStencilTables; - - GLSLStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; - int _numStencils; -}; - -} // 
end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/glslComputeController.cpp b/opensubdiv/osd/glslComputeController.cpp deleted file mode 100644 index 36a44a44..00000000 --- a/opensubdiv/osd/glslComputeController.cpp +++ /dev/null @@ -1,287 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/glslComputeController.h" -#include "../osd/vertexDescriptor.h" -#include "../osd/opengl.h" -#include "../far/error.h" - -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -static const char *shaderSource = -#include "../osd/glslComputeKernel.gen.h" -; - -// ---------------------------------------------------------------------------- - -class GLSLComputeController::KernelBundle : - NonCopyable { - -public: - - KernelBundle() : - _program(0), - _uniformSizes(0), - _uniformOffsets(0), - _uniformIndices(0), - _uniformWeights(0), - _uniformStart(0), - _uniformEnd(0), - _uniformOffset(0), - _uniformNumCVs(0), - _workGroupSize(64) { } - - ~KernelBundle() { - if (_program) { - glDeleteProgram(_program); - } - } - - void UseProgram(int primvarOffset) const { - glUseProgram(_program); - glUniform1i(_uniformOffset, primvarOffset); - - //OSD_DEBUG_CHECK_GL_ERROR("UseProgram"); - } - - bool Compile(VertexBufferDescriptor const & desc) { - - _desc = VertexBufferDescriptor(0, desc.length, desc.stride); - - if (_program) { - glDeleteProgram(_program); - _program=0; - } - _program = glCreateProgram(); - - GLuint shader = glCreateShader(GL_COMPUTE_SHADER); - - std::ostringstream defines; - defines << "#define OFFSET " << _desc.offset << "\n" - << "#define LENGTH " << _desc.length << "\n" - << "#define STRIDE " << _desc.stride << "\n" - << "#define WORK_GROUP_SIZE " << _workGroupSize << "\n"; - std::string defineStr = defines.str(); - - const char *shaderSources[3] = {"#version 430\n", 0, 0}; - shaderSources[1] = defineStr.c_str(); - shaderSources[2] = shaderSource; - glShaderSource(shader, 3, shaderSources, NULL); - glCompileShader(shader); - glAttachShader(_program, shader); - - GLint linked = 0; - glLinkProgram(_program); - glGetProgramiv(_program, GL_LINK_STATUS, &linked); - - if (linked == GL_FALSE) { - char buffer[1024]; - glGetShaderInfoLog(shader, 1024, NULL, buffer); - 
Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glGetProgramInfoLog(_program, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glDeleteProgram(_program); - _program = 0; - return false; - } - - glDeleteShader(shader); - - // set uniform locations for compute kernels - _uniformSizes = glGetUniformLocation(_program, "sterncilSizes"); - _uniformOffsets = glGetUniformLocation(_program, "sterncilOffsets"); - _uniformIndices = glGetUniformLocation(_program, "sterncilIndices"); - _uniformWeights = glGetUniformLocation(_program, "sterncilIWeights"); - - _uniformStart = glGetUniformLocation(_program, "batchStart"); - _uniformEnd = glGetUniformLocation(_program, "batchEnd"); - - _uniformOffset = glGetUniformLocation(_program, "primvarOffset"); - _uniformNumCVs = glGetUniformLocation(_program, "numCVs"); - - //OSD_DEBUG_CHECK_GL_ERROR("Compile"); - - return true; - } - - void ApplyStencilTableKernel(int offset, int numCVs, - int start, int end) const { - - dispatchCompute(offset, numCVs, start, end); - } - - struct Match { - - Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -protected: - - void dispatchCompute(int offset, int numCVs, int start, int end) const { - - int count = end - start; - if (count<=0) { - return; - } - - - glUniform1i(_uniformStart, start); - glUniform1i(_uniformEnd, end); - - glUniform1i(_uniformOffset, offset); - glUniform1i(_uniformNumCVs, numCVs); - - glDispatchCompute((count + _workGroupSize - 1) / _workGroupSize, 1, 1); - - // sync for later reading. - // XXX: in theory, just SHADER_STORAGE_BARRIER is needed here. However - // we found a problem (issue #295) with nvidia driver 331.49 / Quadro4000 - // resulting in invalid vertices. - // Apparently adding TEXTURE_FETCH_BARRIER after a kernel fixes it. 
- // The workaroud is commented out, since it looks fixed as of driver 334.xx. - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); - - //OSD_DEBUG_CHECK_GL_ERROR("dispatchCompute"); - } - -private: - - GLuint _program; - - GLuint _uniformSizes, // uniform paramaeters for kernels - _uniformOffsets, - _uniformIndices, - _uniformWeights, - - _uniformStart, // batch - _uniformEnd, - - _uniformOffset, // GL primvar buffer descriptor - _uniformNumCVs; // number of const control vertices padded at - // the beginning of the buffer - - VertexBufferDescriptor _desc; // primvar buffer descriptor - - int _workGroupSize; -}; - -// ---------------------------------------------------------------------------- - -void -GLSLComputeController::ApplyStencilTableKernel( - ComputeContext const *context, int numStencils) const { - - assert(context); - - // Note: GLSLComputeContext has a state, knowing whether vertex or - // varying stencil tables are being bound. GetNumStencils() reflects it. - // This structure will likely be revisited. 
- - int start = 0; - int end = numStencils; - - _currentBindState.kernelBundle->ApplyStencilTableKernel( - _currentBindState.desc.offset, context->GetNumControlVertices(), - start, end); -} - -// ---------------------------------------------------------------------------- - -GLSLComputeController::GLSLComputeController() { } - -GLSLComputeController::~GLSLComputeController() { - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLComputeController::Synchronize() { - - glFinish(); -} - -// ---------------------------------------------------------------------------- -GLSLComputeController::KernelBundle const * -GLSLComputeController::getKernel(VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - if (it != _kernelRegistry.end()) { - return *it; - } else { - KernelBundle * kernelBundle = new KernelBundle(); - kernelBundle->Compile(desc); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -void -GLSLComputeController::bindBufferAndProgram() { - - if (_currentBindState.buffer) - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _currentBindState.buffer); - - _currentBindState.kernelBundle->UseProgram(_currentBindState.desc.offset); - - glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); -} - -void -GLSLComputeController::unbindBufferAndProgram() { - - glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0); - glUseProgram(0); -} - -// ---------------------------------------------------------------------------- - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslComputeController.h b/opensubdiv/osd/glslComputeController.h deleted file mode 100644 index 
8c87b37d..00000000 --- a/opensubdiv/osd/glslComputeController.h +++ /dev/null @@ -1,210 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_COMPUTE_CONTROLLER_H -#define OSD_GLSL_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/glslComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching GLSL Compute subdivision kernels. -/// -/// GLSLComputeController is a compute controller class to launch -/// GLSLCompute transfrom feedback subdivision kernels. It requires -/// GLVertexBufferInterface as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. 
-/// -class GLSLComputeController { -public: - typedef GLSLComputeContext ComputeContext; - - /// Constructor. - GLSLComputeController(); - - /// Destructor. - ~GLSLComputeController(); - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The OsdGLSLContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( GLSLComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - if (vertexBuffer) { - bind(vertexBuffer, vertexDesc); - - context->BindVertexStencilTables(); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVertexStencilTables()); - } - - if (varyingBuffer) { - bind(varyingBuffer, varyingDesc); - - context->BindVaryingStencilTables(); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVaryingStencilTables()); - } - - context->UnbindStencilTables(); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. 
- /// - /// @param context The OsdGLSLContext to apply refinement operations to - /// - /// @param batches Vector of batches of vertices organized by operative - /// kernel - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(GLSLComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context, - int numStencils) const; - - template - void bind( BUFFER * buffer, - VertexBufferDescriptor const * desc ) { - - assert(buffer); - - // if the vertex buffer descriptor is specified, use it - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (desc) { - _currentBindState.desc = *desc; - } else { - int numElements = buffer ? buffer->GetNumElements() : 0; - _currentBindState.desc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.buffer = buffer->BindVBO(); - - _currentBindState.kernelBundle = getKernel(_currentBindState.desc); - - bindBufferAndProgram(); - } - - - // Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - unbindBufferAndProgram(); - } - - // binds the primvar data buffer and compute program - void bindBufferAndProgram(); - - // unbinds the primvar data buffer and compute program - void unbindBufferAndProgram(); - - -private: - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. 
- struct BindState { - - BindState() : buffer(0), kernelBundle(0) { } - - void Reset() { - buffer = 0; - desc.Reset(); - kernelBundle = 0; - } - - GLuint buffer; - - VertexBufferDescriptor desc; - - KernelBundle const * kernelBundle; - }; - - BindState _currentBindState; - - typedef std::vector KernelRegistry; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - KernelRegistry _kernelRegistry; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/glslComputeKernel.glsl b/opensubdiv/osd/glslComputeKernel.glsl index 1cdd5a08..8cb6e89a 100644 --- a/opensubdiv/osd/glslComputeKernel.glsl +++ b/opensubdiv/osd/glslComputeKernel.glsl @@ -26,15 +26,15 @@ uniform int batchStart = 0; uniform int batchEnd = 0; +uniform int srcOffset = 0; +uniform int dstOffset = 0; -uniform int primvarOffset = 0; -uniform int numCVs = 0; - -layout(binding=0) buffer vertex_buffer { float vertexBuffer[]; }; -layout(binding=1) buffer sterncilSizes { unsigned char _sizes[]; }; -layout(binding=2) buffer sterncilOffsets { int _offsets[]; }; -layout(binding=3) buffer sterncilIndices { int _indices[]; }; -layout(binding=4) buffer sterncilWeights { float _weights[]; }; +layout(binding=0) buffer src_buffer { float srcVertexBuffer[]; }; +layout(binding=1) buffer dst_buffer { float dstVertexBuffer[]; }; +layout(binding=2) buffer stencilSizes { unsigned char _sizes[]; }; +layout(binding=3) buffer stencilOffsets { int _offsets[]; }; +layout(binding=4) buffer stencilIndices { int _indices[]; }; +layout(binding=5) buffer stencilWeights { float _weights[]; }; layout(local_size_x=WORK_GROUP_SIZE, local_size_y=1, local_size_z=1) in; @@ -52,17 +52,17 @@ void clear(out Vertex v) { Vertex readVertex(int index) { Vertex v; - int vertexIndex = primvarOffset + index * STRIDE; + int vertexIndex = srcOffset + index * SRC_STRIDE; for (int i = 
0; i < LENGTH; ++i) { - v.vertexData[i] = vertexBuffer[vertexIndex + i]; + v.vertexData[i] = srcVertexBuffer[vertexIndex + i]; } return v; } void writeVertex(int index, Vertex v) { - int vertexIndex = primvarOffset + index * STRIDE; + int vertexIndex = dstOffset + index * DST_STRIDE; for (int i = 0; i < LENGTH; ++i) { - vertexBuffer[vertexIndex + i] = v.vertexData[i]; + dstVertexBuffer[vertexIndex + i] = v.vertexData[i]; } } @@ -91,9 +91,7 @@ void main() { addWithWeight(dst, readVertex( _indices[offset+i] ), _weights[offset+i]); } - // the vertex buffer contains our control vertices at the beginning: don't - // stomp on those ! - writeVertex(numCVs+current, dst); + writeVertex(current, dst); } //------------------------------------------------------------------------------ diff --git a/opensubdiv/osd/glslPatchBSpline.glsl b/opensubdiv/osd/glslPatchBSpline.glsl index d53f4f51..ccba7563 100644 --- a/opensubdiv/osd/glslPatchBSpline.glsl +++ b/opensubdiv/osd/glslPatchBSpline.glsl @@ -82,36 +82,9 @@ out block { OSD_USER_VARYING_DECLARE } outpt[]; -#define ID gl_InvocationID +patch out vec4 tessOuterLo, tessOuterHi; -void -reflectBoundaryEdges(inout vec3 cpt[16], int patchParam) -{ - if (((patchParam >> 4) & 1) != 0) { - cpt[0] = 2*cpt[4] - cpt[8]; - cpt[1] = 2*cpt[5] - cpt[9]; - cpt[2] = 2*cpt[6] - cpt[10]; - cpt[3] = 2*cpt[7] - cpt[11]; - } - if (((patchParam >> 4) & 2) != 0) { - cpt[3] = 2*cpt[2] - cpt[1]; - cpt[7] = 2*cpt[6] - cpt[5]; - cpt[11] = 2*cpt[10] - cpt[9]; - cpt[15] = 2*cpt[14] - cpt[13]; - } - if (((patchParam >> 4) & 4) != 0) { - cpt[12] = 2*cpt[8] - cpt[4]; - cpt[13] = 2*cpt[9] - cpt[5]; - cpt[14] = 2*cpt[10] - cpt[6]; - cpt[15] = 2*cpt[11] - cpt[7]; - } - if (((patchParam >> 4) & 8) != 0) { - cpt[0] = 2*cpt[1] - cpt[2]; - cpt[4] = 2*cpt[5] - cpt[6]; - cpt[8] = 2*cpt[9] - cpt[10]; - cpt[12] = 2*cpt[13] - cpt[14]; - } -} +#define ID gl_InvocationID // compute single-crease patch matrix mat4 @@ -143,9 +116,9 @@ void main() position[i] = 
inpt[i].v.position.xyz; } - int patchParam = GetPatchParam(); + ivec3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(gl_PrimitiveID)); - reflectBoundaryEdges(position, patchParam); + OsdComputeBSplineBoundaryPoints(position, patchParam); vec3 H[4]; for (int l=0; l<4; ++l) { @@ -156,7 +129,7 @@ void main() } #if defined OSD_PATCH_ENABLE_SINGLE_CREASE - float sharpness = GetSharpness(); + float sharpness = OsdGetPatchSharpness(patchParam); if (sharpness > 0) { float Sf = floor(sharpness); float Sc = ceil(sharpness); @@ -200,14 +173,7 @@ void main() OSD_USER_VARYING_PER_CONTROL_POINT(ID, ID); - int patchLevel = GetPatchLevel(); - - // +0.5 to avoid interpolation error of integer value - outpt[ID].v.patchCoord = vec4(0, 0, - patchLevel+0.5, - GetPrimitiveID()+0.5); - - OSD_COMPUTE_PTEX_COORD_TESSCONTROL_SHADER; + outpt[ID].v.patchCoord = OsdGetPatchCoord(patchParam); #if defined OSD_ENABLE_SCREENSPACE_TESSELLATION // Wait for all basis conversion to be finished @@ -216,26 +182,27 @@ void main() if (ID == 0) { OSD_PATCH_CULL(OSD_PATCH_INPUT_SIZE); - vec4 outerLevel = vec4(0); - vec2 innerLevel = vec2(0); #if defined OSD_ENABLE_SCREENSPACE_TESSELLATION // Gather bezier control points to compute limit surface tess levels - vec3 cpBezier[16]; for (int i=0; i<16; ++i) { - cpBezier[i] = outpt[i].v.position.xyz; + position[i] = outpt[i].v.position.xyz; } - GetTransitionTessLevels(cpBezier, patchParam, outerLevel, innerLevel); -#else - GetTransitionTessLevels(position, patchParam, outerLevel, innerLevel); #endif - gl_TessLevelOuter[0] = outerLevel[0]; - gl_TessLevelOuter[1] = outerLevel[1]; - gl_TessLevelOuter[2] = outerLevel[2]; - gl_TessLevelOuter[3] = outerLevel[3]; + vec4 tessLevelOuter = vec4(0); + vec2 tessLevelInner = vec2(0); - gl_TessLevelInner[0] = innerLevel[0]; - gl_TessLevelInner[1] = innerLevel[1]; + OsdGetTessLevels(position, patchParam, + tessLevelOuter, tessLevelInner, + tessOuterLo, tessOuterHi); + + gl_TessLevelOuter[0] = tessLevelOuter[0]; + 
gl_TessLevelOuter[1] = tessLevelOuter[1]; + gl_TessLevelOuter[2] = tessLevelOuter[2]; + gl_TessLevelOuter[3] = tessLevelOuter[3]; + + gl_TessLevelInner[0] = tessLevelInner[0]; + gl_TessLevelInner[1] = tessLevelInner[1]; } } @@ -246,19 +213,13 @@ void main() //---------------------------------------------------------- #ifdef OSD_PATCH_TESS_EVAL_BSPLINE_SHADER -#ifdef OSD_TRANSITION_TRIANGLE_SUBPATCH - layout(triangles) in; -#else - layout(quads) in; -#endif +layout(quads) in; -/* XXXdyu-patch-drawing support for frational spacing #if defined OSD_FRACTIONAL_ODD_SPACING layout(fractional_odd_spacing) in; #elif defined OSD_FRACTIONAL_EVEN_SPACING layout(fractional_even_spacing) in; #endif -*/ in block { ControlVertex v; @@ -278,9 +239,13 @@ out block { OSD_USER_VARYING_DECLARE } outpt; +patch in vec4 tessOuterLo, tessOuterHi; + void main() { - vec2 UV = GetTransitionParameterization(); + vec2 UV = OsdGetTessParameterization(gl_TessCoord.xy, + tessOuterLo, + tessOuterHi); #ifdef OSD_COMPUTE_NORMAL_DERIVATIVES float B[4], D[4], C[4]; @@ -372,7 +337,7 @@ void main() dUV += D[k] * DUCP[k]; } - int level = int(inpt[0].v.ptexInfo.z); + int level = inpt[0].v.patchCoord.z; Tangent *= 3 * level; BiTangent *= 3 * level; dUU *= 6 * level; @@ -407,7 +372,7 @@ void main() Tangent += B[k] * DUCP[k]; BiTangent += D[k] * BUCP[k]; } - int level = int(inpt[0].v.ptexInfo.z); + int level = inpt[0].v.patchCoord.z; Tangent *= 3 * level; BiTangent *= 3 * level; @@ -422,11 +387,8 @@ void main() OSD_USER_VARYING_PER_EVAL_POINT(UV, 5, 6, 9, 10); - outpt.v.patchCoord = inpt[0].v.patchCoord; - - outpt.v.patchCoord.xy = vec2(UV.x, UV.y); - - OSD_COMPUTE_PTEX_COORD_TESSEVAL_SHADER; + outpt.v.tessCoord = UV; + outpt.v.patchCoord = OsdInterpolatePatchCoord(UV, inpt[0].v.patchCoord); OSD_DISPLACEMENT_CALLBACK; diff --git a/opensubdiv/osd/glslPatchCommon.glsl b/opensubdiv/osd/glslPatchCommon.glsl index b4469b7c..46adf844 100644 --- a/opensubdiv/osd/glslPatchCommon.glsl +++ 
b/opensubdiv/osd/glslPatchCommon.glsl @@ -71,17 +71,11 @@ // mix(input[c].var, input[d].var, UV.x), UV.y) #endif -#ifndef OSD_TRANSITION_ROTATE -#define OSD_TRANSITION_ROTATE 0 -#endif +// XXXdyu-patch-drawing support for fractional spacing +#undef OSD_FRACTIONAL_ODD_SPACING +#undef OSD_FRACTIONAL_EVEN_SPACING -#if defined OSD_PATCH_BOUNDARY - #define OSD_PATCH_INPUT_SIZE 12 -#elif defined OSD_PATCH_CORNER - #define OSD_PATCH_INPUT_SIZE 9 -#else - #define OSD_PATCH_INPUT_SIZE 16 -#endif +#define OSD_PATCH_INPUT_SIZE 16 #define M_PI 3.14159265359f @@ -91,8 +85,7 @@ struct ControlVertex { vec4 position; - centroid vec4 patchCoord; // u, v, level, faceID - ivec4 ptexInfo; // U offset, V offset, 2^ptexlevel', rotation + ivec4 patchCoord; // U offset, V offset, faceLevel, faceId #ifdef OSD_ENABLE_PATCH_CULL ivec3 clipFlag; #endif @@ -101,10 +94,10 @@ struct ControlVertex { struct OutputVertex { vec4 position; vec3 normal; - centroid vec4 patchCoord; // u, v, level, faceID - centroid vec2 tessCoord; // tesscoord.st vec3 tangent; vec3 bitangent; + centroid vec4 patchCoord; // u, v, faceLevel, faceId + centroid vec2 tessCoord; // tesscoord.st #if defined OSD_COMPUTE_NORMAL_DERIVATIVES vec3 Nu; vec3 Nv; @@ -120,7 +113,7 @@ struct GregControlVertex { vec3 e1; uint zerothNeighbor; vec3 org; -#if OSD_MAX_VALENCE > 0 +#if defined OSD_MAX_VALENCE && OSD_MAX_VALENCE > 0 vec3 r[OSD_MAX_VALENCE]; #endif }; @@ -131,8 +124,7 @@ struct GregEvalVertex { vec3 Em; vec3 Fp; vec3 Fm; - centroid vec4 patchCoord; - ivec4 ptexInfo; + ivec4 patchCoord; }; // osd shaders need following functions defined @@ -144,74 +136,93 @@ int OsdGregoryQuadOffsetBase(); int OsdPrimitiveIdBase(); int OsdBaseVertex(); -float GetTessLevel(int patchLevel) -{ -#ifdef OSD_ENABLE_SCREENSPACE_TESSELLATION - return OsdTessLevel(); -#else - return OsdTessLevel() / pow(2, patchLevel-1); -#endif -} - -#ifndef GetPrimitiveID -#define GetPrimitiveID() (gl_PrimitiveID + OsdPrimitiveIdBase()) -#endif - -float 
GetPostProjectionSphereExtent(vec3 center, float diameter) -{ - vec4 p = OsdProjectionMatrix() * vec4(center, 1.0); - return abs(diameter * OsdProjectionMatrix()[1][1] / p.w); -} - -float TessAdaptive(vec3 p0, vec3 p1) -{ - // Adaptive factor can be any computation that depends only on arg values. - // Project the diameter of the edge's bounding sphere instead of using the - // length of the projected edge itself to avoid problems near silhouettes. - vec3 center = (p0 + p1) / 2.0; - float diameter = distance(p0, p1); - return round(max(1.0, OsdTessLevel() * GetPostProjectionSphereExtent(center, diameter))); -} - #ifndef OSD_DISPLACEMENT_CALLBACK #define OSD_DISPLACEMENT_CALLBACK #endif // ---------------------------------------------------------------------------- -// ptex coordinates +// Patch Parameters // ---------------------------------------------------------------------------- +// +// Each patch has a corresponding patchParam. This is a set of three values +// specifying additional information about the patch: +// +// faceId -- topological face identifier (e.g. Ptex FaceId) +// bitfield -- refinement-level, non-quad, boundary, transition, uv-offset +// sharpness -- crease sharpness for single-crease patches +// +// These are stored in OsdPatchParamBuffer indexed by the value returned +// from OsdGetPatchIndex() which is a function of the current PrimitiveID +// along with an optional client provided offset. 
+// + uniform isamplerBuffer OsdPatchParamBuffer; -#define GetPatchParam() \ - (texelFetch(OsdPatchParamBuffer, GetPrimitiveID()).y) +int OsdGetPatchIndex(int primitiveId) +{ + return (primitiveId + OsdPrimitiveIdBase()); +} -#define GetPatchLevel() \ - (texelFetch(OsdPatchParamBuffer, GetPrimitiveID()).y & 0xf) +ivec3 OsdGetPatchParam(int patchIndex) +{ + return texelFetch(OsdPatchParamBuffer, patchIndex).xyz; +} -#define GetSharpness() \ - (intBitsToFloat(texelFetch(OsdPatchParamBuffer, GetPrimitiveID()).z)) +int OsdGetPatchFaceId(ivec3 patchParam) +{ + return patchParam.x; +} -#define OSD_COMPUTE_PTEX_COORD_TESSCONTROL_SHADER \ - { \ - ivec2 ptexIndex = texelFetch(OsdPatchParamBuffer, \ - GetPrimitiveID()).xy; \ - int faceID = ptexIndex.x; \ - int lv = 1 << ((ptexIndex.y & 0x7) - ((ptexIndex.y >> 3) & 1)); \ - int u = (ptexIndex.y >> 22) & 0x3ff; \ - int v = (ptexIndex.y >> 12) & 0x3ff; \ - outpt[ID].v.patchCoord.w = faceID+0.5; \ - outpt[ID].v.ptexInfo = ivec4(u, v, lv, 0); \ - } +int OsdGetPatchFaceLevel(ivec3 patchParam) +{ + return (1 << ((patchParam.y & 0x7) - ((patchParam.y >> 3) & 1))); +} -#define OSD_COMPUTE_PTEX_COORD_TESSEVAL_SHADER \ - { \ - vec2 uv = outpt.v.patchCoord.xy; \ - ivec2 p = inpt[0].v.ptexInfo.xy; \ - int lv = inpt[0].v.ptexInfo.z; \ - outpt.v.tessCoord.xy = uv; \ - outpt.v.patchCoord.xy = (uv * vec2(1.0)/lv) + vec2(p.x, p.y)/lv;\ - } +int OsdGetPatchRefinementLevel(ivec3 patchParam) +{ + return (patchParam.y & 0x7); +} + +int OsdGetPatchBoundaryMask(ivec3 patchParam) +{ + return ((patchParam.y >> 4) & 0xf); +} + +int OsdGetPatchTransitionMask(ivec3 patchParam) +{ + return ((patchParam.y >> 8) & 0xf); +} + +ivec2 OsdGetPatchFaceUV(ivec3 patchParam) +{ + int u = (patchParam.y >> 22) & 0x3ff; + int v = (patchParam.y >> 12) & 0x3ff; + return ivec2(u,v); +} + +float OsdGetPatchSharpness(ivec3 patchParam) +{ + return intBitsToFloat(patchParam.z); +} + +ivec4 OsdGetPatchCoord(ivec3 patchParam) +{ + int faceId = OsdGetPatchFaceId(patchParam); + 
int faceLevel = OsdGetPatchFaceLevel(patchParam); + ivec2 faceUV = OsdGetPatchFaceUV(patchParam); + return ivec4(faceUV.x, faceUV.y, faceLevel, faceId); +} + +vec4 OsdInterpolatePatchCoord(vec2 localUV, ivec4 perPrimPatchCoord) +{ + int faceId = perPrimPatchCoord.w; + int faceLevel = perPrimPatchCoord.z; + vec2 faceUV = vec2(perPrimPatchCoord.x, perPrimPatchCoord.y); + vec2 uv = localUV/faceLevel + faceUV/faceLevel; + // add 0.5 to integer values for more robust interpolation + return vec4(uv.x, uv.y, faceLevel+0.5f, faceId+0.5f); +} // ---------------------------------------------------------------------------- // face varyings @@ -229,7 +240,7 @@ uniform samplerBuffer OsdFVarDataBuffer; #define OSD_COMPUTE_FACE_VARYING_1(result, fvarOffset, tessCoord) \ { \ float v[4]; \ - int primOffset = GetPrimitiveID() * 4; \ + int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 4; \ for (int i = 0; i < 4; ++i) { \ int index = (primOffset+i)*OSD_FVAR_WIDTH + fvarOffset; \ v[i] = texelFetch(OsdFVarDataBuffer, index).s \ @@ -242,7 +253,7 @@ uniform samplerBuffer OsdFVarDataBuffer; #define OSD_COMPUTE_FACE_VARYING_2(result, fvarOffset, tessCoord) \ { \ vec2 v[4]; \ - int primOffset = GetPrimitiveID() * 4; \ + int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 4; \ for (int i = 0; i < 4; ++i) { \ int index = (primOffset+i)*OSD_FVAR_WIDTH + fvarOffset; \ v[i] = vec2(texelFetch(OsdFVarDataBuffer, index).s, \ @@ -256,7 +267,7 @@ uniform samplerBuffer OsdFVarDataBuffer; #define OSD_COMPUTE_FACE_VARYING_3(result, fvarOffset, tessCoord) \ { \ vec3 v[4]; \ - int primOffset = GetPrimitiveID() * 4; \ + int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 4; \ for (int i = 0; i < 4; ++i) { \ int index = (primOffset+i)*OSD_FVAR_WIDTH + fvarOffset; \ v[i] = vec3(texelFetch(OsdFVarDataBuffer, index).s, \ @@ -271,7 +282,7 @@ uniform samplerBuffer OsdFVarDataBuffer; #define OSD_COMPUTE_FACE_VARYING_4(result, fvarOffset, tessCoord) \ { \ vec4 v[4]; \ - int primOffset = GetPrimitiveID() * 4; \ + 
int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 4; \ for (int i = 0; i < 4; ++i) { \ int index = (primOffset+i)*OSD_FVAR_WIDTH + fvarOffset; \ v[i] = vec4(texelFetch(OsdFVarDataBuffer, index).s, \ @@ -289,14 +300,14 @@ uniform samplerBuffer OsdFVarDataBuffer; #define OSD_COMPUTE_FACE_VARYING_TRI_1(result, fvarOffset, triVert) \ { \ - int primOffset = GetPrimitiveID() * 3; \ + int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 3; \ int index = (primOffset+triVert)*OSD_FVAR_WIDTH + fvarOffset; \ result = texelFetch(OsdFVarDataBuffer, index).s; \ } #define OSD_COMPUTE_FACE_VARYING_TRI_2(result, fvarOffset, triVert) \ { \ - int primOffset = GetPrimitiveID() * 3; \ + int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 3; \ int index = (primOffset+triVert)*OSD_FVAR_WIDTH + fvarOffset; \ result = vec2(texelFetch(OsdFVarDataBuffer, index).s, \ texelFetch(OsdFVarDataBuffer, index + 1).s); \ @@ -304,7 +315,7 @@ uniform samplerBuffer OsdFVarDataBuffer; #define OSD_COMPUTE_FACE_VARYING_TRI_3(result, fvarOffset, triVert) \ { \ - int primOffset = GetPrimitiveID() * 3; \ + int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 3; \ int index = (primOffset+triVert)*OSD_FVAR_WIDTH + fvarOffset; \ result = vec3(texelFetch(OsdFVarDataBuffer, index).s, \ texelFetch(OsdFVarDataBuffer, index + 1).s, \ @@ -313,7 +324,7 @@ uniform samplerBuffer OsdFVarDataBuffer; #define OSD_COMPUTE_FACE_VARYING_TRI_4(result, fvarOffset, triVert) \ { \ - int primOffset = GetPrimitiveID() * 3; \ + int primOffset = OsdGetPatchIndex(gl_PrimitiveID) * 3; \ int index = (primOffset+triVert)*OSD_FVAR_WIDTH + fvarOffset; \ result = vec4(texelFetch(OsdFVarDataBuffer, index).s, \ texelFetch(OsdFVarDataBuffer, index + 1).s, \ @@ -409,7 +420,7 @@ Univar4x4(in float u, out float B[4], out float D[4], out float C[4]) // ---------------------------------------------------------------------------- vec3 -EvalBezier(vec3 cp[16], vec2 uv) +OsdEvalBezier(vec3 cp[16], vec2 uv) { vec3 BUCP[4] = vec3[4](vec3(0,0,0), 
vec3(0,0,0), vec3(0,0,0), vec3(0,0,0)); @@ -434,3 +445,304 @@ EvalBezier(vec3 cp[16], vec2 uv) } // ---------------------------------------------------------------------------- +// Boundary Interpolation +// ---------------------------------------------------------------------------- + +void +OsdComputeBSplineBoundaryPoints(inout vec3 cpt[16], ivec3 patchParam) +{ + int boundaryMask = OsdGetPatchBoundaryMask(patchParam); + + if ((boundaryMask & 1) != 0) { + cpt[0] = 2*cpt[4] - cpt[8]; + cpt[1] = 2*cpt[5] - cpt[9]; + cpt[2] = 2*cpt[6] - cpt[10]; + cpt[3] = 2*cpt[7] - cpt[11]; + } + if ((boundaryMask & 2) != 0) { + cpt[3] = 2*cpt[2] - cpt[1]; + cpt[7] = 2*cpt[6] - cpt[5]; + cpt[11] = 2*cpt[10] - cpt[9]; + cpt[15] = 2*cpt[14] - cpt[13]; + } + if ((boundaryMask & 4) != 0) { + cpt[12] = 2*cpt[8] - cpt[4]; + cpt[13] = 2*cpt[9] - cpt[5]; + cpt[14] = 2*cpt[10] - cpt[6]; + cpt[15] = 2*cpt[11] - cpt[7]; + } + if ((boundaryMask & 8) != 0) { + cpt[0] = 2*cpt[1] - cpt[2]; + cpt[4] = 2*cpt[5] - cpt[6]; + cpt[8] = 2*cpt[9] - cpt[10]; + cpt[12] = 2*cpt[13] - cpt[14]; + } +} + +// ---------------------------------------------------------------------------- +// Tessellation +// ---------------------------------------------------------------------------- + +// +// Organization of B-spline and Bezier control points. +// +// Each patch is defined by 16 control points (labeled 0-15). +// +// The patch will be evaluated across the domain from (0,0) at +// the lower-left to (1,1) at the upper-right. When computing +// adaptive tessellation metrics, we consider refined vertex-vertex +// and edge-vertex points along the transition edges of the patch +// (labeled vv* and ev* respectively). +// +// The two segments of each transition edge are labeled Lo and Hi, +// with the Lo segment occuring before the Hi segment along the +// transition edge's domain parameterization. 
These Lo and Hi segment +// tessellation levels determine how domain evaluation coordinates +// are remapped along transition edges. The Hi segment value will +// be zero for a non-transition edge. +// +// (0,1) (1,1) +// +// vv3 ev23 vv2 +// | Lo3 | Hi3 | +// --O-----------O-----+-----O-----------O-- +// | 12 | 13 14 | 15 | +// | | | | +// | | | | +// Hi0 | | | | Hi2 +// | | | | +// O-----------O-----------O-----------O +// | 8 | 9 10 | 11 | +// | | | | +// ev03 --+ | | +-- ev12 +// | | | | +// | 4 | 5 6 | 7 | +// O-----------O-----------O-----------O +// | | | | +// Lo0 | | | | Lo2 +// | | | | +// | | | | +// | 0 | 1 2 | 3 | +// --O-----------O-----+-----O-----------O-- +// | Lo1 | Hi1 | +// vv0 ev01 vv1 +// +// (0,0) (1,0) +// + +float OsdComputePostProjectionSphereExtent(vec3 center, float diameter) +{ + vec4 p = OsdProjectionMatrix() * vec4(center, 1.0); + return abs(diameter * OsdProjectionMatrix()[1][1] / p.w); +} + +float OsdComputeTessLevel(vec3 p0, vec3 p1) +{ + // Adaptive factor can be any computation that depends only on arg values. + // Project the diameter of the edge's bounding sphere instead of using the + // length of the projected edge itself to avoid problems near silhouettes. + vec3 center = (p0 + p1) / 2.0; + float diameter = distance(p0, p1); + float projLength = OsdComputePostProjectionSphereExtent(center, diameter); + return round(max(1.0, OsdTessLevel() * projLength)); +} + +void +OsdGetTessLevelsUniform(ivec3 patchParam, + inout vec4 tessOuterLo, inout vec4 tessOuterHi) +{ + int refinementLevel = OsdGetPatchRefinementLevel(patchParam); + float tessLevel = OsdTessLevel() / pow(2, refinementLevel-1); + + tessOuterLo = vec4(tessLevel); + tessOuterHi = vec4(0); +} + +void +OsdGetTessLevelsRefinedPoints(vec3 cp[16], ivec3 patchParam, + inout vec4 tessOuterLo, inout vec4 tessOuterHi) +{ + // Each edge of a transition patch is adjacent to one or two patches + // at the next refined level of subdivision. 
We compute the corresponding + // vertex-vertex and edge-vertex refined points along the edges of the + // patch using Catmull-Clark subdivision stencil weights. + // For simplicity, we let the optimizer discard unused computation. + + vec3 vv0 = (cp[0] + cp[2] + cp[8] + cp[10]) * 0.015625 + + (cp[1] + cp[4] + cp[6] + cp[9]) * 0.09375 + cp[5] * 0.5625; + vec3 ev01 = (cp[1] + cp[2] + cp[9] + cp[10]) * 0.0625 + + (cp[5] + cp[6]) * 0.375; + + vec3 vv1 = (cp[1] + cp[3] + cp[9] + cp[11]) * 0.015625 + + (cp[2] + cp[5] + cp[7] + cp[10]) * 0.09375 + cp[6] * 0.5625; + vec3 ev12 = (cp[5] + cp[7] + cp[9] + cp[11]) * 0.0625 + + (cp[6] + cp[10]) * 0.375; + + vec3 vv2 = (cp[5] + cp[7] + cp[13] + cp[15]) * 0.015625 + + (cp[6] + cp[9] + cp[11] + cp[14]) * 0.09375 + cp[10] * 0.5625; + vec3 ev23 = (cp[5] + cp[6] + cp[13] + cp[14]) * 0.0625 + + (cp[9] + cp[10]) * 0.375; + + vec3 vv3 = (cp[4] + cp[6] + cp[12] + cp[14]) * 0.015625 + + (cp[5] + cp[8] + cp[10] + cp[13]) * 0.09375 + cp[9] * 0.5625; + vec3 ev03 = (cp[4] + cp[6] + cp[8] + cp[10]) * 0.0625 + + (cp[5] + cp[9]) * 0.375; + + tessOuterLo = vec4(0); + tessOuterHi = vec4(0); + + int transitionMask = OsdGetPatchTransitionMask(patchParam); + + if ((transitionMask & 8) != 0) { + tessOuterLo[0] = OsdComputeTessLevel(vv0, ev03); + tessOuterHi[0] = OsdComputeTessLevel(vv3, ev03); + } else { + tessOuterLo[0] = OsdComputeTessLevel(cp[5], cp[9]); + } + if ((transitionMask & 1) != 0) { + tessOuterLo[1] = OsdComputeTessLevel(vv0, ev01); + tessOuterHi[1] = OsdComputeTessLevel(vv1, ev01); + } else { + tessOuterLo[1] = OsdComputeTessLevel(cp[5], cp[6]); + } + if ((transitionMask & 2) != 0) { + tessOuterLo[2] = OsdComputeTessLevel(vv1, ev12); + tessOuterHi[2] = OsdComputeTessLevel(vv2, ev12); + } else { + tessOuterLo[2] = OsdComputeTessLevel(cp[6], cp[10]); + } + if ((transitionMask & 4) != 0) { + tessOuterLo[3] = OsdComputeTessLevel(vv3, ev23); + tessOuterHi[3] = OsdComputeTessLevel(vv2, ev23); + } else { + tessOuterLo[3] = 
OsdComputeTessLevel(cp[9], cp[10]); + } +} + +void +OsdGetTessLevelsLimitPoints(vec3 cpBezier[16], ivec3 patchParam, + inout vec4 tessOuterLo, inout vec4 tessOuterHi) +{ + // Each edge of a transition patch is adjacent to one or two patches + // at the next refined level of subdivision. When the patch control + // points have been converted to the Bezier basis, the control points + // at the four corners are on the limit surface (since a Bezier patch + // interpolates its corner control points). We can compute an adaptive + // tessellation level for transition edges on the limit surface by + // evaluating a limit position at the mid point of each transition edge. + + tessOuterLo = vec4(0); + tessOuterHi = vec4(0); + + int transitionMask = OsdGetPatchTransitionMask(patchParam); + + if ((transitionMask & 8) != 0) { + vec3 ev03 = OsdEvalBezier(cpBezier, vec2(0.0, 0.5)); + tessOuterLo[0] = OsdComputeTessLevel(cpBezier[0], ev03); + tessOuterHi[0] = OsdComputeTessLevel(cpBezier[12], ev03); + } else { + tessOuterLo[0] = OsdComputeTessLevel(cpBezier[0], cpBezier[12]); + } + if ((transitionMask & 1) != 0) { + vec3 ev01 = OsdEvalBezier(cpBezier, vec2(0.5, 0.0)); + tessOuterLo[1] = OsdComputeTessLevel(cpBezier[0], ev01); + tessOuterHi[1] = OsdComputeTessLevel(cpBezier[3], ev01); + } else { + tessOuterLo[1] = OsdComputeTessLevel(cpBezier[0], cpBezier[3]); + } + if ((transitionMask & 2) != 0) { + vec3 ev12 = OsdEvalBezier(cpBezier, vec2(1.0, 0.5)); + tessOuterLo[2] = OsdComputeTessLevel(cpBezier[3], ev12); + tessOuterHi[2] = OsdComputeTessLevel(cpBezier[15], ev12); + } else { + tessOuterLo[2] = OsdComputeTessLevel(cpBezier[3], cpBezier[15]); + } + if ((transitionMask & 4) != 0) { + vec3 ev23 = OsdEvalBezier(cpBezier, vec2(0.5, 1.0)); + tessOuterLo[3] = OsdComputeTessLevel(cpBezier[12], ev23); + tessOuterHi[3] = OsdComputeTessLevel(cpBezier[15], ev23); + } else { + tessOuterLo[3] = OsdComputeTessLevel(cpBezier[12], cpBezier[15]); + } +} + +void +OsdGetTessLevels(vec3 cp[16], 
ivec3 patchParam, + inout vec4 tessLevelOuter, inout vec2 tessLevelInner, + inout vec4 tessOuterLo, inout vec4 tessOuterHi) +{ +#if defined OSD_ENABLE_SCREENSPACE_TESSELLATION + OsdGetTessLevelsLimitPoints(cp, patchParam, tessOuterLo, tessOuterHi); +#elif defined OSD_ENABLE_SCREENSPACE_TESSELLATION_REFINED + OsdGetTessLevelsRefinedPoints(cp, patchParam, tessOuterLo, tessOuterHi); +#else + OsdGetTessLevelsUniform(patchParam, tessOuterLo, tessOuterHi); +#endif + + // Outer levels are the sum of the Lo and Hi segments where the Hi + // segments will have a length of zero for non-transition edges. + tessLevelOuter = tessOuterLo + tessOuterHi; + + // Inner levels are the average the corresponding outer levels. + tessLevelInner[0] = (tessLevelOuter[1] + tessLevelOuter[3]) * 0.5; + tessLevelInner[1] = (tessLevelOuter[0] + tessLevelOuter[2]) * 0.5; +} + +void +OsdGetTessLevels(vec3 cp0, vec3 cp1, vec3 cp2, vec3 cp3, + ivec3 patchParam, + inout vec4 tessLevelOuter, inout vec2 tessLevelInner) +{ + vec4 tessOuterLo = vec4(0); + vec4 tessOuterHi = vec4(0); + +#if defined OSD_ENABLE_SCREENSPACE_TESSELLATION + tessOuterLo[0] = OsdComputeTessLevel(cp0, cp1); + tessOuterLo[1] = OsdComputeTessLevel(cp0, cp3); + tessOuterLo[2] = OsdComputeTessLevel(cp2, cp3); + tessOuterLo[3] = OsdComputeTessLevel(cp1, cp2); + tessOuterHi = vec4(0); +#else + OsdGetTessLevelsUniform(patchParam, tessOuterLo, tessOuterHi); +#endif + + // Outer levels are the sum of the Lo and Hi segments where the Hi + // segments will have a length of zero for non-transition edges. + tessLevelOuter = tessOuterLo + tessOuterHi; + + // Inner levels are the average the corresponding outer levels. 
+ tessLevelInner[0] = (tessLevelOuter[1] + tessLevelOuter[3]) * 0.5; + tessLevelInner[1] = (tessLevelOuter[0] + tessLevelOuter[2]) * 0.5; +} + +float +OsdGetTessTransitionSplit(float t, float n0, float n1) +{ + float ti = round(t * (n0 + n1)); + + if (ti <= n0) { + return 0.5 * (ti / n0); + } else { + return 0.5 * ((ti - n0) / n1) + 0.5; + } +} + +vec2 +OsdGetTessParameterization(vec2 uv, vec4 tessOuterLo, vec4 tessOuterHi) +{ + vec2 UV = uv; + if (UV.x == 0 && tessOuterHi[0] > 0) { + UV.y = OsdGetTessTransitionSplit(UV.y, tessOuterLo[0], tessOuterHi[0]); + } else + if (UV.y == 0 && tessOuterHi[1] > 0) { + UV.x = OsdGetTessTransitionSplit(UV.x, tessOuterLo[1], tessOuterHi[1]); + } else + if (UV.x == 1 && tessOuterHi[2] > 0) { + UV.y = OsdGetTessTransitionSplit(UV.y, tessOuterLo[2], tessOuterHi[2]); + } else + if (UV.y == 1 && tessOuterHi[3] > 0) { + UV.x = OsdGetTessTransitionSplit(UV.x, tessOuterLo[3], tessOuterHi[3]); + } + return UV; +} + diff --git a/opensubdiv/osd/glslPatchGregory.glsl b/opensubdiv/osd/glslPatchGregory.glsl index 3ff72516..cf60aae1 100644 --- a/opensubdiv/osd/glslPatchGregory.glsl +++ b/opensubdiv/osd/glslPatchGregory.glsl @@ -397,37 +397,28 @@ void main() OSD_USER_VARYING_PER_CONTROL_POINT(ID, ID); - int patchLevel = GetPatchLevel(); - outpt[ID].v.patchCoord = vec4(0, 0, - patchLevel+0.5f, - GetPrimitiveID()+0.5f); + ivec3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(gl_PrimitiveID)); - OSD_COMPUTE_PTEX_COORD_TESSCONTROL_SHADER; + outpt[ID].v.patchCoord = OsdGetPatchCoord(patchParam); if (ID == 0) { OSD_PATCH_CULL(4); -#ifdef OSD_ENABLE_SCREENSPACE_TESSELLATION - gl_TessLevelOuter[0] = - TessAdaptive(inpt[0].v.hullPosition.xyz, inpt[1].v.hullPosition.xyz); - gl_TessLevelOuter[1] = - TessAdaptive(inpt[0].v.hullPosition.xyz, inpt[3].v.hullPosition.xyz); - gl_TessLevelOuter[2] = - TessAdaptive(inpt[2].v.hullPosition.xyz, inpt[3].v.hullPosition.xyz); - gl_TessLevelOuter[3] = - TessAdaptive(inpt[1].v.hullPosition.xyz, 
inpt[2].v.hullPosition.xyz); - gl_TessLevelInner[0] = - max(gl_TessLevelOuter[1], gl_TessLevelOuter[3]); - gl_TessLevelInner[1] = - max(gl_TessLevelOuter[0], gl_TessLevelOuter[2]); -#else - gl_TessLevelInner[0] = GetTessLevel(patchLevel); - gl_TessLevelInner[1] = GetTessLevel(patchLevel); - gl_TessLevelOuter[0] = GetTessLevel(patchLevel); - gl_TessLevelOuter[1] = GetTessLevel(patchLevel); - gl_TessLevelOuter[2] = GetTessLevel(patchLevel); - gl_TessLevelOuter[3] = GetTessLevel(patchLevel); -#endif + vec4 tessLevelOuter = vec4(0); + vec2 tessLevelInner = vec2(0); + + OsdGetTessLevels( + inpt[0].v.hullPosition.xyz, inpt[1].v.hullPosition.xyz, + inpt[2].v.hullPosition.xyz, inpt[3].v.hullPosition.xyz, + patchParam, tessLevelOuter, tessLevelInner); + + gl_TessLevelOuter[0] = tessLevelOuter[0]; + gl_TessLevelOuter[1] = tessLevelOuter[1]; + gl_TessLevelOuter[2] = tessLevelOuter[2]; + gl_TessLevelOuter[3] = tessLevelOuter[3]; + + gl_TessLevelInner[0] = tessLevelInner[0]; + gl_TessLevelInner[1] = tessLevelInner[1]; } } @@ -441,13 +432,11 @@ void main() layout(quads) in; layout(cw) in; -/* XXXdyu-patch-drawing support for frational spacing #if defined OSD_FRACTIONAL_ODD_SPACING layout(fractional_odd_spacing) in; #elif defined OSD_FRACTIONAL_EVEN_SPACING layout(fractional_even_spacing) in; #endif -*/ in block { GregEvalVertex v; @@ -554,7 +543,7 @@ void main() dUV += D[i] * DUCP[i]; } - int level = int(inpt[0].v.ptexInfo.z); + int level = inpt[0].v.patchCoord.z; BiTangent *= 3 * level; Tangent *= 3 * level; dUU *= 6 * level; @@ -609,7 +598,7 @@ void main() Tangent += B[i] * DUCP[i]; BiTangent += D[i] * BUCP[i]; } - int level = int(inpt[0].v.ptexInfo.z); + int level = inpt[0].v.patchCoord.z; BiTangent *= 3 * level; Tangent *= 3 * level; @@ -627,10 +616,9 @@ void main() OSD_USER_VARYING_PER_EVAL_POINT(vec2(u,v), 0, 3, 1, 2); - outpt.v.patchCoord = inpt[0].v.patchCoord; - outpt.v.patchCoord.xy = vec2(v, u); - - OSD_COMPUTE_PTEX_COORD_TESSEVAL_SHADER; + vec2 UV = vec2(v, u); + 
outpt.v.tessCoord = UV; + outpt.v.patchCoord = OsdInterpolatePatchCoord(UV, inpt[0].v.patchCoord); OSD_DISPLACEMENT_CALLBACK; diff --git a/opensubdiv/osd/glslPatchGregoryBasis.glsl b/opensubdiv/osd/glslPatchGregoryBasis.glsl index 9556c825..bf68540c 100644 --- a/opensubdiv/osd/glslPatchGregoryBasis.glsl +++ b/opensubdiv/osd/glslPatchGregoryBasis.glsl @@ -38,8 +38,7 @@ out block { void main() { outpt.v.position = OsdModelViewMatrix() * position; - outpt.v.patchCoord = vec4(0); - outpt.v.ptexInfo = ivec4(0); + outpt.v.patchCoord = ivec4(0); OSD_PATCH_CULL_COMPUTE_CLIPFLAGS(position); OSD_USER_VARYING_PER_VERTEX(); } @@ -70,42 +69,27 @@ void main() outpt[ID].v = inpt[ID].v; OSD_USER_VARYING_PER_CONTROL_POINT(ID, ID); - int patchLevel = GetPatchLevel(); + ivec3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(gl_PrimitiveID)); - // +0.5 to avoid interpolation error of integer value - outpt[ID].v.patchCoord = vec4(0, 0, - patchLevel+0.5, - GetPrimitiveID()+0.5); - OSD_COMPUTE_PTEX_COORD_TESSCONTROL_SHADER; + outpt[ID].v.patchCoord = OsdGetPatchCoord(patchParam); if (ID == 0) { OSD_PATCH_CULL(OSD_PATCH_INPUT_SIZE); - // XXX: this metric is not consistent. - // we will 1) compute the cage length as before - // or 2) compute limit length for all patches. 
- #ifdef OSD_ENABLE_SCREENSPACE_TESSELLATION - gl_TessLevelOuter[0] = - TessAdaptive(inpt[0].v.position.xyz, inpt[5].v.position.xyz); - gl_TessLevelOuter[1] = - TessAdaptive(inpt[0].v.position.xyz, inpt[15].v.position.xyz); - gl_TessLevelOuter[2] = - TessAdaptive(inpt[10].v.position.xyz, inpt[15].v.position.xyz); - gl_TessLevelOuter[3] = - TessAdaptive(inpt[5].v.position.xyz, inpt[10].v.position.xyz); + vec4 tessLevelOuter = vec4(0); + vec2 tessLevelInner = vec2(0); - gl_TessLevelInner[0] = - max(gl_TessLevelOuter[1], gl_TessLevelOuter[3]); - gl_TessLevelInner[1] = - max(gl_TessLevelOuter[0], gl_TessLevelOuter[2]); - #else - gl_TessLevelInner[0] = GetTessLevel(patchLevel); - gl_TessLevelInner[1] = GetTessLevel(patchLevel); - gl_TessLevelOuter[0] = GetTessLevel(patchLevel); - gl_TessLevelOuter[1] = GetTessLevel(patchLevel); - gl_TessLevelOuter[2] = GetTessLevel(patchLevel); - gl_TessLevelOuter[3] = GetTessLevel(patchLevel); - #endif + OsdGetTessLevels(inpt[0].v.position.xyz, inpt[5].v.position.xyz, + inpt[10].v.position.xyz, inpt[15].v.position.xyz, + patchParam, tessLevelOuter, tessLevelInner); + + gl_TessLevelOuter[0] = tessLevelOuter[0]; + gl_TessLevelOuter[1] = tessLevelOuter[1]; + gl_TessLevelOuter[2] = tessLevelOuter[2]; + gl_TessLevelOuter[3] = tessLevelOuter[3]; + + gl_TessLevelInner[0] = tessLevelInner[0]; + gl_TessLevelInner[1] = tessLevelInner[1]; } } @@ -119,13 +103,11 @@ void main() layout(quads) in; layout(cw) in; -/* XXXdyu-patch-drawing support for frational spacing #if defined OSD_FRACTIONAL_ODD_SPACING layout(fractional_odd_spacing) in; #elif defined OSD_FRACTIONAL_EVEN_SPACING layout(fractional_even_spacing) in; #endif -*/ in block { ControlVertex v; @@ -218,7 +200,7 @@ void main() dUV += D[i] * DUCP[i]; } - int level = int(inpt[0].v.ptexInfo.z); + int level = inpt[0].v.patchCoord.z; BiTangent *= 3 * level; Tangent *= 3 * level; dUU *= 6 * level; @@ -268,7 +250,7 @@ void main() Tangent += B[i] * DUCP[i]; BiTangent += D[i] * BUCP[i]; } - int level 
= int(inpt[0].v.ptexInfo.z); + int level = inpt[0].v.patchCoord.z; BiTangent *= 3 * level; Tangent *= 3 * level; @@ -283,10 +265,9 @@ void main() //OSD_USER_VARYING_PER_EVAL_POINT(vec2(u,v), 0, 3, 1, 2); OSD_USER_VARYING_PER_EVAL_POINT(vec2(u,v), 0, 15, 5, 10); - outpt.v.patchCoord = inpt[0].v.patchCoord; - outpt.v.patchCoord.xy = vec2(v, u); - - OSD_COMPUTE_PTEX_COORD_TESSEVAL_SHADER; + vec2 UV = vec2(v, u); + outpt.v.tessCoord = UV; + outpt.v.patchCoord = OsdInterpolatePatchCoord(UV, inpt[0].v.patchCoord); OSD_DISPLACEMENT_CALLBACK; diff --git a/opensubdiv/osd/glslPatchTransition.glsl b/opensubdiv/osd/glslPatchTransition.glsl deleted file mode 100644 index 5b8ed194..00000000 --- a/opensubdiv/osd/glslPatchTransition.glsl +++ /dev/null @@ -1,251 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -//---------------------------------------------------------- -// Patches.TessControlTransition -//---------------------------------------------------------- -#ifdef OSD_PATCH_TESS_CONTROL_BSPLINE_SHADER - -patch out vec4 tessOuterLo, tessOuterHi; - -void -GetTessLevelsUniform(vec3 cp[16], int patchParam) -{ - float tessAmount = GetTessLevel(GetPatchLevel()); - - tessOuterLo = vec4(tessAmount); - tessOuterHi = vec4(0); -} - -// -// Organization of B-spline and Bezier control points. -// -// Each patch is defined by 16 control points (labeled 0-15). -// -// The patch will be evaluated across the domain from (0,0) at -// the lower-left to (1,1) at the upper-right. When computing -// adaptive tessellation metrics, we consider refined vertex-vertex -// and edge-vertex points along the transition edges of the patch -// (labeled vv* and ev* respectively). -// -// The two segments of each transition edge are labeled Lo and Hi, -// with the Lo segment occuring before the Hi segment along the -// transition edge's domain parameterization. These Lo and Hi segment -// tessellation levels determine how domain evaluation coordinates -// are remapped along transition edges. The Hi segment value will -// be zero for a non-transition edge. -// -// (0,1) (1,1) -// -// vv3 ev23 vv2 -// | Lo3 | Hi3 | -// --O-----------O-----+-----O-----------O-- -// | 12 | 13 14 | 15 | -// | | | | -// | | | | -// Hi0 | | | | Hi2 -// | | | | -// O-----------O-----------O-----------O -// | 8 | 9 10 | 11 | -// | | | | -// ev03 --+ | | +-- ev12 -// | | | | -// | 4 | 5 6 | 7 | -// O-----------O-----------O-----------O -// | | | | -// Lo0 | | | | Lo2 -// | | | | -// | | | | -// | 0 | 1 2 | 3 | -// --O-----------O-----+-----O-----------O-- -// | Lo1 | Hi1 | -// vv0 ev01 vv1 -// -// (0,0) (1,0) -// - -void -GetTessLevelsRefinedPoints(vec3 cp[16], int patchParam) -{ - // Each edge of a transition patch is adjacent to one or two patches - // at the next refined level of subdivision. 
We compute the corresponding - // vertex-vertex and edge-vertex refined points along the edges of the - // patch using Catmull-Clark subdivision stencil weights. - // For simplicity, we let the optimizer discard unused computation. - - vec3 vv0 = (cp[0] + cp[2] + cp[8] + cp[10]) * 0.015625 + - (cp[1] + cp[4] + cp[6] + cp[9]) * 0.09375 + cp[5] * 0.5625; - vec3 ev01 = (cp[1] + cp[2] + cp[9] + cp[10]) * 0.0625 + - (cp[5] + cp[6]) * 0.375; - - vec3 vv1 = (cp[1] + cp[3] + cp[9] + cp[11]) * 0.015625 + - (cp[2] + cp[5] + cp[7] + cp[10]) * 0.09375 + cp[6] * 0.5625; - vec3 ev12 = (cp[5] + cp[7] + cp[9] + cp[11]) * 0.0625 + - (cp[6] + cp[10]) * 0.375; - - vec3 vv2 = (cp[5] + cp[7] + cp[13] + cp[15]) * 0.015625 + - (cp[6] + cp[9] + cp[11] + cp[14]) * 0.09375 + cp[10] * 0.5625; - vec3 ev23 = (cp[5] + cp[6] + cp[13] + cp[14]) * 0.0625 + - (cp[9] + cp[10]) * 0.375; - - vec3 vv3 = (cp[4] + cp[6] + cp[12] + cp[14]) * 0.015625 + - (cp[5] + cp[8] + cp[10] + cp[13]) * 0.09375 + cp[9] * 0.5625; - vec3 ev03 = (cp[4] + cp[6] + cp[8] + cp[10]) * 0.0625 + - (cp[5] + cp[9]) * 0.375; - - tessOuterLo = vec4(1); - tessOuterHi = vec4(0); - - if (((patchParam >> 11) & 1) != 0) { - tessOuterLo[0] = TessAdaptive(vv0, ev03); - tessOuterHi[0] = TessAdaptive(vv3, ev03); - } else { - tessOuterLo[0] = TessAdaptive(cp[5], cp[9]); - } - if (((patchParam >> 8) & 1) != 0) { - tessOuterLo[1] = TessAdaptive(vv0, ev01); - tessOuterHi[1] = TessAdaptive(vv1, ev01); - } else { - tessOuterLo[1] = TessAdaptive(cp[5], cp[6]); - } - if (((patchParam >> 9) & 1) != 0) { - tessOuterLo[2] = TessAdaptive(vv1, ev12); - tessOuterHi[2] = TessAdaptive(vv2, ev12); - } else { - tessOuterLo[2] = TessAdaptive(cp[6], cp[10]); - } - if (((patchParam >> 10) & 1) != 0) { - tessOuterLo[3] = TessAdaptive(vv3, ev23); - tessOuterHi[3] = TessAdaptive(vv2, ev23); - } else { - tessOuterLo[3] = TessAdaptive(cp[9], cp[10]); - } -} - -void -GetTessLevelsLimitPoints(vec3 cpBezier[16], int patchParam) -{ - // Each edge of a transition patch is 
adjacent to one or two patches - // at the next refined level of subdivision. When the patch control - // points have been converted to the Bezier basis, the control points - // at the four corners are on the limit surface (since a Bezier patch - // interpolates its corner control points). We can compute an adaptive - // tessellation level for transition edges on the limit surface by - // evaluating a limit position at the mid point of each transition edge. - - tessOuterLo = vec4(1); - tessOuterHi = vec4(0); - - if (((patchParam >> 11) & 1) != 0) { - vec3 ev03 = EvalBezier(cpBezier, vec2(0.0, 0.5)); - tessOuterLo[0] = TessAdaptive(cpBezier[0], ev03); - tessOuterHi[0] = TessAdaptive(cpBezier[12], ev03); - } else { - tessOuterLo[0] = TessAdaptive(cpBezier[0], cpBezier[12]); - } - if (((patchParam >> 8) & 1) != 0) { - vec3 ev01 = EvalBezier(cpBezier, vec2(0.5, 0.0)); - tessOuterLo[1] = TessAdaptive(cpBezier[0], ev01); - tessOuterHi[1] = TessAdaptive(cpBezier[3], ev01); - } else { - tessOuterLo[1] = TessAdaptive(cpBezier[0], cpBezier[3]); - } - if (((patchParam >> 9) & 1) != 0) { - vec3 ev12 = EvalBezier(cpBezier, vec2(1.0, 0.5)); - tessOuterLo[2] = TessAdaptive(cpBezier[3], ev12); - tessOuterHi[2] = TessAdaptive(cpBezier[15], ev12); - } else { - tessOuterLo[2] = TessAdaptive(cpBezier[3], cpBezier[15]); - } - if (((patchParam >> 10) & 1) != 0) { - vec3 ev23 = EvalBezier(cpBezier, vec2(0.5, 1.0)); - tessOuterLo[3] = TessAdaptive(cpBezier[12], ev23); - tessOuterHi[3] = TessAdaptive(cpBezier[15], ev23); - } else { - tessOuterLo[3] = TessAdaptive(cpBezier[12], cpBezier[15]); - } -} - -void -GetTransitionTessLevels(vec3 cp[16], int patchParam, - inout vec4 outerLevel, inout vec2 innerLevel) -{ -#if defined OSD_ENABLE_SCREENSPACE_TESSELLATION - GetTessLevelsLimitPoints(cp, patchParam); -#elif defined OSD_ENABLE_SCREENSPACE_TESSELLATION_REFINED - GetTessLevelsRefinedPoints(cp, patchParam); -#else - GetTessLevelsUniform(cp, patchParam); -#endif - - // Outer levels are the sum 
of the Lo and Hi segments where the Hi - // segments will have a length of zero for non-transition edges. - outerLevel = tessOuterLo + tessOuterHi; - - // Inner levels are the average the corresponding outer levels. - innerLevel[0] = (outerLevel[1] + outerLevel[3]) * 0.5; - innerLevel[1] = (outerLevel[0] + outerLevel[2]) * 0.5; -} - -#endif - -//---------------------------------------------------------- -// Patches.TessEvalTransition -//---------------------------------------------------------- -#ifdef OSD_PATCH_TESS_EVAL_BSPLINE_SHADER - -patch in vec4 tessOuterLo, tessOuterHi; - -float -GetTransitionSplit(float t, float n0, float n1) -{ - float ti = round(t * (n0 + n1)); - - if (ti <= n0) { - return 0.5 * (ti / n0); - } else { - return 0.5 * ((ti - n0) / n1) + 0.5; - } -} - -vec2 -GetTransitionParameterization() -{ - vec2 UV = gl_TessCoord.xy; - if (UV.x == 0 && tessOuterHi[0] > 0) { - UV.y = GetTransitionSplit(UV.y, tessOuterLo[0], tessOuterHi[0]); - } else - if (UV.y == 0 && tessOuterHi[1] > 0) { - UV.x = GetTransitionSplit(UV.x, tessOuterLo[1], tessOuterHi[1]); - } else - if (UV.x == 1 && tessOuterHi[2] > 0) { - UV.y = GetTransitionSplit(UV.y, tessOuterLo[2], tessOuterHi[2]); - } else - if (UV.y == 1 && tessOuterHi[3] > 0) { - UV.x = GetTransitionSplit(UV.x, tessOuterLo[3], tessOuterHi[3]); - } - return UV; -} - -#endif diff --git a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp b/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp deleted file mode 100644 index 2d763e7a..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeContext.cpp +++ /dev/null @@ -1,253 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../far/stencilTables.h" - -//#define OSD_DEBUG_BUILD -//#include "../osd/debug.h" -#include "../osd/glslTransformFeedbackComputeContext.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// ----------------------------------------------------------------------------- - -template GLuint -createGLTextureBuffer(std::vector const & src, GLenum type) { - - int size = (int)src.size()*sizeof(T); - void const * ptr = &src.at(0); - - GLuint buffer; - glGenBuffers(1, &buffer); - - GLuint devicePtr; - glGenTextures(1, &devicePtr); - -#if defined(GL_EXT_direct_state_access) - if (glNamedBufferDataEXT and glTextureBufferEXT) { - glNamedBufferDataEXT(buffer, size, ptr, GL_STATIC_DRAW); - glTextureBufferEXT(devicePtr, GL_TEXTURE_BUFFER, type, buffer); - } else { -#else - { -#endif - GLint prev = 0; - - glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &prev); - glBindBuffer(GL_ARRAY_BUFFER, buffer); - glBufferData(GL_ARRAY_BUFFER, size, ptr, GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, prev); - - glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, &prev); - glBindTexture(GL_TEXTURE_BUFFER, devicePtr); - glTexBuffer(GL_TEXTURE_BUFFER, type, buffer); - glBindTexture(GL_TEXTURE_BUFFER, 
prev); - } - - glDeleteBuffers(1, &buffer); - - //OSD_DEBUG_CHECK_GL_ERROR("createGLTextureBuffer end\n"); - return devicePtr; -} - -// ----------------------------------------------------------------------------- - -class GLSLTransformFeedbackComputeContext::GLStencilTables { - -public: - - GLStencilTables(Far::StencilTables const & stencilTables) { - _numStencils = stencilTables.GetNumStencils(); - if (_numStencils > 0) { - _sizes = createGLTextureBuffer(stencilTables.GetSizes(), GL_R8UI); - _offsets = createGLTextureBuffer(stencilTables.GetOffsets(), GL_R32I); - _indices = createGLTextureBuffer(stencilTables.GetControlIndices(), GL_R32I); - _weights = createGLTextureBuffer(stencilTables.GetWeights(), GL_R32F); - } else { - _sizes = _offsets = _indices = _weights = 0; - } - } - - ~GLStencilTables() { - if (_sizes) glDeleteTextures(1, &_sizes); - if (_offsets) glDeleteTextures(1, &_offsets); - if (_weights) glDeleteTextures(1, &_weights); - if (_indices) glDeleteTextures(1, &_indices); - } - - bool IsValid() const { - return _sizes and _offsets and _indices and _weights; - } - - GLuint GetSizes() const { - return _sizes; - } - - GLuint GetOffsets() const { - return _offsets; - } - - GLuint GetIndices() const { - return _indices; - } - - GLuint GetWeights() const { - return _weights; - } - - int GetNumStencils() const { - return _numStencils; - } - -private: - - GLuint _sizes, - _offsets, - _indices, - _weights; - - int _numStencils; -}; - -// ----------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeContext::GLSLTransformFeedbackComputeContext( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables) : - _vertexStencilTables(0), _varyingStencilTables(0), - _numControlVertices(0) { - - if (vertexStencilTables) { - _vertexStencilTables = new GLStencilTables(*vertexStencilTables); - _numControlVertices = vertexStencilTables->GetNumControlVertices(); - } - - if 
(varyingStencilTables) { - _varyingStencilTables = new GLStencilTables(*varyingStencilTables); - - if (_numControlVertices) { - assert(_numControlVertices==varyingStencilTables->GetNumControlVertices()); - } else { - _numControlVertices = varyingStencilTables->GetNumControlVertices(); - } - } -} - -GLSLTransformFeedbackComputeContext::~GLSLTransformFeedbackComputeContext() { - delete _vertexStencilTables; - delete _varyingStencilTables; -} - -// ---------------------------------------------------------------------------- - -bool -GLSLTransformFeedbackComputeContext::HasVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->IsValid() : false; -} - -bool -GLSLTransformFeedbackComputeContext::HasVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->IsValid() : false; -} - -int -GLSLTransformFeedbackComputeContext::GetNumStencilsInVertexStencilTables() const { - return _vertexStencilTables ? _vertexStencilTables->GetNumStencils() : 0; -} - -int -GLSLTransformFeedbackComputeContext::GetNumStencilsInVaryingStencilTables() const { - return _varyingStencilTables ? _varyingStencilTables->GetNumStencils() : 0; -} - -// ---------------------------------------------------------------------------- -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesSizes() const { - return _vertexStencilTables ? _vertexStencilTables->GetSizes() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesOffsets() const { - return _vertexStencilTables ? _vertexStencilTables->GetOffsets() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesIndices() const { - return _vertexStencilTables ? _vertexStencilTables->GetIndices() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVertexStencilTablesWeights() const { - return _vertexStencilTables ? 
_vertexStencilTables->GetWeights() : 0; -} - -// ---------------------------------------------------------------------------- - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesSizes() const { - return _varyingStencilTables ? _varyingStencilTables->GetSizes() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesOffsets() const { - return _varyingStencilTables ? _varyingStencilTables->GetOffsets() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesIndices() const { - return _varyingStencilTables ? _varyingStencilTables->GetIndices() : 0; -} - -GLuint -GLSLTransformFeedbackComputeContext::GetVaryingStencilTablesWeights() const { - return _varyingStencilTables ? _varyingStencilTables->GetWeights() : 0; -} - - -// ----------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeContext * -GLSLTransformFeedbackComputeContext::Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void * /*deviceContext*/) { - - GLSLTransformFeedbackComputeContext *result = - new GLSLTransformFeedbackComputeContext( - vertexStencilTables, varyingStencilTables); - - return result; -} - - -// ----------------------------------------------------------------------------- - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslTransformFeedbackComputeContext.h b/opensubdiv/osd/glslTransformFeedbackComputeContext.h deleted file mode 100644 index 5a51e55f..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeContext.h +++ /dev/null @@ -1,134 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. 
is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H -#define OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H - -#include "../version.h" - -#include - -#include "../osd/nonCopyable.h" -#include "../osd/opengl.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Far{ class StencilTables; } - -namespace Osd { - -/// -/// \brief GLSL-Compute(transform-feedback) Refine Context -/// -/// The GLSL (transform-feedback) implementation of the Refine module contextual functionality. -/// -/// Contexts interface the serialized topological data pertaining to the -/// geometric primitives with the capabilities of the selected discrete -/// compute device. 
-/// -class GLSLTransformFeedbackComputeContext { -public: - /// Creates an GLSLTransformFeedbackComputeContext instance - /// - /// @param vertexStencilTables The Far::StencilTables used for vertex - /// interpolation - /// - /// @param varyingStencilTables The Far::StencilTables used for varying - /// interpolation - /// - static GLSLTransformFeedbackComputeContext * Create( - Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables, - void *deviceContext = NULL); - - /// Destructor - virtual ~GLSLTransformFeedbackComputeContext(); - - /// Returns true if the Context has a 'vertex' interpolation stencil table - bool HasVertexStencilTables() const; - - /// Returns true if the Context has a 'varying' interpolation stencil table - bool HasVaryingStencilTables() const; - - /// Returns the number of control vertices - int GetNumControlVertices() const { - return _numControlVertices; - } - - /// Returns the number of stencils in vertex stencil table - int GetNumStencilsInVertexStencilTables() const; - - /// Returns the number of stencils in varying stencil table - int GetNumStencilsInVaryingStencilTables() const; - - /// Returns the GL texture buffer containing vertex-stencil stencil sizes - GLuint GetVertexStencilTablesSizes() const; - - /// Returns the GL texture buffer containing vertex-stencil stencil offsets - GLuint GetVertexStencilTablesOffsets() const; - - /// Returns the GL texture buffer containing vertex-stencil stencil indices - GLuint GetVertexStencilTablesIndices() const; - - /// Returns the GL texture buffer containing vertex-stencil stencil weights - GLuint GetVertexStencilTablesWeights() const; - - - /// Returns the GL texture buffer containing Varying-stencil stencil sizes - GLuint GetVaryingStencilTablesSizes() const; - - /// Returns the GL texture buffer containing Varying-stencil stencil offsets - GLuint GetVaryingStencilTablesOffsets() const; - - /// Returns the GL texture buffer containing Varying-stencil 
stencil indices - GLuint GetVaryingStencilTablesIndices() const; - - /// Returns the GL texture buffer containing Varying-stencil stencil weights - GLuint GetVaryingStencilTablesWeights() const; - - -protected: - - explicit GLSLTransformFeedbackComputeContext(Far::StencilTables const * vertexStencilTables, - Far::StencilTables const * varyingStencilTables); - -private: - - class GLStencilTables; - - GLStencilTables * _vertexStencilTables, - * _varyingStencilTables; - - int _numControlVertices; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTEXT_H diff --git a/opensubdiv/osd/glslTransformFeedbackComputeController.cpp b/opensubdiv/osd/glslTransformFeedbackComputeController.cpp deleted file mode 100644 index 958b9ff2..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeController.cpp +++ /dev/null @@ -1,434 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -//#define OSD_DEBUG_BUILD -#include "../osd/debug.h" -#include "../osd/glslTransformFeedbackComputeController.h" -#include "../osd/glslTransformFeedbackComputeContext.h" -#include "../osd/opengl.h" -#include "../far/error.h" - -#include -#include -#include -#include -#include - -#if _MSC_VER - #define snprintf _snprintf -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -static const char *shaderSource = -#include "../osd/glslTransformFeedbackKernel.gen.h" -; - -static const char *shaderDefines = "" -#ifdef OPT_CATMARK_V_IT_VEC2 -"#define OPT_CATMARK_V_IT_VEC2\n" -#endif -#ifdef OPT_E0_IT_VEC4 -"#define OPT_E0_IT_VEC4\n" -#endif -#ifdef OPT_E0_S_VEC2 -"#define OPT_E0_S_VEC2\n" -#endif -; - -// ---------------------------------------------------------------------------- -static void -bindTexture(GLint sampler, GLuint texture, int unit) { - if (sampler==-1) { - return; - } - glUniform1i(sampler, unit); - glActiveTexture(GL_TEXTURE0 + unit); - glBindTexture(GL_TEXTURE_BUFFER, texture); - glActiveTexture(GL_TEXTURE0); -} - -// ---------------------------------------------------------------------------- - -class GLSLTransformFeedbackComputeController::KernelBundle : - NonCopyable { - -public: - - KernelBundle() : - _program(0), - _uniformSizes(0), - _uniformOffsets(0), - _uniformIndices(0), - _uniformWeights(0), - _uniformStart(0), - _uniformEnd(0), - _uniformPrimvarOffset(0) { } - - ~KernelBundle() { - if (_program) { - glDeleteProgram(_program); - } - } - - void UseProgram() const { - glUseProgram(_program); - } - - bool Compile(VertexBufferDescriptor const & desc) { - - _desc = VertexBufferDescriptor(0, desc.length, desc.stride); - - if (_program) { - glDeleteProgram(_program); - _program=0; - } - _program = glCreateProgram(); - - GLuint shader = glCreateShader(GL_VERTEX_SHADER); - - std::ostringstream 
defines; - defines << "#define LENGTH " << desc.length << "\n" - << "#define STRIDE " << desc.stride << "\n"; - std::string defineStr = defines.str(); - - const char *shaderSources[4] = {"#version 410\n", 0, 0, 0}; - - shaderSources[1] = defineStr.c_str(); - shaderSources[2] = shaderDefines; - shaderSources[3] = shaderSource; - glShaderSource(shader, 4, shaderSources, NULL); - glCompileShader(shader); - glAttachShader(_program, shader); - - std::vector outputs; - std::vector pOutputs; - { - // vertex data (may include custom vertex data) and varying data - // are stored into the same buffer, interleaved. - // - // (gl_SkipComponents1) - // outVertexData[0] - // outVertexData[1] - // outVertexData[2] - // (gl_SkipComponents1) - // - // note that "primvarOffset" in shader is still needed to read - // interleaved components even if gl_SkipComponents is used. - // - char attrName[32]; - int primvarOffset = (desc.offset % desc.stride); - for (int i = 0; i < primvarOffset; ++i) { - outputs.push_back("gl_SkipComponents1"); - } - for (int i = 0; i < desc.length; ++i) { - snprintf(attrName, 32, "outVertexBuffer[%d]", i); - outputs.push_back(attrName); - } - for (int i = primvarOffset + desc.length; i < desc.stride; ++i) { - outputs.push_back("gl_SkipComponents1"); - } - - // convert to char* array - for (size_t i = 0; i < outputs.size(); ++i) { - pOutputs.push_back(&outputs[i][0]); - } - } - - glTransformFeedbackVaryings(_program, (GLsizei)outputs.size(), - &pOutputs[0], GL_INTERLEAVED_ATTRIBS); - - GLint linked = 0; - glLinkProgram(_program); - glGetProgramiv(_program, GL_LINK_STATUS, &linked); - - if (linked == GL_FALSE) { - char buffer[1024]; - glGetShaderInfoLog(shader, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glGetProgramInfoLog(_program, 1024, NULL, buffer); - Far::Error(Far::FAR_RUNTIME_ERROR, buffer); - - glDeleteProgram(_program); - _program = 0; - return false; - } - - glDeleteShader(shader); - - // set uniform locations for compute 
kernels - _primvarBuffer = glGetUniformLocation(_program, "vertexBuffer"); - - _uniformSizes = glGetUniformLocation(_program, "sizes"); - _uniformOffsets = glGetUniformLocation(_program, "offsets"); - _uniformIndices = glGetUniformLocation(_program, "indices"); - _uniformWeights = glGetUniformLocation(_program, "weights"); - - _uniformStart = glGetUniformLocation(_program, "batchStart"); - _uniformEnd = glGetUniformLocation(_program, "batchEnd"); - - _uniformPrimvarOffset = glGetUniformLocation(_program, "primvarOffset"); - - OSD_DEBUG_CHECK_GL_ERROR("KernelBundle::Compile"); - - return true; - } - - GLint GetPrimvarBufferLocation() const { - return _primvarBuffer; - } - - GLint GetSizesLocation() const { - return _uniformSizes; - } - - GLint GetOffsetsLocation() const { - return _uniformOffsets; - } - GLint GetIndicesLocation() const { - return _uniformIndices; - } - GLint GetWeightsLocation() const { - return _uniformWeights; - } - - void TransformPrimvarBuffer(GLuint primvarBuffer, - int offset, int numCVs, int start, int end) const { - - assert(end >= start); - - // set batch range - glUniform1i(_uniformStart, start); - glUniform1i(_uniformEnd, end); - glUniform1i(_uniformPrimvarOffset, offset); - - int count = end - start, - stride = _desc.stride*sizeof(float); - - // note: offset includes both "batching offset" and "primvar offset". 
- // - glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, - 0, primvarBuffer, - (start + numCVs)*stride + (offset - offset%stride)*sizeof(float), - count*stride); - - glBeginTransformFeedback(GL_POINTS); - - glDrawArrays(GL_POINTS, 0, count); - - glEndTransformFeedback(); - - glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0); - - //OSD_DEBUG_CHECK_GL_ERROR("TransformPrimvarBuffer\n"); - } - - void ApplyStencilTableKernel(GLuint primvarBuffer, - int offset, int numCVs, - int start, int end) const { - - TransformPrimvarBuffer(primvarBuffer, - offset, numCVs, start, end); - } - - struct Match { - - Match(VertexBufferDescriptor const & d) : desc(d) { } - - bool operator() (KernelBundle const * kernel) { - return (desc.length==kernel->_desc.length and - desc.stride==kernel->_desc.stride); - } - - VertexBufferDescriptor desc; - }; - -private: - - GLuint _program; - - GLint _primvarBuffer; - - GLint _uniformSizes, // uniform paramaeters for kernels - _uniformOffsets, - _uniformIndices, - _uniformWeights, - - _uniformStart, // batch - _uniformEnd, - - _uniformPrimvarOffset; - - VertexBufferDescriptor _desc; // primvar buffer descriptor -}; - -// ---------------------------------------------------------------------------- -void -GLSLTransformFeedbackComputeController::bindBufferAndProgram( - GLuint & feedbackTexture) { - - glEnable(GL_RASTERIZER_DISCARD); - _currentBindState.kernelBundle->UseProgram(); - - if (not feedbackTexture) { - glGenTextures(1, &feedbackTexture); -#if defined(GL_EXT_direct_state_access) - if (glTextureBufferEXT) { - glTextureBufferEXT(feedbackTexture, GL_TEXTURE_BUFFER, GL_R32F, - _currentBindState.buffer); - } else { -#else - { -#endif - glBindTexture(GL_TEXTURE_BUFFER, feedbackTexture); - glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, _currentBindState.buffer); - glBindTexture(GL_TEXTURE_BUFFER, 0); - } - } - - bindTexture( - _currentBindState.kernelBundle->GetPrimvarBufferLocation(), feedbackTexture, 0); - - // bind vertex array - // always create new one, to 
be safe with multiple contexts. - glGenVertexArrays(1, &_vao); - glBindVertexArray(_vao); -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::bindContextStencilTables( - ComputeContext const *context, bool varying) { - - GLint sizesLocation = _currentBindState.kernelBundle->GetSizesLocation(), - offsetsLocation = _currentBindState.kernelBundle->GetOffsetsLocation(), - indicesLocation = _currentBindState.kernelBundle->GetIndicesLocation(), - weightsLocation = _currentBindState.kernelBundle->GetWeightsLocation(); - - if (not varying) { - bindTexture(sizesLocation, context->GetVertexStencilTablesSizes(), 1); - bindTexture(offsetsLocation, context->GetVertexStencilTablesOffsets(), 2); - bindTexture(indicesLocation, context->GetVertexStencilTablesIndices(), 3); - bindTexture(weightsLocation, context->GetVertexStencilTablesWeights(), 4); - } else { - bindTexture(sizesLocation, context->GetVaryingStencilTablesSizes(), 1); - bindTexture(offsetsLocation, context->GetVaryingStencilTablesOffsets(), 2); - bindTexture(indicesLocation, context->GetVaryingStencilTablesIndices(), 3); - bindTexture(weightsLocation, context->GetVaryingStencilTablesWeights(), 4); - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::unbindResources() { - - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_BUFFER, 0); - - glDisable(GL_RASTERIZER_DISCARD); - glUseProgram(0); - glActiveTexture(GL_TEXTURE0); - - glBindVertexArray(0); - glDeleteVertexArrays(1, &_vao); -} - -// ---------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeController::KernelBundle const * -GLSLTransformFeedbackComputeController::getKernel( - VertexBufferDescriptor const &desc) { - - KernelRegistry::iterator it = - std::find_if(_kernelRegistry.begin(), _kernelRegistry.end(), - KernelBundle::Match(desc)); - - 
if (it != _kernelRegistry.end()) { - return *it; - } else { - KernelBundle * kernelBundle = new KernelBundle(); - kernelBundle->Compile(desc); - _kernelRegistry.push_back(kernelBundle); - return kernelBundle; - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::ApplyStencilTableKernel( - GLSLTransformFeedbackComputeContext const *context, int numStencils) const { - - assert(context); - - int start = 0; - int end = numStencils; - - _currentBindState.kernelBundle->ApplyStencilTableKernel( - _currentBindState.buffer, _currentBindState.desc.offset, - context->GetNumControlVertices(), - start, - end); -} - - -// ---------------------------------------------------------------------------- - -GLSLTransformFeedbackComputeController::GLSLTransformFeedbackComputeController() : - _vertexTexture(0), _varyingTexture(0), _vao(0) { -} - -GLSLTransformFeedbackComputeController::~GLSLTransformFeedbackComputeController() { - - for (KernelRegistry::iterator it = _kernelRegistry.begin(); - it != _kernelRegistry.end(); ++it) { - delete *it; - } - if (_vertexTexture) { - glDeleteTextures(1, &_vertexTexture); - } - if (_varyingTexture) { - glDeleteTextures(1, &_varyingTexture); - } -} - -// ---------------------------------------------------------------------------- - -void -GLSLTransformFeedbackComputeController::Synchronize() { - glFinish(); -} - - - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/glslTransformFeedbackComputeController.h b/opensubdiv/osd/glslTransformFeedbackComputeController.h deleted file mode 100644 index 3a9c92ce..00000000 --- a/opensubdiv/osd/glslTransformFeedbackComputeController.h +++ /dev/null @@ -1,214 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance 
with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H -#define OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/glslTransformFeedbackComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -class GLSLTransformFeedbackKernelBundle; - -/// \brief Compute controller for launching GLSLTransformFeedback transform feedback -/// subdivision kernels. -/// -/// GLSLTransformFeedbackComputeController is a compute controller class to launch -/// GLSLTransformFeedback transfrom feedback subdivision kernels. It requires -/// GLVertexBufferInterface as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class GLSLTransformFeedbackComputeController { -public: - typedef GLSLTransformFeedbackComputeContext ComputeContext; - - /// Constructor. 
- GLSLTransformFeedbackComputeController(); - - /// Destructor. - ~GLSLTransformFeedbackComputeController(); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The GLSLTransformFeedbackComputeContext to apply - /// refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( GLSLTransformFeedbackComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - if (vertexBuffer) { - - bind(vertexBuffer, vertexDesc, _vertexTexture); - - bindContextStencilTables(context, false); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVertexStencilTables()); - } - - if (varyingBuffer) { - - bind(varyingBuffer, varyingDesc, _varyingTexture); - - bindContextStencilTables(context, true); - - ApplyStencilTableKernel( - context, context->GetNumStencilsInVaryingStencilTables()); - } - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The GLSLTransformFeedbackComputeContext to apply - /// refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(GLSLTransformFeedbackComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. 
- void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context, - int numStencils) const; - - template - void bind( BUFFER * buffer, VertexBufferDescriptor const * desc, - GLuint feedbackTexture ) { - - assert(buffer); - - // if the vertex buffer descriptor is specified, use it - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (desc) { - _currentBindState.desc = *desc; - } else { - int numElements = buffer ? buffer->GetNumElements() : 0; - _currentBindState.desc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.buffer = buffer->BindVBO(); - - _currentBindState.kernelBundle = getKernel(_currentBindState.desc); - - bindBufferAndProgram(feedbackTexture); - } - - // Unbinds any previously bound vertex and varying data buffers. - void unbind() { - _currentBindState.Reset(); - unbindResources(); - } - - // binds the primvar data buffer and compute program - void bindBufferAndProgram(GLuint & texture); - - // binds the stencil tables for 'vertex' interpolation - void bindContextStencilTables(ComputeContext const *context, bool varying=false); - - // unbinds the primvar data buffer and compute program - void unbindResources(); - -private: - - class KernelBundle; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. 
- struct BindState { - - BindState() : buffer(0), kernelBundle(0) { } - - void Reset() { - buffer = 0; - desc.Reset(); - kernelBundle = 0; - } - - GLuint buffer; - - VertexBufferDescriptor desc; - - KernelBundle const * kernelBundle; - }; - - BindState _currentBindState; - - typedef std::vector KernelRegistry; - - KernelBundle const * getKernel(VertexBufferDescriptor const &desc); - - KernelRegistry _kernelRegistry; - - GLuint _vertexTexture, - _varyingTexture, - _vao; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_GLSL_TRANSFORM_FEEDBACK_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/glslTransformFeedbackKernel.glsl b/opensubdiv/osd/glslXFBKernel.glsl similarity index 92% rename from opensubdiv/osd/glslTransformFeedbackKernel.glsl rename to opensubdiv/osd/glslXFBKernel.glsl index 0962d7f8..caba7e2a 100644 --- a/opensubdiv/osd/glslTransformFeedbackKernel.glsl +++ b/opensubdiv/osd/glslXFBKernel.glsl @@ -36,7 +36,7 @@ uniform samplerBuffer weights; uniform int batchStart = 0; uniform int batchEnd = 0; -uniform int primvarOffset = 0; +uniform int srcOffset = 0; //------------------------------------------------------------------------------ @@ -58,19 +58,13 @@ void addWithWeight(inout Vertex v, Vertex src, float weight) { Vertex readVertex(int index) { Vertex v; - int vertexIndex = primvarOffset + index * STRIDE; + int vertexIndex = srcOffset + index * SRC_STRIDE; for(int j = 0; j < LENGTH; j++) { v.vertexData[j] = texelFetch(vertexBuffer, vertexIndex+j).x; } return v; } -void copyVertex(out Vertex dst, int index) { - for(int i = 0; i < LENGTH; i++) { - dst.vertexData[i] = texelFetch(vertexBuffer, index*STRIDE+i).x; - } -} - void writeVertex(Vertex v) { for(int i = 0; i < LENGTH; i++) { outVertexBuffer[i] = v.vertexData[i]; diff --git a/opensubdiv/osd/hlslComputeKernel.hlsl b/opensubdiv/osd/hlslComputeKernel.hlsl index 1ea16886..2e02bad0 100644 --- 
a/opensubdiv/osd/hlslComputeKernel.hlsl +++ b/opensubdiv/osd/hlslComputeKernel.hlsl @@ -22,25 +22,21 @@ // language governing permissions and limitations under the Apache License. // -interface IComputeKernel { - void runKernel( uint3 ID ); -}; -IComputeKernel kernel; - cbuffer KernelUniformArgs : register( b0 ) { - int batchStart, - batchEnd, - primvarOffset, - numCVs; + int batchStart; + int batchEnd; + int srcOffset; + int dstOffset; }; RWBuffer vertexBuffer : register( u0 ); +RWBuffer dstVertexBuffer : register( u1 ); Buffer sizes : register( t1 ); -Buffer offsets : register( t2 ); -Buffer indices : register( t3 ); -Buffer weights : register( t4 ); +Buffer offsets : register( t2 ); +Buffer indices : register( t3 ); +Buffer weights : register( t4 ); -//-------------------------------------------------------------------------------- +//---------------------------------------------------------------------------- struct Vertex { float vertexData[LENGTH]; @@ -54,7 +50,7 @@ void clear(out Vertex v) { Vertex readVertex(int index) { Vertex v; - int vertexIndex = primvarOffset + index * STRIDE; + int vertexIndex = srcOffset + index * SRC_STRIDE; for (int i = 0; i < LENGTH; ++i) { v.vertexData[i] = vertexBuffer[vertexIndex + i]; } @@ -62,27 +58,36 @@ Vertex readVertex(int index) { } void writeVertex(int index, Vertex v) { - int vertexIndex = primvarOffset + index * STRIDE; + int vertexIndex = dstOffset + index * DST_STRIDE; for (int i = 0; i < LENGTH; ++i) { vertexBuffer[vertexIndex + i] = v.vertexData[i]; } } +void writeVertexSeparate(int index, Vertex v) { + int vertexIndex = dstOffset + index * DST_STRIDE; + for (int i = 0; i < LENGTH; ++i) { + dstVertexBuffer[vertexIndex + i] = v.vertexData[i]; + } +} + void addWithWeight(inout Vertex v, const Vertex src, float weight) { for (int i = 0; i < LENGTH; ++i) { v.vertexData[i] += weight * src.vertexData[i]; } } +// --------------------------------------------------------------------------- 
-//-------------------------------------------------------------------------------- -// Stencil compute Kernel -class ComputeStencil : IComputeKernel { +interface IComputeKernel { + void runKernel( uint3 ID ); +}; +IComputeKernel kernel; + +class SingleBufferCompute : IComputeKernel { int placeholder; - - void runKernel( uint3 ID ) { - + void runKernel(uint3 ID) { int current = int(ID.x) + batchStart; if (current>=batchEnd) { @@ -99,25 +104,44 @@ class ComputeStencil : IComputeKernel { addWithWeight(dst, readVertex( indices[offset+i] ), weights[offset+i]); } - // the vertex buffer contains our control vertices at the beginning: don't - // stomp on those ! - writeVertex(numCVs+current, dst); + writeVertex(current, dst); } }; +class SeparateBufferCompute : IComputeKernel { + + int placeholder; + void runKernel(uint3 ID) { + int current = int(ID.x) + batchStart; + + if (current>=batchEnd) { + return; + } + + Vertex dst; + clear(dst); + + int offset = offsets[current], + size = sizes[current]; + + for (int i=0; i> 4) & 1) != 0) { - cpt[0] = 2*cpt[4] - cpt[8]; - cpt[1] = 2*cpt[5] - cpt[9]; - cpt[2] = 2*cpt[6] - cpt[10]; - cpt[3] = 2*cpt[7] - cpt[11]; - } - if (((patchParam >> 4) & 2) != 0) { - cpt[3] = 2*cpt[2] - cpt[1]; - cpt[7] = 2*cpt[6] - cpt[5]; - cpt[11] = 2*cpt[10] - cpt[9]; - cpt[15] = 2*cpt[14] - cpt[13]; - } - if (((patchParam >> 4) & 4) != 0) { - cpt[12] = 2*cpt[8] - cpt[4]; - cpt[13] = 2*cpt[9] - cpt[5]; - cpt[14] = 2*cpt[10] - cpt[6]; - cpt[15] = 2*cpt[11] - cpt[7]; - } - if (((patchParam >> 4) & 8) != 0) { - cpt[0] = 2*cpt[1] - cpt[2]; - cpt[4] = 2*cpt[5] - cpt[6]; - cpt[8] = 2*cpt[9] - cpt[10]; - cpt[12] = 2*cpt[13] - cpt[14]; - } -} - // compute single-crease patch matrix float4x4 ComputeMatrixSimplified(float sharpness) @@ -118,33 +89,34 @@ HSConstFunc( uint primitiveID : SV_PrimitiveID) { HS_CONSTANT_FUNC_OUT output; - int patchParam = GetPatchParam(primitiveID); - int patchLevel = GetPatchLevel(primitiveID); float3 position[16]; for (int p=0; p<16; 
++p) { position[p] = bezierPatch[p].position.xyz; } - reflectBoundaryEdges(position, patchParam); + int3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(primitiveID)); + + OsdComputeBSplineBoundaryPoints(position, patchParam); OSD_PATCH_CULL(OSD_PATCH_INPUT_SIZE); - float4 outerLevel = float4(0,0,0,0); - float4 innerLevel = float4(0,0,0,0); + float4 tessLevelOuter = float4(0,0,0,0); + float4 tessLevelInner = float4(0,0,0,0); float4 tessOuterLo = float4(0,0,0,0); float4 tessOuterHi = float4(0,0,0,0); - GetTransitionTessLevels(position, patchParam, patchLevel, - outerLevel, innerLevel, - tessOuterLo, tessOuterHi); - output.tessLevelOuter[0] = outerLevel[0]; - output.tessLevelOuter[1] = outerLevel[1]; - output.tessLevelOuter[2] = outerLevel[2]; - output.tessLevelOuter[3] = outerLevel[3]; + OsdGetTessLevels(position, patchParam, + tessLevelOuter, tessLevelInner, + tessOuterLo, tessOuterHi); - output.tessLevelInner[0] = innerLevel[0]; - output.tessLevelInner[1] = innerLevel[1]; + output.tessLevelOuter[0] = tessLevelOuter[0]; + output.tessLevelOuter[1] = tessLevelOuter[1]; + output.tessLevelOuter[2] = tessLevelOuter[2]; + output.tessLevelOuter[3] = tessLevelOuter[3]; + + output.tessLevelInner[0] = tessLevelInner[0]; + output.tessLevelInner[1] = tessLevelInner[1]; output.tessOuterLo = tessOuterLo; output.tessOuterHi = tessOuterHi; @@ -170,9 +142,9 @@ HullVertex hs_main_patches( position[p] = patch[p].position.xyz; } - int patchParam = GetPatchParam(primitiveID); + int3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(primitiveID)); - reflectBoundaryEdges(position, patchParam); + OsdComputeBSplineBoundaryPoints(position, patchParam); float3 H[4]; for (int l=0; l<4; ++l) { @@ -184,7 +156,7 @@ HullVertex hs_main_patches( HullVertex output; #if defined OSD_PATCH_ENABLE_SINGLE_CREASE - float sharpness = GetSharpness(primitiveID); + float sharpness = OsdGetPatchSharpness(patchParam); if (sharpness > 0) { float Sf = floor(sharpness); float Sc = ceil(sharpness); @@ -222,14 +194,7 @@ 
HullVertex hs_main_patches( output.position = float4(pos, 1.0); #endif - int patchLevel = GetPatchLevel(primitiveID); - - // +0.5 to avoid interpolation error of integer value - output.patchCoord = float4(0, 0, - patchLevel+0.5, - GetPrimitiveID(primitiveID)+0.5); - - OSD_COMPUTE_PTEX_COORD_HULL_SHADER; + output.patchCoord = OsdGetPatchCoord(patchParam); return output; } @@ -245,7 +210,9 @@ void ds_main_patches( in float2 domainCoord : SV_DomainLocation, out OutputVertex output ) { - float2 UV = GetTransitionParameterization(input, domainCoord); + float2 UV = OsdGetTessParameterization(domainCoord, + input.tessOuterLo, + input.tessOuterHi); #ifdef OSD_COMPUTE_NORMAL_DERIVATIVES float B[4], D[4], C[4]; @@ -337,7 +304,7 @@ void ds_main_patches( dUV += D[k] * DUCP[k]; } - int level = int(patch[0].ptexInfo.z); + int level = patch[0].patchCoord.z; Tangent *= 3 * level; BiTangent *= 3 * level; dUU *= 6 * level; @@ -372,7 +339,7 @@ void ds_main_patches( Tangent += B[k] * DUCP[k]; BiTangent += D[k] * BUCP[k]; } - int level = int(patch[0].ptexInfo.z); + int level = patch[0].patchCoord.z; Tangent *= 3 * level; BiTangent *= 3 * level; @@ -385,11 +352,7 @@ void ds_main_patches( output.position = float4(WorldPos, 1.0f); output.normal = normal; - output.patchCoord = patch[0].patchCoord; - - output.patchCoord.xy = float2(UV.x, UV.y); - - OSD_COMPUTE_PTEX_COORD_DOMAIN_SHADER; + output.patchCoord = OsdInterpolatePatchCoord(UV, patch[0].patchCoord); OSD_DISPLACEMENT_CALLBACK; diff --git a/opensubdiv/osd/hlslPatchCommon.hlsl b/opensubdiv/osd/hlslPatchCommon.hlsl index 7d2b6897..016f5abe 100644 --- a/opensubdiv/osd/hlslPatchCommon.hlsl +++ b/opensubdiv/osd/hlslPatchCommon.hlsl @@ -41,9 +41,10 @@ struct InputVertex { struct HullVertex { float4 position : POSITION; - float4 patchCoord : PATCHCOORD; // u, v, level, faceID - int4 ptexInfo : PTEXINFO; // u offset, v offset, 2^ptexlevel, rotation + int4 patchCoord : PATCHCOORD; // U offset, V offset, faceLevel, faceId +#ifdef 
OSD_ENABLE_PATCH_CULL int3 clipFlag : CLIPFLAG; +#endif #if defined OSD_PATCH_ENABLE_SINGLE_CREASE float4 P1 : POSITION1; float4 P2 : POSITION2; @@ -55,9 +56,9 @@ struct OutputVertex { float4 positionOut : SV_Position; float4 position : POSITION1; float3 normal : NORMAL; - float3 tangent : TANGENT0; + float3 tangent : TANGENT; float3 bitangent : TANGENT1; - float4 patchCoord : PATCHCOORD; // u, v, level, faceID + float4 patchCoord : PATCHCOORD; // u, v, faceLevel, faceId noperspective float4 edgeDistance : EDGEDISTANCE; #if defined(OSD_COMPUTE_NORMAL_DERIVATIVES) float3 Nu : TANGENT2; @@ -77,7 +78,7 @@ struct GregHullVertex { float3 e1 : POSITION2; uint zerothNeighbor : BLENDINDICE1; float3 org : POSITION3; -#if OSD_MAX_VALENCE > 0 +#if defined OSD_MAX_VALENCE && OSD_MAX_VALENCE > 0 float3 r[OSD_MAX_VALENCE] : POSITION4; #endif }; @@ -88,8 +89,7 @@ struct GregDomainVertex { float3 Em : POSITION2; float3 Fp : POSITION3; float3 Fm : POSITION4; - float4 patchCoord: TEXTURE0; - float4 ptexInfo: TEXTURE1; + int4 patchCoord: PATCHCOORD; }; struct HS_CONSTANT_FUNC_OUT { @@ -107,74 +107,105 @@ float OsdTessLevel(); int OsdGregoryQuadOffsetBase(); int OsdPrimitiveIdBase(); -float GetTessLevel(int patchLevel) -{ -#ifdef OSD_ENABLE_SCREENSPACE_TESSELLATION - return OsdTessLevel(); -#else - return OsdTessLevel() / pow(2, patchLevel-1); -#endif -} - -#ifndef GetPrimitiveID -#define GetPrimitiveID(x) (x + OsdPrimitiveIdBase()) -#endif - -float GetPostProjectionSphereExtent(float3 center, float diameter) -{ - float4 p = mul(OsdProjectionMatrix(), float4(center, 1.0)); - return abs(diameter * OsdProjectionMatrix()[1][1] / p.w); -} - -float TessAdaptive(float3 p0, float3 p1) -{ - // Adaptive factor can be any computation that depends only on arg values. - // Project the diameter of the edge's bounding sphere instead of using the - // length of the projected edge itself to avoid problems near silhouettes. 
- float3 center = (p0 + p1) / 2.0; - float diameter = distance(p0, p1); - return round(max(1.0, OsdTessLevel() * GetPostProjectionSphereExtent(center, diameter))); -} - #ifndef OSD_DISPLACEMENT_CALLBACK #define OSD_DISPLACEMENT_CALLBACK #endif +// ---------------------------------------------------------------------------- +// Patch Parameters +// ---------------------------------------------------------------------------- + +// +// Each patch has a corresponding patchParam. This is a set of three values +// specifying additional information about the patch: +// +// faceId -- topological face identifier (e.g. Ptex FaceId) +// bitfield -- refinement-level, non-quad, boundary, transition, uv-offset +// sharpness -- crease sharpness for single-crease patches +// +// These are stored in OsdPatchParamBuffer indexed by the value returned +// from OsdGetPatchIndex() which is a function of the current PrimitiveID +// along with an optional client provided offset. +// + #if defined OSD_PATCH_ENABLE_SINGLE_CREASE Buffer OsdPatchParamBuffer : register( t3 ); #else Buffer OsdPatchParamBuffer : register( t3 ); #endif -#define GetPatchParam(primitiveID) \ - (OsdPatchParamBuffer[GetPrimitiveID(primitiveID)].y) +int OsdGetPatchIndex(int primitiveId) +{ + return (primitiveId + OsdPrimitiveIdBase()); +} -#define GetPatchLevel(primitiveID) \ - (OsdPatchParamBuffer[GetPrimitiveID(primitiveID)].y & 0xf) +int3 OsdGetPatchParam(int patchIndex) +{ +#if defined OSD_PATCH_ENABLE_SINGLE_CREASE + return OsdPatchParamBuffer[patchIndex].xyz; +#else + uint2 p = OsdPatchParamBuffer[patchIndex].xy; + return int3(p.x, p.y, 0); +#endif +} -#define GetSharpness(primitiveID) \ - (asfloat(OsdPatchParamBuffer[GetPrimitiveID(primitiveID)].z)) +int OsdGetPatchFaceId(int3 patchParam) +{ + return patchParam.x; +} -#define OSD_COMPUTE_PTEX_COORD_HULL_SHADER \ - { \ - int2 ptexIndex = OsdPatchParamBuffer[GetPrimitiveID(primitiveID)].xy; \ - int faceID = ptexIndex.x; \ - int lv = 1 << ((ptexIndex.y & 0x7) - 
((ptexIndex.y >> 3) & 1)); \ - int u = (ptexIndex.y >> 22) & 0x3ff; \ - int v = (ptexIndex.y >> 12) & 0x3ff; \ - output.patchCoord.w = faceID+0.5; \ - output.ptexInfo = int4(u, v, lv, 0); \ - } +int OsdGetPatchFaceLevel(int3 patchParam) +{ + return (1 << ((patchParam.y & 0x7) - ((patchParam.y >> 3) & 1))); +} -#define OSD_COMPUTE_PTEX_COORD_DOMAIN_SHADER \ - { \ - float2 uv = output.patchCoord.xy; \ - int2 p = patch[0].ptexInfo.xy; \ - int lv = patch[0].ptexInfo.z; \ - int rot = patch[0].ptexInfo.w; \ - output.patchCoord.xy = (uv * float2(1.0,1.0)/lv) + float2(p.x, p.y)/lv; \ - } +int OsdGetPatchRefinementLevel(int3 patchParam) +{ + return (patchParam.y & 0x7); +} +int OsdGetPatchBoundaryMask(int3 patchParam) +{ + return ((patchParam.y >> 4) & 0xf); +} + +int OsdGetPatchTransitionMask(int3 patchParam) +{ + return ((patchParam.y >> 8) & 0xf); +} + +int2 OsdGetPatchFaceUV(int3 patchParam) +{ + int u = (patchParam.y >> 22) & 0x3ff; + int v = (patchParam.y >> 12) & 0x3ff; + return int2(u,v); +} + +float OsdGetPatchSharpness(int3 patchParam) +{ + return asfloat(patchParam.z); +} + +int4 OsdGetPatchCoord(int3 patchParam) +{ + int faceId = OsdGetPatchFaceId(patchParam); + int faceLevel = OsdGetPatchFaceLevel(patchParam); + int2 faceUV = OsdGetPatchFaceUV(patchParam); + return int4(faceUV.x, faceUV.y, faceLevel, faceId); +} + +float4 OsdInterpolatePatchCoord(float2 localUV, int4 perPrimPatchCoord) +{ + int faceId = perPrimPatchCoord.w; + int faceLevel = perPrimPatchCoord.z; + float2 faceUV = float2(perPrimPatchCoord.x, perPrimPatchCoord.y); + float2 uv = localUV/faceLevel + faceUV/faceLevel; + return float4(uv.x, uv.y, faceLevel+0.5, faceId+0.5); +} + +// ---------------------------------------------------------------------------- +// patch culling +// ---------------------------------------------------------------------------- #ifdef OSD_ENABLE_PATCH_CULL @@ -203,35 +234,24 @@ float TessAdaptive(float3 p0, float3 p1) return output; \ } -#define OSD_PATCH_CULL_TRIANGLE(N) 
\ - int3 clipFlag = int3(0,0,0); \ - for(int i = 0; i < N; ++i) { \ - clipFlag |= patch[i].clipFlag; \ - } \ - if (any(clipFlag != int3(3,3,3))) { \ - output.tessLevelInner = 0; \ - output.tessLevelOuter[0] = 0; \ - output.tessLevelOuter[1] = 0; \ - output.tessLevelOuter[2] = 0; \ - return output; \ - } - #else #define OSD_PATCH_CULL_COMPUTE_CLIPFLAGS(P) #define OSD_PATCH_CULL(N) -#define OSD_PATCH_CULL_TRIANGLE(N) #endif -void Univar4x4(in float u, out float B[4], out float D[4]) +// ---------------------------------------------------------------------------- + +void +Univar4x4(in float u, out float B[4], out float D[4]) { float t = u; float s = 1.0f - u; - float A0 = s * s; + float A0 = s * s; float A1 = 2 * s * t; float A2 = t * t; - B[0] = s * A0; + B[0] = s * A0; B[1] = t * A0 + s * A1; B[2] = t * A1 + s * A2; B[3] = t * A2; @@ -275,9 +295,11 @@ Univar4x4(in float u, out float B[4], out float D[4], out float C[4]) // ---------------------------------------------------------------------------- float3 -EvalBezier(float3 cp[16], float2 uv) +OsdEvalBezier(float3 cp[16], float2 uv) { - float3 BUCP[4] = { float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) }; + float3 BUCP[4] = { + float3(0,0,0), float3(0,0,0), float3(0,0,0), float3(0,0,0) + }; float B[4], D[4]; @@ -300,3 +322,304 @@ EvalBezier(float3 cp[16], float2 uv) } // ---------------------------------------------------------------------------- +// Boundary Interpolation +// ---------------------------------------------------------------------------- + +void +OsdComputeBSplineBoundaryPoints(inout float3 cpt[16], int3 patchParam) +{ + int boundaryMask = OsdGetPatchBoundaryMask(patchParam); + + if ((boundaryMask & 1) != 0) { + cpt[0] = 2*cpt[4] - cpt[8]; + cpt[1] = 2*cpt[5] - cpt[9]; + cpt[2] = 2*cpt[6] - cpt[10]; + cpt[3] = 2*cpt[7] - cpt[11]; + } + if ((boundaryMask & 2) != 0) { + cpt[3] = 2*cpt[2] - cpt[1]; + cpt[7] = 2*cpt[6] - cpt[5]; + cpt[11] = 2*cpt[10] - cpt[9]; + cpt[15] = 2*cpt[14] - cpt[13]; + 
} + if ((boundaryMask & 4) != 0) { + cpt[12] = 2*cpt[8] - cpt[4]; + cpt[13] = 2*cpt[9] - cpt[5]; + cpt[14] = 2*cpt[10] - cpt[6]; + cpt[15] = 2*cpt[11] - cpt[7]; + } + if ((boundaryMask & 8) != 0) { + cpt[0] = 2*cpt[1] - cpt[2]; + cpt[4] = 2*cpt[5] - cpt[6]; + cpt[8] = 2*cpt[9] - cpt[10]; + cpt[12] = 2*cpt[13] - cpt[14]; + } +} + +// ---------------------------------------------------------------------------- +// Tessellation +// ---------------------------------------------------------------------------- + +// +// Organization of B-spline and Bezier control points. +// +// Each patch is defined by 16 control points (labeled 0-15). +// +// The patch will be evaluated across the domain from (0,0) at +// the lower-left to (1,1) at the upper-right. When computing +// adaptive tessellation metrics, we consider refined vertex-vertex +// and edge-vertex points along the transition edges of the patch +// (labeled vv* and ev* respectively). +// +// The two segments of each transition edge are labeled Lo and Hi, +// with the Lo segment occuring before the Hi segment along the +// transition edge's domain parameterization. These Lo and Hi segment +// tessellation levels determine how domain evaluation coordinates +// are remapped along transition edges. The Hi segment value will +// be zero for a non-transition edge. 
+// +// (0,1) (1,1) +// +// vv3 ev23 vv2 +// | Lo3 | Hi3 | +// --O-----------O-----+-----O-----------O-- +// | 12 | 13 14 | 15 | +// | | | | +// | | | | +// Hi0 | | | | Hi2 +// | | | | +// O-----------O-----------O-----------O +// | 8 | 9 10 | 11 | +// | | | | +// ev03 --+ | | +-- ev12 +// | | | | +// | 4 | 5 6 | 7 | +// O-----------O-----------O-----------O +// | | | | +// Lo0 | | | | Lo2 +// | | | | +// | | | | +// | 0 | 1 2 | 3 | +// --O-----------O-----+-----O-----------O-- +// | Lo1 | Hi1 | +// vv0 ev01 vv1 +// +// (0,0) (1,0) +// + +float OsdComputePostProjectionSphereExtent(float3 center, float diameter) +{ + float4 p = mul(OsdProjectionMatrix(), float4(center, 1.0)); + return abs(diameter * OsdProjectionMatrix()[1][1] / p.w); +} + +float OsdComputeTessLevel(float3 p0, float3 p1) +{ + // Adaptive factor can be any computation that depends only on arg values. + // Project the diameter of the edge's bounding sphere instead of using the + // length of the projected edge itself to avoid problems near silhouettes. + float3 center = (p0 + p1) / 2.0; + float diameter = distance(p0, p1); + float projLength = OsdComputePostProjectionSphereExtent(center, diameter); + return round(max(1.0, OsdTessLevel() * projLength)); +} + +void +OsdGetTessLevelsUniform(int3 patchParam, + inout float4 tessOuterLo, inout float4 tessOuterHi) +{ + int refinementLevel = OsdGetPatchRefinementLevel(patchParam); + float tessLevel = OsdTessLevel() / pow(2, refinementLevel-1); + + tessOuterLo = float4(tessLevel,tessLevel,tessLevel,tessLevel); + tessOuterHi = float4(0,0,0,0); +} + +void +OsdGetTessLevelsRefinedPoints(float3 cp[16], int3 patchParam, + inout float4 tessOuterLo, inout float4 tessOuterHi) +{ + // Each edge of a transition patch is adjacent to one or two patches + // at the next refined level of subdivision. We compute the corresponding + // vertex-vertex and edge-vertex refined points along the edges of the + // patch using Catmull-Clark subdivision stencil weights. 
+ // For simplicity, we let the optimizer discard unused computation. + + float3 vv0 = (cp[0] + cp[2] + cp[8] + cp[10]) * 0.015625 + + (cp[1] + cp[4] + cp[6] + cp[9]) * 0.09375 + cp[5] * 0.5625; + float3 ev01 = (cp[1] + cp[2] + cp[9] + cp[10]) * 0.0625 + + (cp[5] + cp[6]) * 0.375; + + float3 vv1 = (cp[1] + cp[3] + cp[9] + cp[11]) * 0.015625 + + (cp[2] + cp[5] + cp[7] + cp[10]) * 0.09375 + cp[6] * 0.5625; + float3 ev12 = (cp[5] + cp[7] + cp[9] + cp[11]) * 0.0625 + + (cp[6] + cp[10]) * 0.375; + + float3 vv2 = (cp[5] + cp[7] + cp[13] + cp[15]) * 0.015625 + + (cp[6] + cp[9] + cp[11] + cp[14]) * 0.09375 + cp[10] * 0.5625; + float3 ev23 = (cp[5] + cp[6] + cp[13] + cp[14]) * 0.0625 + + (cp[9] + cp[10]) * 0.375; + + float3 vv3 = (cp[4] + cp[6] + cp[12] + cp[14]) * 0.015625 + + (cp[5] + cp[8] + cp[10] + cp[13]) * 0.09375 + cp[9] * 0.5625; + float3 ev03 = (cp[4] + cp[6] + cp[8] + cp[10]) * 0.0625 + + (cp[5] + cp[9]) * 0.375; + + tessOuterLo = float4(0,0,0,0); + tessOuterHi = float4(0,0,0,0); + + int transitionMask = OsdGetPatchTransitionMask(patchParam); + + if ((transitionMask & 8) != 0) { + tessOuterLo[0] = OsdComputeTessLevel(vv0, ev03); + tessOuterHi[0] = OsdComputeTessLevel(vv3, ev03); + } else { + tessOuterLo[0] = OsdComputeTessLevel(cp[5], cp[9]); + } + if ((transitionMask & 1) != 0) { + tessOuterLo[1] = OsdComputeTessLevel(vv0, ev01); + tessOuterHi[1] = OsdComputeTessLevel(vv1, ev01); + } else { + tessOuterLo[1] = OsdComputeTessLevel(cp[5], cp[6]); + } + if ((transitionMask & 2) != 0) { + tessOuterLo[2] = OsdComputeTessLevel(vv1, ev12); + tessOuterHi[2] = OsdComputeTessLevel(vv2, ev12); + } else { + tessOuterLo[2] = OsdComputeTessLevel(cp[6], cp[10]); + } + if ((transitionMask & 4) != 0) { + tessOuterLo[3] = OsdComputeTessLevel(vv3, ev23); + tessOuterHi[3] = OsdComputeTessLevel(vv2, ev23); + } else { + tessOuterLo[3] = OsdComputeTessLevel(cp[9], cp[10]); + } +} + +void +OsdGetTessLevelsLimitPoints(float3 cpBezier[16], int3 patchParam, + inout float4 tessOuterLo, 
inout float4 tessOuterHi) +{ + // Each edge of a transition patch is adjacent to one or two patches + // at the next refined level of subdivision. When the patch control + // points have been converted to the Bezier basis, the control points + // at the four corners are on the limit surface (since a Bezier patch + // interpolates its corner control points). We can compute an adaptive + // tessellation level for transition edges on the limit surface by + // evaluating a limit position at the mid point of each transition edge. + + tessOuterLo = float4(0,0,0,0); + tessOuterHi = float4(0,0,0,0); + + int transitionMask = OsdGetPatchTransitionMask(patchParam); + + if ((transitionMask & 8) != 0) { + float3 ev03 = OsdEvalBezier(cpBezier, float2(0.0, 0.5)); + tessOuterLo[0] = OsdComputeTessLevel(cpBezier[0], ev03); + tessOuterHi[0] = OsdComputeTessLevel(cpBezier[12], ev03); + } else { + tessOuterLo[0] = OsdComputeTessLevel(cpBezier[0], cpBezier[12]); + } + if ((transitionMask & 1) != 0) { + float3 ev01 = OsdEvalBezier(cpBezier, float2(0.5, 0.0)); + tessOuterLo[1] = OsdComputeTessLevel(cpBezier[0], ev01); + tessOuterHi[1] = OsdComputeTessLevel(cpBezier[3], ev01); + } else { + tessOuterLo[1] = OsdComputeTessLevel(cpBezier[0], cpBezier[3]); + } + if ((transitionMask & 2) != 0) { + float3 ev12 = OsdEvalBezier(cpBezier, float2(1.0, 0.5)); + tessOuterLo[2] = OsdComputeTessLevel(cpBezier[3], ev12); + tessOuterHi[2] = OsdComputeTessLevel(cpBezier[15], ev12); + } else { + tessOuterLo[2] = OsdComputeTessLevel(cpBezier[3], cpBezier[15]); + } + if ((transitionMask & 4) != 0) { + float3 ev23 = OsdEvalBezier(cpBezier, float2(0.5, 1.0)); + tessOuterLo[3] = OsdComputeTessLevel(cpBezier[12], ev23); + tessOuterHi[3] = OsdComputeTessLevel(cpBezier[15], ev23); + } else { + tessOuterLo[3] = OsdComputeTessLevel(cpBezier[12], cpBezier[15]); + } +} + +void +OsdGetTessLevels(float3 cp[16], int3 patchParam, + inout float4 tessLevelOuter, inout float4 tessLevelInner, + inout float4 tessOuterLo, inout 
float4 tessOuterHi) +{ +#if defined OSD_ENABLE_SCREENSPACE_TESSELLATION + OsdGetTessLevelsLimitPoints(cp, patchParam, tessOuterLo, tessOuterHi); +#elif defined OSD_ENABLE_SCREENSPACE_TESSELLATION_REFINED + OsdGetTessLevelsRefinedPoints(cp, patchParam, tessOuterLo, tessOuterHi); +#else + OsdGetTessLevelsUniform(patchParam, tessOuterLo, tessOuterHi); +#endif + + // Outer levels are the sum of the Lo and Hi segments where the Hi + // segments will have a length of zero for non-transition edges. + tessLevelOuter = tessOuterLo + tessOuterHi; + + // Inner levels are the average the corresponding outer levels. + tessLevelInner[0] = (tessLevelOuter[1] + tessLevelOuter[3]) * 0.5; + tessLevelInner[1] = (tessLevelOuter[0] + tessLevelOuter[2]) * 0.5; +} + +void +OsdGetTessLevels(float3 cp0, float3 cp1, float3 cp2, float3 cp3, + int3 patchParam, + inout float4 tessLevelOuter, inout float4 tessLevelInner) +{ + float4 tessOuterLo = float4(0,0,0,0); + float4 tessOuterHi = float4(0,0,0,0); + +#if defined OSD_ENABLE_SCREENSPACE_TESSELLATION + tessOuterLo[0] = OsdComputeTessLevel(cp0, cp1); + tessOuterLo[1] = OsdComputeTessLevel(cp0, cp3); + tessOuterLo[2] = OsdComputeTessLevel(cp2, cp3); + tessOuterLo[3] = OsdComputeTessLevel(cp1, cp2); + tessOuterHi = float4(0,0,0,0); +#else + OsdGetTessLevelsUniform(patchParam, tessOuterLo, tessOuterHi); +#endif + + // Outer levels are the sum of the Lo and Hi segments where the Hi + // segments will have a length of zero for non-transition edges. + tessLevelOuter = tessOuterLo + tessOuterHi; + + // Inner levels are the average the corresponding outer levels. 
+ tessLevelInner[0] = (tessLevelOuter[1] + tessLevelOuter[3]) * 0.5; + tessLevelInner[1] = (tessLevelOuter[0] + tessLevelOuter[2]) * 0.5; +} + +float +OsdGetTessTransitionSplit(float t, float n0, float n1) +{ + float ti = round(t * (n0 + n1)); + + if (ti <= n0) { + return 0.5 * (ti / n0); + } else { + return 0.5 * ((ti - n0) / n1) + 0.5; + } +} + +float2 +OsdGetTessParameterization(float2 uv, float4 tessOuterLo, float4 tessOuterHi) +{ + float2 UV = uv; + if (UV.x == 0 && tessOuterHi[0] > 0) { + UV.y = OsdGetTessTransitionSplit(UV.y, tessOuterLo[0], tessOuterHi[0]); + } else + if (UV.y == 0 && tessOuterHi[1] > 0) { + UV.x = OsdGetTessTransitionSplit(UV.x, tessOuterLo[1], tessOuterHi[1]); + } else + if (UV.x == 1 && tessOuterHi[2] > 0) { + UV.y = OsdGetTessTransitionSplit(UV.y, tessOuterLo[2], tessOuterHi[2]); + } else + if (UV.y == 1 && tessOuterHi[3] > 0) { + UV.x = OsdGetTessTransitionSplit(UV.x, tessOuterLo[3], tessOuterHi[3]); + } + return UV; +} + diff --git a/opensubdiv/osd/hlslPatchGregory.hlsl b/opensubdiv/osd/hlslPatchGregory.hlsl index 664ace03..3cc6624f 100644 --- a/opensubdiv/osd/hlslPatchGregory.hlsl +++ b/opensubdiv/osd/hlslPatchGregory.hlsl @@ -267,31 +267,29 @@ HS_CONSTANT_FUNC_OUT HSConstFunc( uint primitiveID : SV_PrimitiveID) { HS_CONSTANT_FUNC_OUT output; - int patchLevel = GetPatchLevel(primitiveID); + + int3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(primitiveID)); OSD_PATCH_CULL(4); -#ifdef OSD_ENABLE_SCREENSPACE_TESSELLATION - output.tessLevelOuter[0] = - TessAdaptive(patch[0].hullPosition.xyz, patch[1].hullPosition.xyz); - output.tessLevelOuter[1] = - TessAdaptive(patch[0].hullPosition.xyz, patch[3].hullPosition.xyz); - output.tessLevelOuter[2] = - TessAdaptive(patch[2].hullPosition.xyz, patch[3].hullPosition.xyz); - output.tessLevelOuter[3] = - TessAdaptive(patch[1].hullPosition.xyz, patch[2].hullPosition.xyz); - output.tessLevelInner[0] = - max(output.tessLevelOuter[1], output.tessLevelOuter[3]); - output.tessLevelInner[1] = - 
max(output.tessLevelOuter[0], output.tessLevelOuter[2]); -#else - output.tessLevelInner[0] = GetTessLevel(patchLevel); - output.tessLevelInner[1] = GetTessLevel(patchLevel); - output.tessLevelOuter[0] = GetTessLevel(patchLevel); - output.tessLevelOuter[1] = GetTessLevel(patchLevel); - output.tessLevelOuter[2] = GetTessLevel(patchLevel); - output.tessLevelOuter[3] = GetTessLevel(patchLevel); -#endif + float4 tessLevelOuter = float4(0,0,0,0); + float4 tessLevelInner = float4(0,0,0,0); + + OsdGetTessLevels(patch[0].hullPosition.xyz, patch[1].hullPosition.xyz, + patch[2].hullPosition.xyz, patch[3].hullPosition.xyz, + patchParam, tessLevelOuter, tessLevelInner); + + output.tessLevelOuter[0] = tessLevelOuter[0]; + output.tessLevelOuter[1] = tessLevelOuter[1]; + output.tessLevelOuter[2] = tessLevelOuter[2]; + output.tessLevelOuter[3] = tessLevelOuter[3]; + + output.tessLevelInner[0] = tessLevelInner[0]; + output.tessLevelInner[1] = tessLevelInner[1]; + + output.tessOuterLo = float4(0,0,0,0); + output.tessOuterHi = float4(0,0,0,0); + return output; } @@ -446,12 +444,9 @@ GregDomainVertex hs_main_patches( output.Fp = Fp; output.Fm = Fm; - int patchLevel = GetPatchLevel(primitiveID); - output.patchCoord = float4(0, 0, - patchLevel+0.5f, - GetPrimitiveID(primitiveID)+0.5f); + int3 patchParam = OsdGetPatchParam(OsdGetPatchIndex(primitiveID)); - OSD_COMPUTE_PTEX_COORD_HULL_SHADER; + output.patchCoord = OsdGetPatchCoord(patchParam); return output; } @@ -527,8 +522,6 @@ void ds_main_patches( float3 Tangent = float3(0, 0, 0); float3 BiTangent = float3(0, 0, 0); -#line 519 - #ifdef OSD_COMPUTE_NORMAL_DERIVATIVES float B[4], D[4], C[4]; @@ -564,7 +557,7 @@ void ds_main_patches( dUV += D[i] * DUCP[i]; } - int level = int(patch[0].ptexInfo.z); + int level = patch[0].patchCoord.z; BiTangent *= 3 * level; Tangent *= 3 * level; dUU *= 6 * level; @@ -619,7 +612,7 @@ void ds_main_patches( Tangent += B[i] * DUCP[i]; BiTangent += D[i] * BUCP[i]; } - int level = int(patch[0].ptexInfo.z); + 
int level = patch[0].patchCoord.z; BiTangent *= 3 * level; Tangent *= 3 * level; @@ -635,12 +628,10 @@ void ds_main_patches( output.tangent = BiTangent; output.bitangent = Tangent; - output.patchCoord = patch[0].patchCoord; - output.patchCoord.xy = float2(v, u); - - output.edgeDistance = 0; + output.edgeDistance = 0; - OSD_COMPUTE_PTEX_COORD_DOMAIN_SHADER; + float2 UV = float2(v, u); + output.patchCoord = OsdInterpolatePatchCoord(UV, patch[0].patchCoord); OSD_DISPLACEMENT_CALLBACK; diff --git a/opensubdiv/osd/hlslPatchTransition.hlsl b/opensubdiv/osd/hlslPatchTransition.hlsl deleted file mode 100644 index a5f44e15..00000000 --- a/opensubdiv/osd/hlslPatchTransition.hlsl +++ /dev/null @@ -1,248 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -//---------------------------------------------------------- -// Patches.HullTransition -//---------------------------------------------------------- - -void -GetTessLevelsUniform(float3 cp[16], int patchParam, int patchLevel, - inout float4 tessOuterLo, inout float4 tessOuterHi) -{ - float tessAmount = GetTessLevel(patchLevel); - - tessOuterLo = float4(tessAmount,tessAmount,tessAmount,tessAmount); - tessOuterHi = float4(0,0,0,0); -} - -// -// Organization of B-spline and Bezier control points. -// -// Each patch is defined by 16 control points (labeled 0-15). -// -// The patch will be evaluated across the domain from (0,0) at -// the lower-left to (1,1) at the upper-right. When computing -// adaptive tessellation metrics, we consider refined vertex-vertex -// and edge-vertex points along the transition edges of the patch -// (labeled vv* and ev* respectively). -// -// The two segments of each transition edge are labeled Lo and Hi, -// with the Lo segment occuring before the Hi segment along the -// transition edge's domain parameterization. These Lo and Hi segment -// tessellation levels determine how domain evaluation coordinates -// are remapped along transition edges. The Hi segment value will -// be zero for a non-transition edge. 
-// -// (0,1) (1,1) -// -// vv3 ev23 vv2 -// | Lo3 | Hi3 | -// --O-----------O-----+-----O-----------O-- -// | 12 | 13 14 | 15 | -// | | | | -// | | | | -// Hi0 | | | | Hi2 -// | | | | -// O-----------O-----------O-----------O -// | 8 | 9 10 | 11 | -// | | | | -// ev03 --+ | | +-- ev12 -// | | | | -// | 4 | 5 6 | 7 | -// O-----------O-----------O-----------O -// | | | | -// Lo0 | | | | Lo2 -// | | | | -// | | | | -// | 0 | 1 2 | 3 | -// --O-----------O-----+-----O-----------O-- -// | Lo1 | Hi1 | -// vv0 ev01 vv1 -// -// (0,0) (1,0) -// - -void -GetTessLevelsRefinedPoints(float3 cp[16], int patchParam, - inout float4 tessOuterLo, inout float4 tessOuterHi) -{ - // Each edge of a transition patch is adjacent to one or two patches - // at the next refined level of subdivision. We compute the corresponding - // vertex-vertex and edge-vertex refined points along the edges of the - // patch using Catmull-Clark subdivision stencil weights. - // For simplicity, we let the optimizer discard unused computation. 
- - float3 vv0 = (cp[0] + cp[2] + cp[8] + cp[10]) * 0.015625 + - (cp[1] + cp[4] + cp[6] + cp[9]) * 0.09375 + cp[5] * 0.5625; - float3 ev01 = (cp[1] + cp[2] + cp[9] + cp[10]) * 0.0625 + - (cp[5] + cp[6]) * 0.375; - - float3 vv1 = (cp[1] + cp[3] + cp[9] + cp[11]) * 0.015625 + - (cp[2] + cp[5] + cp[7] + cp[10]) * 0.09375 + cp[6] * 0.5625; - float3 ev12 = (cp[5] + cp[7] + cp[9] + cp[11]) * 0.0625 + - (cp[6] + cp[10]) * 0.375; - - float3 vv2 = (cp[5] + cp[7] + cp[13] + cp[15]) * 0.015625 + - (cp[6] + cp[9] + cp[11] + cp[14]) * 0.09375 + cp[10] * 0.5625; - float3 ev23 = (cp[5] + cp[6] + cp[13] + cp[14]) * 0.0625 + - (cp[9] + cp[10]) * 0.375; - - float3 vv3 = (cp[4] + cp[6] + cp[12] + cp[14]) * 0.015625 + - (cp[5] + cp[8] + cp[10] + cp[13]) * 0.09375 + cp[9] * 0.5625; - float3 ev03 = (cp[4] + cp[6] + cp[8] + cp[10]) * 0.0625 + - (cp[5] + cp[9]) * 0.375; - - tessOuterLo = float4(1,1,1,1); - tessOuterHi = float4(0,0,0,0); - - if (((patchParam >> 11) & 1) != 0) { - tessOuterLo[0] = TessAdaptive(vv0, ev03); - tessOuterHi[0] = TessAdaptive(vv3, ev03); - } else { - tessOuterLo[0] = TessAdaptive(cp[5], cp[9]); - } - if (((patchParam >> 8) & 1) != 0) { - tessOuterLo[1] = TessAdaptive(vv0, ev01); - tessOuterHi[1] = TessAdaptive(vv1, ev01); - } else { - tessOuterLo[1] = TessAdaptive(cp[5], cp[6]); - } - if (((patchParam >> 9) & 1) != 0) { - tessOuterLo[2] = TessAdaptive(vv1, ev12); - tessOuterHi[2] = TessAdaptive(vv2, ev12); - } else { - tessOuterLo[2] = TessAdaptive(cp[6], cp[10]); - } - if (((patchParam >> 10) & 1) != 0) { - tessOuterLo[3] = TessAdaptive(vv3, ev23); - tessOuterHi[3] = TessAdaptive(vv2, ev23); - } else { - tessOuterLo[3] = TessAdaptive(cp[9], cp[10]); - } -} - -void -GetTessLevelsLimitPoints(float3 cpBezier[16], int patchParam, - inout float4 tessOuterLo, inout float4 tessOuterHi) -{ - // Each edge of a transition patch is adjacent to one or two patches - // at the next refined level of subdivision. 
When the patch control - // points have been converted to the Bezier basis, the control points - // at the four corners are on the limit surface (since a Bezier patch - // interpolates its corner control points). We can compute an adaptive - // tessellation level for transition edges on the limit surface by - // evaluating a limit position at the mid point of each transition edge. - - tessOuterLo = float4(1,1,1,1); - tessOuterHi = float4(0,0,0,0); - - if (((patchParam >> 11) & 1) != 0) { - float3 ev03 = EvalBezier(cpBezier, float2(0.0, 0.5)); - tessOuterLo[0] = TessAdaptive(cpBezier[0], ev03); - tessOuterHi[0] = TessAdaptive(cpBezier[12], ev03); - } else { - tessOuterLo[0] = TessAdaptive(cpBezier[0], cpBezier[12]); - } - if (((patchParam >> 8) & 1) != 0) { - float3 ev01 = EvalBezier(cpBezier, float2(0.5, 0.0)); - tessOuterLo[1] = TessAdaptive(cpBezier[0], ev01); - tessOuterHi[1] = TessAdaptive(cpBezier[3], ev01); - } else { - tessOuterLo[1] = TessAdaptive(cpBezier[0], cpBezier[3]); - } - if (((patchParam >> 9) & 1) != 0) { - float3 ev12 = EvalBezier(cpBezier, float2(1.0, 0.5)); - tessOuterLo[2] = TessAdaptive(cpBezier[3], ev12); - tessOuterHi[2] = TessAdaptive(cpBezier[15], ev12); - } else { - tessOuterLo[2] = TessAdaptive(cpBezier[3], cpBezier[15]); - } - if (((patchParam >> 10) & 1) != 0) { - float3 ev23 = EvalBezier(cpBezier, float2(0.5, 1.0)); - tessOuterLo[3] = TessAdaptive(cpBezier[12], ev23); - tessOuterHi[3] = TessAdaptive(cpBezier[15], ev23); - } else { - tessOuterLo[3] = TessAdaptive(cpBezier[12], cpBezier[15]); - } -} - -void -GetTransitionTessLevels( - float3 cp[16], int patchParam, int patchLevel, - inout float4 outerLevel, inout float4 innerLevel, - inout float4 tessOuterLo, inout float4 tessOuterHi) -{ -#if defined OSD_ENABLE_SCREENSPACE_TESSELLATION - GetTessLevelsLimitPoints(cp, patchParam, tessOuterLo, tessOuterHi); -#elif defined OSD_ENABLE_SCREENSPACE_TESSELLATION_REFINED - GetTessLevelsRefinedPoints(cp, patchParam, tessOuterLo, tessOuterHi); 
-#else - GetTessLevelsUniform(cp, patchParam, patchLevel, tessOuterLo, tessOuterHi); -#endif - - // Outer levels are the sum of the Lo and Hi segments where the Hi - // segments will have a length of zero for non-transition edges. - outerLevel = tessOuterLo + tessOuterHi; - - // Inner levels are the average the corresponding outer levels. - innerLevel[0] = (outerLevel[1] + outerLevel[3]) * 0.5; - innerLevel[1] = (outerLevel[0] + outerLevel[2]) * 0.5; -} - -//---------------------------------------------------------- -// Patches.DomainTransition -//---------------------------------------------------------- - -float -GetTransitionSplit(float t, float n0, float n1) -{ - float ti = round(t * (n0 + n1)); - - if (ti <= n0) { - return 0.5 * (ti / n0); - } else { - return 0.5 * ((ti - n0) / n1) + 0.5; - } -} - -float2 -GetTransitionParameterization( - in HS_CONSTANT_FUNC_OUT input, - in float2 uv) -{ - float2 UV = uv.xy; - if (UV.x == 0 && input.tessOuterHi[0] > 0) { - UV.y = GetTransitionSplit(UV.y, input.tessOuterLo[0], input.tessOuterHi[0]); - } else - if (UV.y == 0 && input.tessOuterHi[1] > 0) { - UV.x = GetTransitionSplit(UV.x, input.tessOuterLo[1], input.tessOuterHi[1]); - } else - if (UV.x == 1 && input.tessOuterHi[2] > 0) { - UV.y = GetTransitionSplit(UV.y, input.tessOuterLo[2], input.tessOuterHi[2]); - } else - if (UV.y == 1 && input.tessOuterHi[3] > 0) { - UV.x = GetTransitionSplit(UV.x, input.tessOuterLo[3], input.tessOuterHi[3]); - } - return UV; -} diff --git a/opensubdiv/osd/mesh.h b/opensubdiv/osd/mesh.h index aa9d8949..6f200859 100644 --- a/opensubdiv/osd/mesh.h +++ b/opensubdiv/osd/mesh.h @@ -37,9 +37,10 @@ #include "../far/stencilTables.h" #include "../far/stencilTablesFactory.h" -#include "../osd/vertex.h" #include "../osd/vertexDescriptor.h" +struct ID3D11DeviceContext; + namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { @@ -48,13 +49,12 @@ namespace Osd { enum MeshBits { MeshAdaptive = 0, MeshInterleaveVarying = 1, - MeshPtexData = 2, - 
MeshFVarData = 3, - MeshUseSingleCreasePatch = 4, - MeshEndCapBSplineBasis = 5, // exclusive - MeshEndCapGregoryBasis = 6, // exclusive - MeshEndCapLegacyGregory = 7, // exclusive - NUM_MESH_BITS = 8, + MeshFVarData = 2, + MeshUseSingleCreasePatch = 3, + MeshEndCapBSplineBasis = 4, // exclusive + MeshEndCapGregoryBasis = 5, // exclusive + MeshEndCapLegacyGregory = 6, // exclusive + NUM_MESH_BITS = 7, }; typedef std::bitset MeshBitset; @@ -81,13 +81,6 @@ public: virtual void Refine() = 0; - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc) = 0; - - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc, - bool interleaved) = 0; - virtual void Synchronize() = 0; virtual DrawContext * GetDrawContext() = 0; @@ -120,25 +113,143 @@ protected: // --------------------------------------------------------------------------- -template +template +STENCIL_TABLES const * +convertToCompatibleStencilTables( + Far::StencilTables const *table, DEVICE_CONTEXT *context) { + if (not table) return NULL; + return STENCIL_TABLES::Create(table, context); +} + +template <> +Far::StencilTables const * +convertToCompatibleStencilTables( + Far::StencilTables const *table, void * /*context*/) { + // no need for conversion + // XXX: We don't want to even copy. + if (not table) return NULL; + return new Far::StencilTables(*table); +} + +template <> +Far::StencilTables const * +convertToCompatibleStencilTables( + Far::StencilTables const *table, ID3D11DeviceContext * /*context*/) { + // no need for conversion + // XXX: We don't want to even copy. 
+ if (not table) return NULL; + return new Far::StencilTables(*table); +} + +// --------------------------------------------------------------------------- + +template +class EvaluatorCacheT { +public: + ~EvaluatorCacheT() { + for(typename Evaluators::iterator it = _evaluators.begin(); + it != _evaluators.end(); ++it) { + delete it->evaluator; + } + } + + // XXX: FIXME, linear search + struct Entry { + Entry(VertexBufferDescriptor const &sd, + VertexBufferDescriptor const &dd, + EVALUATOR *e) : srcDesc(sd), dstDesc(dd), evaluator(e) {} + VertexBufferDescriptor srcDesc, dstDesc; + EVALUATOR *evaluator; + }; + typedef std::vector Evaluators; + + template + EVALUATOR *GetEvaluator(VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + DEVICE_CONTEXT *deviceContext) { + + for(typename Evaluators::iterator it = _evaluators.begin(); + it != _evaluators.end(); ++it) { + if (it->srcDesc.length == srcDesc.length and + it->srcDesc.stride == srcDesc.stride and + it->dstDesc.length == dstDesc.length and + it->dstDesc.stride == dstDesc.stride) { + return it->evaluator; + } + } + EVALUATOR *e = EVALUATOR::Create(srcDesc, dstDesc, deviceContext); + _evaluators.push_back(Entry(srcDesc, dstDesc, e)); + return e; + } + +private: + Evaluators _evaluators; +}; + + +// template helpers to see if the evaluator is instantiatable or not. 
+template +struct instantiatable +{ + typedef char yes[1]; + typedef char no[2]; + template static yes &chk(typename C::Instantiatable *t=0); + template static no &chk(...); + static bool const value = sizeof(chk(0)) == sizeof(yes); +}; +template +struct enable_if { typedef T type; }; +template +struct enable_if { }; + +// extract a kernel from cache if available +template +static EVALUATOR *GetEvaluator( + EvaluatorCacheT *cache, + VertexBufferDescriptor const &srcDesc, + VertexBufferDescriptor const &dstDesc, + DEVICE_CONTEXT deviceContext, + typename enable_if::value, void>::type*t=0) { + (void)t; + if (cache == NULL) return NULL; + return cache->GetEvaluator(srcDesc, dstDesc, deviceContext); +} + +// fallback +template +static EVALUATOR *GetEvaluator( + EvaluatorCacheT *, + VertexBufferDescriptor const &, + VertexBufferDescriptor const &, + DEVICE_CONTEXT, + typename enable_if::value, void>::type*t=0) { + (void)t; + return NULL; +} + +// --------------------------------------------------------------------------- + +template class Mesh : public MeshInterface { public: typedef VERTEX_BUFFER VertexBuffer; - typedef COMPUTE_CONTROLLER ComputeController; + typedef EVALUATOR Evaluator; + typedef STENCIL_TABLES StencilTables; typedef DRAW_CONTEXT DrawContext; typedef DEVICE_CONTEXT DeviceContext; - typedef typename ComputeController::ComputeContext ComputeContext; + typedef EvaluatorCacheT EvaluatorCache; typedef typename DrawContext::VertexBufferBinding VertexBufferBinding; - Mesh(ComputeController * computeController, - Far::TopologyRefiner * refiner, + Mesh(Far::TopologyRefiner * refiner, int numVertexElements, int numVaryingElements, int level, MeshBitset bits = MeshBitset(), + EvaluatorCache * evaluatorCache = NULL, DeviceContext * deviceContext = NULL) : _refiner(refiner), @@ -146,8 +257,9 @@ public: _numVertices(0), _vertexBuffer(NULL), _varyingBuffer(NULL), - _computeContext(NULL), - _computeController(computeController), + _vertexStencilTables(NULL), + 
_varyingStencilTables(NULL), + _evaluatorCache(evaluatorCache), _drawContext(NULL), _deviceContext(deviceContext) { @@ -158,18 +270,34 @@ public: bits.test(MeshAdaptive), bits.test(MeshUseSingleCreasePatch)); - int numVertexElementsInterleaved = numVertexElements + + int vertexBufferStride = numVertexElements + (bits.test(MeshInterleaveVarying) ? numVaryingElements : 0); - int numVaryingElementsNonInterleaved = + int varyingBufferStride = (bits.test(MeshInterleaveVarying) ? 0 : numVaryingElements); initializeContext(numVertexElements, numVaryingElements, - numVertexElementsInterleaved, level, bits); + level, bits); initializeVertexBuffers(_numVertices, - numVertexElementsInterleaved, - numVaryingElementsNonInterleaved); + vertexBufferStride, + varyingBufferStride); + + // configure vertex buffer descriptor + _vertexDesc = VertexBufferDescriptor(0, + numVertexElements, + vertexBufferStride); + if (bits.test(MeshInterleaveVarying)) { + _varyingDesc = VertexBufferDescriptor(numVertexElements, + numVaryingElements, + vertexBufferStride); + } else { + _varyingDesc = VertexBufferDescriptor(0, + numVaryingElements, + varyingBufferStride); + } + + // will retire _drawContext->UpdateVertexTexture(_vertexBuffer, _deviceContext); @@ -180,9 +308,10 @@ public: delete _patchTables; delete _vertexBuffer; delete _varyingBuffer; - delete _computeContext; + delete _vertexStencilTables; + delete _varyingStencilTables; delete _drawContext; - // devicecontext and computecontroller are not owned by this class. + // deviceContext and evaluatorCache are not owned by this class. 
} virtual void UpdateVertexBuffer(float const *vertexData, @@ -198,29 +327,50 @@ public: } virtual void Refine() { - _computeController->Compute(_computeContext, - _vertexBuffer, _varyingBuffer); - } - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc) { - _computeController->Compute(_computeContext, - _vertexBuffer, _varyingBuffer, - vertexDesc, varyingDesc); - } + int numControlVertices = _refiner->GetNumVertices(0); - virtual void Refine(VertexBufferDescriptor const *vertexDesc, - VertexBufferDescriptor const *varyingDesc, - bool interleaved) { - _computeController->Compute(_computeContext, - _vertexBuffer, - (interleaved ? - _vertexBuffer : _varyingBuffer), - vertexDesc, varyingDesc); + VertexBufferDescriptor srcDesc = _vertexDesc; + VertexBufferDescriptor dstDesc(srcDesc); + dstDesc.offset += numControlVertices * dstDesc.stride; + + // note that the _evaluatorCache can be NULL and thus + // the evaluatorInstance can be NULL + // (for uninstantiatable kernels CPU,TBB etc) + Evaluator const *instance = GetEvaluator( + _evaluatorCache, srcDesc, dstDesc, _deviceContext); + + Evaluator::EvalStencils(_vertexBuffer, srcDesc, + _vertexBuffer, dstDesc, + _vertexStencilTables, + instance, _deviceContext); + + if (_varyingDesc.length > 0) { + VertexBufferDescriptor srcDesc = _varyingDesc; + VertexBufferDescriptor dstDesc(srcDesc); + dstDesc.offset += numControlVertices * dstDesc.stride; + + instance = GetEvaluator( + _evaluatorCache, srcDesc, dstDesc, _deviceContext); + + if (_varyingBuffer) { + // non-interleaved + Evaluator::EvalStencils(_varyingBuffer, srcDesc, + _varyingBuffer, dstDesc, + _varyingStencilTables, + instance, _deviceContext); + } else { + // interleaved + Evaluator::EvalStencils(_vertexBuffer, srcDesc, + _vertexBuffer, dstDesc, + _varyingStencilTables, + instance, _deviceContext); + } + } } virtual void Synchronize() { - _computeController->Synchronize(); + Evaluator::Synchronize(_deviceContext); } 
virtual DrawContext * GetDrawContext() { @@ -263,7 +413,7 @@ public: private: void initializeContext(int numVertexElements, int numVaryingElements, - int numElements, int level, MeshBitset bits) { + int level, MeshBitset bits) { assert(_refiner); Far::StencilTablesFactory::Options options; @@ -332,16 +482,21 @@ private: } } - _drawContext = DrawContext::Create(_patchTables, numElements, - _deviceContext); - _computeContext = ComputeContext::Create(vertexStencils, - varyingStencils, - _deviceContext); + _drawContext = DrawContext::Create(_patchTables, _deviceContext); // numvertices = coarse verts + refined verts + gregory basis verts _numVertices = vertexStencils->GetNumControlVertices() + vertexStencils->GetNumStencils(); + // convert to device stenciltables if necessary. + _vertexStencilTables = + convertToCompatibleStencilTables( + vertexStencils, _deviceContext); + _varyingStencilTables = + convertToCompatibleStencilTables( + varyingStencils, _deviceContext); + + // FIXME: we do extra copyings for Far::Stencils. 
delete vertexStencils; delete varyingStencils; } @@ -366,14 +521,17 @@ private: int _numVertices; - VertexBuffer * _vertexBuffer, - * _varyingBuffer; + VertexBuffer * _vertexBuffer; + VertexBuffer * _varyingBuffer; - ComputeContext * _computeContext; - ComputeController * _computeController; + VertexBufferDescriptor _vertexDesc; + VertexBufferDescriptor _varyingDesc; + + StencilTables const * _vertexStencilTables; + StencilTables const * _varyingStencilTables; + EvaluatorCache * _evaluatorCache; DrawContext *_drawContext; - DeviceContext *_deviceContext; }; diff --git a/opensubdiv/osd/ompComputeController.cpp b/opensubdiv/osd/ompComputeController.cpp deleted file mode 100644 index 9e94a994..00000000 --- a/opensubdiv/osd/ompComputeController.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../far/stencilTables.h" -#include "../osd/ompComputeController.h" -#include "../osd/ompKernel.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -OmpComputeController::OmpComputeController(int numThreads) { - - _numThreads = (numThreads == -1) ? omp_get_max_threads() : numThreads; -} - -void -OmpComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - Far::StencilTables const * vertexStencils = context->GetVertexStencilTables(); - - if (vertexStencils and _currentBindState.vertexBuffer) { - - int start = 0; - int end = vertexStencils->GetNumStencils(); - - VertexBufferDescriptor const & desc = _currentBindState.vertexDesc; - - float const * srcBuffer = _currentBindState.vertexBuffer + desc.offset; - - float * destBuffer = _currentBindState.vertexBuffer + desc.offset + - vertexStencils->GetNumControlVertices() * desc.stride; - - if (end > start) { - OmpComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); - } - } - - Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); - - if (varyingStencils and _currentBindState.varyingBuffer) { - - int start = 0; - int end = varyingStencils->GetNumStencils(); - - VertexBufferDescriptor const & desc = _currentBindState.varyingDesc; - - float const * srcBuffer = _currentBindState.varyingBuffer + desc.offset; - - float * destBuffer = _currentBindState.varyingBuffer + desc.offset + - varyingStencils->GetNumControlVertices() * desc.stride; - - if (end > start) { - OmpComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - &varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - 
start, - end); - } - } -} - -void -OmpComputeController::Synchronize() { - // XXX: -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/ompComputeController.h b/opensubdiv/osd/ompComputeController.h deleted file mode 100644 index 29831d4c..00000000 --- a/opensubdiv/osd/ompComputeController.h +++ /dev/null @@ -1,184 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_OMP_COMPUTE_CONTROLLER_H -#define OSD_OMP_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuComputeContext.h" -#include "../osd/vertexDescriptor.h" - -#ifdef OPENSUBDIV_HAS_OPENMP - #include -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching OpenMP subdivision kernels. -/// -/// OmpComputeController is a compute controller class to launch OpenMP -/// threaded subdivision kernels. 
It requires CpuVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class OmpComputeController { -public: - typedef CpuComputeContext ComputeContext; - - /// Constructor. - /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - explicit OmpComputeController(int numThreads=-1); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CpuComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - omp_set_num_threads(_numThreads); - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. 
- /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CpuComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - vertexBuffer->BindCpuBuffer() : 0; - - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCpuBuffer() : 0; - } - - - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. 
- struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = 0; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float * vertexBuffer, - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; - int _numThreads; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_OMP_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/ompEvalStencilsController.cpp b/opensubdiv/osd/ompEvalStencilsController.cpp deleted file mode 100644 index c2f12a2b..00000000 --- a/opensubdiv/osd/ompEvalStencilsController.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/ompEvalStencilsController.h" - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -OmpEvalStencilsController::OmpEvalStencilsController(int numThreads) { - - _numThreads = (numThreads == -1) ? omp_get_num_procs() : numThreads; -} - -OmpEvalStencilsController::~OmpEvalStencilsController() { -} - -int -OmpEvalStencilsController::_UpdateValues( CpuEvalStencilsContext * context ) { - - int result=0; - - Far::LimitStencilTables const * stencils = context->GetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - outDesc = _currentBindState.outputDataDesc; - - // make sure that we have control data to work with - if (not ctrlDesc.CanEval(outDesc)) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - if (not ctrl) - return result; - -#pragma omp parallel for - for (int i=0; iGetSizes()[i]; - Far::Index offset = stencils->GetOffsets()[i]; - - Far::Index const * index = &stencils->GetControlIndices().at(offset); - - float const * weight = &stencils->GetWeights().at(offset); - - float * out = _currentBindState.outputData + i * outDesc.stride + outDesc.offset; - - memset(out, 0, outDesc.length*sizeof(float)); - - for (int j=0; jGetStencilTables(); - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return result; - - VertexBufferDescriptor ctrlDesc = _currentBindState.controlDataDesc, - duDesc = _currentBindState.outputDuDesc, - dvDesc = _currentBindState.outputDvDesc; - - // make sure that we have control data to work with - if (not (ctrlDesc.CanEval(duDesc) and ctrlDesc.CanEval(dvDesc))) - return 0; - - float const * ctrl = _currentBindState.controlData + ctrlDesc.offset; - - if (not ctrl) - return result; - -#pragma omp parallel for - for (int i=0; iGetSizes()[i]; - Far::Index offset = stencils->GetOffsets()[i]; - - Far::Index const * 
index = &stencils->GetControlIndices().at(offset); - - float const * duweight = &stencils->GetDuWeights().at(offset), - * dvweight = &stencils->GetDvWeights().at(offset); - - float * du = _currentBindState.outputUDeriv + i * duDesc.stride + duDesc.offset, - * dv = _currentBindState.outputVDeriv + i * dvDesc.stride + dvDesc.offset; - - memset(du, 0, duDesc.length*sizeof(float)); - memset(dv, 0, dvDesc.length*sizeof(float)); - - for (int j=0; j -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// -/// \brief CPU stencils evaluation controller -/// -/// CpuStencilsController is a compute controller class to launch -/// single threaded CPU stencil evalution kernels. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class OmpEvalStencilsController { -public: - - /// \brief Constructor. - /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - OmpEvalStencilsController(int numThreads=-1); - - /// \brief Destructor. 
- ~OmpEvalStencilsController(); - - - /// \brief Applies stencil weights to the control vertex data - /// - /// Applies the stencil weights to the control vertex data to evaluate the - /// interpolated limit positions at the parametric locations of the stencils - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDataDesc vertex buffer descriptor for the output vertex data - /// - /// @param outputData output vertex buffer for the interpolated data - /// - template - int UpdateValues( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - omp_set_num_threads(_numThreads); - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputData( outputDataDesc, outputData ); - - int n = _UpdateValues( context ); - - unbind(); - - return n; - } - - /// \brief Applies derivative stencil weights to the control vertex data - /// - /// Computes the U and V derivative stencils to the control vertex data at - /// the parametric locations contained in each stencil - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDuDesc vertex buffer descriptor for the U derivative output data - /// - /// @param outputDuData output vertex buffer for the U derivative data - /// - /// @param outputDvDesc vertex buffer descriptor for the V deriv output data - /// - /// @param outputDvData output vertex buffer for the V derivative data - /// - 
template - int UpdateDerivs( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData, - VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData ); - - int n = _UpdateDerivs( context ); - - unbind(); - - return n; - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - /// \brief Binds control vertex data buffer - template - void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) { - - _currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0; - _currentBindState.controlDataDesc = controlDataDesc; - - } - - /// \brief Binds output vertex data buffer - template - void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) { - - _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0; - _currentBindState.outputDataDesc = outputDataDesc; - } - - /// \brief Binds output derivative vertex data buffer - template - void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu, - VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) { - - _currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0; - _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0; - _currentBindState.outputDuDesc = outputDuDesc; - _currentBindState.outputDvDesc = outputDvDesc; - } - - /// \brief Unbinds any previously bound vertex and varying data buffers. 
- void unbind() { - _currentBindState.Reset(); - } - -private: - - int _UpdateValues( CpuEvalStencilsContext * context ); - int _UpdateDerivs( CpuEvalStencilsContext * context ); - - int _numThreads; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. - struct BindState { - - BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { } - - void Reset() { - controlData = outputData = outputUDeriv = outputVDeriv = NULL; - controlDataDesc.Reset(); - outputDataDesc.Reset(); - outputDuDesc.Reset(); - outputDvDesc.Reset(); - } - - // transient mesh data - VertexBufferDescriptor controlDataDesc, - outputDataDesc, - outputDuDesc, - outputDvDesc; - - float * controlData, - * outputData, - * outputUDeriv, - * outputVDeriv; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_OMP_EVALSTENCILS_CONTROLLER_H diff --git a/opensubdiv/osd/vertex.h b/opensubdiv/osd/ompEvaluator.cpp similarity index 54% rename from opensubdiv/osd/vertex.h rename to opensubdiv/osd/ompEvaluator.cpp index a74e208b..36db0c1b 100644 --- a/opensubdiv/osd/vertex.h +++ b/opensubdiv/osd/ompEvaluator.cpp @@ -1,5 +1,5 @@ // -// Copyright 2013 Pixar +// Copyright 2015 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in @@ -22,43 +22,48 @@ // language governing permissions and limitations under the Apache License. 
// -#ifndef OSD_VERTEX_H -#define OSD_VERTEX_H - -#include "../version.h" +#include "../osd/ompEvaluator.h" +#include "../osd/ompKernel.h" +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { -class FarVertexEdit; +/* static */ +bool +OmpEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; -//! -/*! - */ -class Vertex { -public: - Vertex() {} + // we can probably expand cpuKernel.cpp to here. + OmpEvalStencils(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights, start, end); - Vertex(int /* index */) {} + return true; +} - Vertex(Vertex const & /* src */) {} +/* static */ +void +OmpEvaluator::Synchronize(void * /*deviceContext*/) { + // we use "omp parallel for" and it synchronizes by itself +} - void AddWithWeight(Vertex const & /* i */, float /* weight */, void * = 0) {} - - void AddVaryingWithWeight(const Vertex & /* i */, float /* weight */, void * = 0) {} - - void Clear(void * = 0) {} - - void ApplyVertexEdit(FarVertexEdit const &) { } -}; +/* static */ +void +OmpEvaluator::SetNumThreads(int numThreads) { + omp_set_num_threads(numThreads); +} } // end namespace Osd } // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - } // end namespace OpenSubdiv - -#endif // OSD_VERTEX_H diff --git a/opensubdiv/osd/ompEvaluator.h b/opensubdiv/osd/ompEvaluator.h new file mode 100644 index 00000000..06c492d8 --- /dev/null +++ b/opensubdiv/osd/ompEvaluator.h @@ -0,0 +1,114 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. 
is deleted and replaced with: +// +// 6. Trademarks. This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_OMP_EVALUATOR_H +#define OPENSUBDIV_OSD_OMP_EVALUATOR_H + +#include "../version.h" + +#include + +#include "../osd/vertexDescriptor.h" + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +class OmpEvaluator { +public: + /// \brief Generic static compute function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. 
+ /// + /// @param instance not used in the omp kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the omp kernel + /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + OmpEvaluator const * instance = NULL, + void * deviceContext = NULL) { + (void)instance; // unused; + (void)deviceContext; // unused; + + return EvalStencils(srcVertexBuffer->BindCpuBuffer(), + srcDesc, + dstVertexBuffer->BindCpuBuffer(), + dstDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + /// stencil compute function. + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, + int end); + + static void Synchronize(void *deviceContext = NULL); + + static void SetNumThreads(int numThreads); +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_OMP_EVALUATOR_H diff --git a/opensubdiv/osd/ompKernel.cpp b/opensubdiv/osd/ompKernel.cpp index 6491d39a..606578c6 100644 --- a/opensubdiv/osd/ompKernel.cpp +++ b/opensubdiv/osd/ompKernel.cpp @@ -73,26 +73,32 @@ copy(float *dst, int dstIndex, const float *src, // XXXX manuelk this should be optimized further by using SIMD - considering // OMP is somewhat obsolete - this is probably not worth it. 
void -OmpComputeStencils(VertexBufferDescriptor const &vertexDesc, - float const * vertexSrc, - float * vertexDst, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end) { +OmpEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end) { + if (start > 0) { + sizes += start; + indices += offsets[start]; + weights += offsets[start]; + } + src += srcDesc.offset; + dst += dstDesc.offset; - assert(start>=0 and start0 ? start : 0); // Stencil index + int index = i + (start > 0 ? start : 0); // Stencil index // Get thread-local pointers int const * threadIndices = indices + offsets[index]; @@ -100,16 +106,16 @@ OmpComputeStencils(VertexBufferDescriptor const &vertexDesc, int threadId = omp_get_thread_num(); - float * threadResult = result + threadId*vertexDesc.length; + float * threadResult = result + threadId*srcDesc.length; - clear(threadResult, vertexDesc); + clear(threadResult, dstDesc); for (int j=0; j<(int)sizes[index]; ++j) { - addWithWeight(threadResult, vertexSrc, - threadIndices[j], threadWeights[j], vertexDesc); + addWithWeight(threadResult, src, + threadIndices[j], threadWeights[j], srcDesc); } - copy(vertexDst, i, threadResult, vertexDesc); + copy(dst, i, threadResult, dstDesc); } } diff --git a/opensubdiv/osd/ompKernel.h b/opensubdiv/osd/ompKernel.h index 163a03bc..efaf81d0 100644 --- a/opensubdiv/osd/ompKernel.h +++ b/opensubdiv/osd/ompKernel.h @@ -22,29 +22,28 @@ // language governing permissions and limitations under the Apache License. 
// -#ifndef OSD_OMP_KERNEL_H -#define OSD_OMP_KERNEL_H +#ifndef OPENSUBDIV_OSD_OMP_KERNEL_H +#define OPENSUBDIV_OSD_OMP_KERNEL_H #include "../version.h" -#include "../osd/vertexDescriptor.h" - namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { -struct VertexDescriptor; +struct VertexBufferDescriptor; void -OmpComputeStencils(VertexBufferDescriptor const &vertexDesc, - float const * vertexSrc, - float * vertexDst, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end); +OmpEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end); } // end namespace Osd @@ -53,4 +52,4 @@ using namespace OPENSUBDIV_VERSION; } // end namespace OpenSubdiv -#endif // OSD_OMP_KERNEL_H +#endif // OPENSUBDIV_OSD_OMP_KERNEL_H diff --git a/opensubdiv/osd/ompSmoothNormalController.cpp b/opensubdiv/osd/ompSmoothNormalController.cpp deleted file mode 100644 index 8de17494..00000000 --- a/opensubdiv/osd/ompSmoothNormalController.cpp +++ /dev/null @@ -1,185 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#include "../osd/ompSmoothNormalController.h" - -#ifdef OPENSUBDIV_HAS_OPENMP - #include -#endif - -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -inline void -cross(float *n, const float *p0, const float *p1, const float *p2) { - - float a[3] = { p1[0]-p0[0], p1[1]-p0[1], p1[2]-p0[2] }; - float b[3] = { p2[0]-p0[0], p2[1]-p0[1], p2[2]-p0[2] }; - n[0] = a[1]*b[2]-a[2]*b[1]; - n[1] = a[2]*b[0]-a[0]*b[2]; - n[2] = a[0]*b[1]-a[1]*b[0]; - - float rn = 1.0f/sqrtf(n[0]*n[0] + n[1]*n[1] + n[2]*n[2]); - n[0] *= rn; - n[1] *= rn; - n[2] *= rn; -} - -void OmpSmoothNormalController::_smootheNormals( - CpuSmoothNormalContext * context) { - - VertexBufferDescriptor const & iDesc = context->GetInputVertexDescriptor(), - & oDesc = context->GetOutputVertexDescriptor(); - - assert(iDesc.length==3 and oDesc.length==3); - - float * oBuffer = context->GetCurrentOutputVertexBuffer() + oDesc.offset; - if (context->GetResetMemory()) { - -#pragma omp parallel for - for (int j=0; jGetNumVertices(); ++j) { - float * ptr = oBuffer + j * oDesc.stride; - memset(ptr, 0, oDesc.length*sizeof(float)); - } - } - - { // note: quads only ! 
- float const * iBuffer = context->GetCurrentInputVertexBuffer() + iDesc.offset; - - Far::Index const * fverts = context->GetFaceVertices(); - - int nfaces = context->GetNumFaces(); - -#pragma omp parallel for - for (int i=0; iGetInputVertexDescriptor(), - & oDesc = context->GetOutputVertexDescriptor(); - - assert(iDesc.length==3 and oDesc.length==3); - - float const * iBuffer = context->GetCurrentInputVertexBuffer() + iDesc.offset; - float * oBuffer = context->GetCurrentOutputVertexBuffer() + oDesc.offset; - - Far::PatchTables::PTable const & verts = context->GetControlVertices(); - - Far::PatchTables::PatchArrayVector const & parrays = context->GetPatchArrayVector(); - - if (verts.empty() or parrays.empty() or (not iBuffer) or (not oBuffer)) { - return; - } - - for (int i=0; i<(int)parrays.size(); ++i) { - - Far::PatchTables::PatchArray const & pa = parrays[i]; - - Far::PatchTables::Type type = pa.GetDescriptor().GetType(); - - - if (type==Far::PatchTables::QUADS or type==Far::PatchTables::TRIANGLES) { - - int nv = Far::PatchTables::Descriptor::GetNumControlVertices(type); - - // if necessary, reset all normal values to 0 - if (context->GetResetMemory()) { -#pragma omp parallel for - for (int j=0; jGetNumVertices(); ++j) { - float * ptr = oBuffer + j * oDesc.stride; - memset(ptr, 0, oDesc.length*sizeof(float)); - } - } - - -#pragma omp parallel for - for (int j=0; j<(int)pa.GetNumPatches(); ++j) { - - int idx = pa.GetVertIndex() + j*nv; - - float const * p0 = iBuffer + verts[idx+0]*iDesc.stride, - * p1 = iBuffer + verts[idx+1]*iDesc.stride, - * p2 = iBuffer + verts[idx+2]*iDesc.stride; - - // compute face normal - float n[3]; - cross( n, p0, p1, p2 ); - - // add normal to all vertices of the face - for (int k=0; k - void SmootheNormals( CpuSmoothNormalContext * context, - VERTEX_BUFFER * iBuffer, int iOfs, - VERTEX_BUFFER * oBuffer, int oOfs ) { - - if (not context) return; - - context->Bind(iBuffer, iOfs, oBuffer, oOfs); - - _smootheNormals(context); - - 
context->Unbind(); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -private: - - void _smootheNormals(CpuSmoothNormalContext * context); -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_OMP_SMOOTHNORMAL_CONTROLLER_H diff --git a/opensubdiv/osd/ptexTextureLoader.cpp b/opensubdiv/osd/ptexTextureLoader.cpp deleted file mode 100644 index a36189c9..00000000 --- a/opensubdiv/osd/ptexTextureLoader.cpp +++ /dev/null @@ -1,940 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/ptexTextureLoader.h" -#include "../far/error.h" - -#include -#include -#include -#include -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// block : atomic texture unit, points to the texels contained in a face -// -// |-----------------------| |-----------------------| -// | (u,v) | | (u,v) | -// | | | | -// | | | | -// | Block 0 | | Block 1 | -// | | | | -// | vres | + | vres | ... -// | | | | -// | | | | -// | | | | -// | | | | -// | ures | | ures | -// |-----------------------| |-----------------------| -// -struct PtexTextureLoader::block { - - int idx; // PTex face index - - unsigned short u, v; // location in memory pages - - Ptex::Res current, // current resolution of the block - native; // native resolution of the block - - // comparison operator : true when the current texel area of "b" is greater than "a" - static bool currentAreaSort(block const * a, block const * b) { - int darea = a->current.ulog2 * a->current.vlog2 - - b->current.ulog2 * b->current.vlog2; - if (darea==0) - return a->current.ulog2 < b->current.ulog2; - else - return darea < 0; - } - - // returns a "distance" metric from the native texel resolution - int8_t distanceFromNative( ) const { - int8_t udist = (int8_t)(native.ulog2-current.ulog2), - vdist = (int8_t)(native.vlog2-current.vlog2); - - return (int8_t)(udist * udist + vdist * vdist); - } - - // desirability predicates for resolution scaling optimizations - static bool downsizePredicate( block const * b0, block const * b1 ) { - int8_t d0 = b0->distanceFromNative(), - d1 = b1->distanceFromNative(); - - if (d0==d1) - return (b0->current.ulog2 * b0->current.vlog2) < - (b1->current.ulog2 * b1->current.vlog2); - else - return d0 < d1; - } - - static bool upsizePredicate( block const * b0, block const * b1 ) { - int8_t d0 = b0->distanceFromNative(), - d1 = b1->distanceFromNative(); - - if (d0==d1) - return (b0->current.ulog2 * b0->current.vlog2) < - (b1->current.ulog2 * 
b1->current.vlog2); - else - return d0 > d1; - } - - friend std::ostream & operator <<(std::ostream &s, block const & b); -}; - -// page : a handle on a single page of the GL texture array that contains the -// packed PTex texels. Pages populate "empty" slots with "blocks" of -// texels. -// Note : pages are square, because i said so... -// -// |--------------------------| |------------|-------------| -// | | |............|.............| -// | | |............|.............| -// | | |............|.............| -// | | |.... B 0 ...|.... B 1 ..../ -// | | |............|.............| -// | | |............|.............| -// | | |............|.............| -// | Empty Page | |------------|-------------| -// | | packed => |..........................| -// | | |..........................| -// | | |..........................| -// | | |.......... B 2 ...........| -// | | |..........................| -// | | |..........................| -// | | |..........................| -// |--------------------------| |--------------------------| -// -struct PtexTextureLoader::page { - - //---------------------------------------------------------------- - // slot : rectangular block of available texels in a page - struct slot { - unsigned short u, v, ures, vres; - - slot( unsigned short size ) : u(0), v(0), ures(size), vres(size) { } - - slot( unsigned short iu, unsigned short iv, unsigned short iures, unsigned short ivres ) : - u(iu), v(iv), ures(iures), vres(ivres) { } - - // true if a block can fit in this slot - bool fits( block const * b, int gutterWidth ) { - return ( (b->current.u()+2*gutterWidth)<=ures ) && - ((b->current.v()+2*gutterWidth)<=vres); - } - }; - - //---------------------------------------------------------------- - typedef std::list blist; - blist blocks; - - typedef std::list slist; - slist slots; - - // construct a page with a single empty slot the size of the page - page( unsigned short pagesize ) { - slots.push_back( slot( pagesize) ); - } - - // true if 
there is no empty texels in the page (ie. no slots left) - bool isFull( ) const { - return slots.size()==0; - } - - // true when the block "b" is successfully added to this page : - // - // |--------------------------| |------------|-------------| - // | | |............| | - // | | |............| | - // | | |.... B .....| Right Slot | - // | | |............| | - // | | |............| | - // | | |------------|-------------| - // | Original Slot | ==> | | - // | | | | - // | | | Bottom Slot | - // | | | | - // | | | | - // |--------------------------| |--------------------------| - // - bool addBlock( block * b, int gutterWidth ) { - for (slist::iterator i=slots.begin(); i!=slots.end(); ++i) { - - if (i->fits( b, gutterWidth )) { - - blocks.push_back( b ); - - int w = gutterWidth, - w2 = 2*w; - - b->u=(unsigned short)(i->u + w); - b->v=(unsigned short)(i->v + w); - - // add new slot to the right - if (i->ures > (b->current.u()+w2)) { - slots.push_front( slot( i->u+b->current.u()+w2, - i->v, - i->ures-b->current.u()-w2, - b->current.v()+w2)); - } - - // add new slot to the bottom - if (i->vres > (b->current.v()+w2)) { - slots.push_back( slot( i->u, - i->v+b->current.v()+w2, - i->ures, - i->vres-b->current.v()-w2 )); - } - - slots.erase( i ); - return true; - } - } - return false; - } - - friend std::ostream & operator <<(std::ostream &s, const page & p); -}; - -PtexTextureLoader::PtexTextureLoader( PtexTexture * p, - int gutterWidth, int pageMargin) : - _ptex(p), _indexBuffer( NULL ), _layoutBuffer( NULL ), _texelBuffer(NULL), - _gutterWidth(gutterWidth), _pageMargin(pageMargin) -{ - _bpp = p->numChannels() * Ptex::DataSize( p->dataType() ); - - _txn = 0; - - int nf = p->numFaces(); - _blocks.clear(); - _blocks.resize( nf ); - - for (int i=0; igetFaceInfo(i); - _blocks[i].idx=i; - _blocks[i].current=_blocks[i].native=f.res; - _txn += f.res.u() * f.res.v(); - } - - _txc = _txn; -} - -PtexTextureLoader::~PtexTextureLoader() -{ - ClearPages(); -} - -unsigned long int 
-PtexTextureLoader::GetNumBlocks( ) const { - return (unsigned long int)_blocks.size(); -} - -unsigned long int -PtexTextureLoader::GetNumPages( ) const { - return (unsigned long int)_pages.size(); -} - -// attempt to re-size per-face resolutions to hit the uncompressed texel -// memory use requirement -void -PtexTextureLoader::OptimizeResolution( unsigned long int memrec ) -{ - unsigned long int txrec = memrec / _bpp; - - if (txrec==_txc) - return; - else { - unsigned long int txcur = _txc; - - if (_blocks.size()==0) - return; - - std::vector blocks( _blocks.size() ); - for (unsigned long int i=0; i0) && (txcur>txrec) ) { - - unsigned long int txsaved = txcur; - - // start stealing from largest to smallest down - for (int i=(int)blocks.size()-1; i>=0; --i) { - - block * b = blocks[i]; - - // we have already hit rock bottom resolution... skip this block - if (b->current.ulog2==0 || b->current.vlog2==0) - continue; - - unsigned short ures = (unsigned short)(1<<(b->current.ulog2-1)), - vres = (unsigned short)(1<<(b->current.vlog2-1)); - - int diff = b->current.size() - ures * vres; - - // we are about to overshoot the limit with our big blocks : - // skip until we find something smaller - if ( ((unsigned long int)diff>txcur) || ((txcur-diff)current.ulog2--; - b->current.vlog2--; - txcur-=diff; - } - - // couldn't scavenge anymore even from smallest faces : time to bail out. - if (txsaved==txcur) - break; - } - _txc = txcur; - } else { - - // increasing footprint -------------------------------------- - - // blocks that have already been resized heavily will be considered first - std::sort(blocks.begin(), blocks.end(), block::upsizePredicate ); - - while ( (txcur < _txn) && (txcur < txrec) ) { - - unsigned long int txsaved = txcur; - - // start adding back to the largest faces first - for (int i=0; i<(int)blocks.size(); ++i) { - - block * b = blocks[i]; - - // already at native resolution... 
nothing to be done - if (b->current == b->native) - continue; - - unsigned short ures = (unsigned short)(1<<(b->current.ulog2+1)), - vres = (unsigned short)(1<<(b->current.vlog2+1)); - - int diff = ures * vres - b->current.size(); - - // we are about to overshoot the limit with our big blocks : - // skip until we find something smaller - if ( (txcur + diff) > txrec ) - continue; - - b->current.ulog2++; - b->current.vlog2++; - txcur+=diff; - } - - // couldn't scavenge anymore even from smallest faces : time to bail out. - if (txsaved==txcur) - break; - } - _txc = txcur; - } - } -} - -// greedy packing of blocks into pages -void -PtexTextureLoader::OptimizePacking( int maxnumpages ) -{ - if (_blocks.size()==0) - return; - - // generate a vector of pointers to the blocks ------------------- - std::vector blocks( _blocks.size() ); - for (unsigned long int i=0; icurrent.u()); - _pagesize = std::max(_pagesize, (unsigned short)blocks[i]->current.v()); - } - - // note: at least 2*GUTTER_WIDTH of margin required for each page to fit - _pagesize += (unsigned short)GetPageMargin(); - - // grow the pagesize to make sure the optimization will not exceed the maximum - // number of pages allowed - for (int npages=_txc/(_pagesize*_pagesize); npages>maxnumpages; _pagesize<<=1) - npages = _txc/(_pagesize*_pagesize ); - - ClearPages( ); - - // save some memory allocation time : guess the number of pages from the - // number of texels - _pages.reserve( _txc / (_pagesize*_pagesize) + 1 ); - - // pack blocks into slots ---------------------------------------- - for (unsigned long int i=0, firstslot=0; i<_blocks.size(); ++i ) { - - block * b = blocks[i]; - - // traverse existing pages for a suitable slot --------------- - bool added=false; - for( unsigned long int p=firstslot; p<_pages.size(); ++p ) - if( (added=_pages[p]->addBlock( b, GetGutterWidth() )) == true ) { - break; - } - - // if none was found : start new page - if( !added ) { - page * p = new page( _pagesize ); - 
p->addBlock(b, GetGutterWidth()); - _pages.push_back( p ); - } - - // adjust the page flag to the first page with open slots - if( (_pages.size()>(firstslot+1)) && - (_pages[firstslot+1]->isFull()) ) - ++firstslot; - } -} - -// resample border texels for guttering -// -static int -resampleBorder(PtexTexture * ptex, int face, int edgeId, unsigned char *result, - int dstLength, int bpp, float srcStart=0.0f, float srcEnd=1.0f) -{ - const Ptex::FaceInfo & pf = ptex->getFaceInfo(face); - PtexFaceData * data = ptex->getData(face); - - int edgeLength = (edgeId==0||edgeId==2) ? pf.res.u() : pf.res.v(); - int srcOffset = (int)(srcStart*edgeLength); - int srcLength = (int)((srcEnd-srcStart)*edgeLength); - - // if dstLength < 0, returns as original resolution without scaling - if (dstLength < 0) dstLength = srcLength; - - unsigned char *border = new unsigned char[bpp*srcLength]; - - // order of the result will be flipped to match adjacent pixel order - for(int i=0;igetPixel(u, v, &border[i*bpp]); - } - - // nearest resample to fit dstLength - for(int i=0;igetFaceInfo(face); - - // copy adjacent borders - int adjface = fi.adjface(edge); - if(adjface != -1) { - int ae = fi.adjedge(edge); - if (!fi.isSubface() && ptex->getFaceInfo(adjface).isSubface()) { - /* nonsubface -> subface (1:0.5) see http://ptex.us/adjdata.html for more detail - +------------------+ - | face | - +--------edge------+ - | adj face | | - +----------+-------+ - */ - resampleBorder(ptex, adjface, ae, border, length/2, bpp); - const Ptex::FaceInfo &sfi1 = ptex->getFaceInfo(adjface); - adjface = sfi1.adjface((ae+3)%4); - ae = (sfi1.adjedge((ae+3)%4)+3)%4; - resampleBorder(ptex, adjface, ae, border+(length/2*bpp), length/2, bpp); - - } else if (fi.isSubface() && !ptex->getFaceInfo(adjface).isSubface()) { - /* subface -> nonsubface (0.5:1). 
two possible configuration - case 1 case 2 - +----------+----------+ +----------+----------+--------+ - | face | B | | | face | B | - +---edge---+----------+ +----------+--edge----+--------+ - |0.0 0.5 1.0| |0.0 0.5 1.0| - | adj face | | adj face | - +---------------------+ +---------------------+ - */ - int Bf = fi.adjface((edge+1)%4); - int Be = fi.adjedge((edge+1)%4); - int f = ptex->getFaceInfo(Bf).adjface((Be+1)%4); - int e = ptex->getFaceInfo(Bf).adjedge((Be+1)%4); - if(f == adjface && e == ae) // case 1 - resampleBorder(ptex, adjface, ae, border, length, bpp, 0.0, 0.5); - else // case 2 - resampleBorder(ptex, adjface, ae, border, length, bpp, 0.5, 1.0); - - } else { - /* ordinary case (1:1 match) - +------------------+ - | face | - +--------edge------+ - | adj face | - +----------+-------+ - */ - resampleBorder(ptex, adjface, ae, border, length, bpp); - } - } else { - /* border edge. duplicate itself - +-----------------+ - | face | - +-------edge------+ - */ - resampleBorder(ptex, face, edge, border, length, bpp); - flipBuffer(border, length, bpp); - } -} - -// get corner pixel by traversing all adjacent faces around vertex -// -static bool -getCornerPixel(PtexTexture *ptex, float *resultPixel, int numchannels, - int face, int edge, int bpp, unsigned char *lineBuffer) -{ - const Ptex::FaceInfo &fi = ptex->getFaceInfo(face); - - /* - see http://ptex.us/adjdata.html Figure 2 for the reason of conditions edge==1 and 3 - */ - - if (fi.isSubface() && edge == 3) { - /* - in T-vertex case, this function sets 'D' pixel value to *resultPixel and returns false - gutter line - | - +------+-------+ - | | | - | D|C |<-- gutter line - | *-------+ - | B|A [2] | - | |[3] [1]| - | | [0] | - +------+-------+ - */ - int adjface = fi.adjface(edge); - if (adjface != -1 and !ptex->getFaceInfo(adjface).isSubface()) { - int length = resampleBorder(ptex, - adjface, - fi.adjedge(edge), - lineBuffer, - /*dstLength=*/-1, - bpp, - 0.0f, 1.0f); - /* then lineBuffer contains - - 
|-------DB-------| - 0 ^ length-1 - length/2-1 - */ - Ptex::ConvertToFloat(resultPixel, - lineBuffer + bpp*(length/2-1), - ptex->dataType(), - numchannels); - return true; - } - } - if (fi.isSubface() && edge == 1) { - /* gutter line - | - +------+-------+ - | | [3] | - | |[0] [2]| - | B|A [1] | - | *-------+ - | D|C |<-- gutter line - | | | - +------+-------+ - - note: here we're focusing on vertex A which corresponds to the edge 1, - but the edge 0 is an adjacent edge to get D pixel. - */ - int adjface = fi.adjface(0); - if (adjface != -1 and !ptex->getFaceInfo(adjface).isSubface()) { - int length = resampleBorder(ptex, - adjface, - fi.adjedge(0), - lineBuffer, - /*dstLength=*/-1, - bpp, - 0.0f, 1.0f); - /* then lineBuffer contains - - |-------BD-------| - 0 ^ length-1 - length/2 - */ - Ptex::ConvertToFloat(resultPixel, - lineBuffer + bpp*(length/2), - ptex->dataType(), - numchannels); - return true; - } - } - - int currentFace = face; - int currentEdge = edge; - int uv[4][2] = {{0,0}, {1,0}, {1,1}, {0,1}}; - float *pixel = (float*)alloca(sizeof(float)*numchannels); - float *accumPixel = (float*)alloca(sizeof(float)*numchannels); - - // clear accum pixel - memset(accumPixel, 0, sizeof(float)*numchannels); - - bool clockWise = true; - int valence = 0; - do { - valence++; - - if (valence > 255) { - Far::Warning("High valence detected in %s : invalid adjacency around " - "face %d", ptex->path(), face); - break; - } - - Ptex::FaceInfo info = ptex->getFaceInfo(currentFace); - ptex->getPixel(currentFace, - uv[currentEdge][0] * (info.res.u()-1), - uv[currentEdge][1] * (info.res.v()-1), - pixel, 0, numchannels); - for (int j = 0; j < numchannels; ++j) { - accumPixel[j] += pixel[j]; - if (valence == 3) { - resultPixel[j] = pixel[j]; - } - } - - // next face - if (clockWise) { - currentFace = info.adjface(currentEdge); - currentEdge = info.adjedge(currentEdge); - currentEdge = (currentEdge+1)%4; - } else { - currentFace = info.adjface((currentEdge+3)%4); - currentEdge = 
info.adjedge((currentEdge+3)%4); - } - - if (currentFace == -1) { - // border case. - if (clockWise) { - // reset position and restart counter clock wise - Ptex::FaceInfo sinfo = ptex->getFaceInfo(face); - currentFace = sinfo.adjface((edge+3)%4); - currentEdge = sinfo.adjedge((edge+3)%4); - clockWise = false; - } else { - // end - break; - } - } - } while(currentFace != face); - - if (valence == 4) { - return true; - } - - // non-4 valence. let's average and return false; - for (int j = 0; j < numchannels; ++j) { - resultPixel[j] = accumPixel[j]/valence; - } - return false; -} - -// sample neighbor pixels and populate around blocks -static void -guttering(PtexTexture *_ptex, PtexTextureLoader::block *b, unsigned char *pptr, - int bpp, int pagesize, int stride, int gwidth) -{ - unsigned char * lineBuffer = new unsigned char[pagesize * bpp]; - - for(int w=0; wcurrent.u() : b->current.v(); - // XXX: for now, sample same edge regardless of gutter depth - sampleNeighbor(_ptex, lineBuffer, b->idx, edge, len, bpp); - - unsigned char *s = lineBuffer, *d; - for(int j=0;jv-1-w) + bpp*(b->u+j); - break; - case Ptex::e_right: - d += stride*(b->v+j) + bpp*(b->u+b->current.u()+w); - break; - case Ptex::e_top: - d += stride*(b->v+b->current.v()+w) + bpp*(b->u+len-j-1); - break; - case Ptex::e_left: - d += stride*(b->v+len-j-1) + bpp*(b->u-1-w); - break; - } - for(int k=0; knumChannels(); - float *accumPixel = new float[numchannels]; - int uv[4][2] = {{-1,-1}, {1,-1}, {1,1}, {-1,1}}; - for (int edge=0; edge<4; edge++) { - - int du = (b->u+gwidth*uv[edge][0]); - int dv = (b->v+gwidth*uv[edge][1]); - - /* There are 3 cases when filling a corner pixel on gutter. - - case 1: Regular 4 valence - We already have correct 'B' and 'C' pixels by edge resampling above. - so here only one more pixel 'D' is needed, - and it will be placed on the gutter corner. 
- +-----+-----+ - | | |<-current - | B|A | - +-----*-----+ - | D|C | - | | | - +-----+-----+ - - case 2: T-vertex case (note that this doesn't mean 3 valence) - If the current face comes from non-quad root face, there could be a T-vertex - on its corner. Just like case 1, need to fill border corner with pixel 'D'. - +-----+-----+ - | | |<-current - | B|A | - | *-----+ - | D|C | - | | | - +-----+-----+ - - case 3: Other than 4 valence case (everything else, including boundary) - Since guttering pixels are placed on the border of each ptex faces, - It's not possible to store more than 4 pixels at a coner for a reasonable - interpolation. - In this case, we need to average all corner pixels and overwrite with an - averaged value, so that every face vertex picks the same value. - +---+---+ - | | |<-current - | B|A | - +---*---| - | D/E\C | - | / \ | - |/ \| - +-------+ - */ - - if (getCornerPixel(_ptex, accumPixel, numchannels, b->idx, edge, bpp, lineBuffer)) { - // case 1 and case 2 - if (edge==1||edge==2) du += b->current.u()-gwidth; - if (edge==2||edge==3) dv += b->current.v()-gwidth; - for (int u=0; udataType(), numchannels); - } - } - } else { - // case 3 - if (edge==1||edge==2) du += b->current.u()-gwidth-1; - if (edge==2||edge==3) dv += b->current.v()-gwidth-1; - // set accumPixel to 4 corners - // .. 
over (gwidth+1)x(gwidth+1) pixels for each corner - for (int u=0; u<=gwidth; ++u) { - for (int v=0; v<=gwidth; ++v) { - unsigned char *d = pptr + (dv+u)*stride + (du+v)*bpp; - Ptex::ConvertFromFloat(d, accumPixel, _ptex->dataType(), numchannels); - } - } - } - } - delete[] lineBuffer; - delete[] accumPixel; -} - -// prepares the data for the texture samplers used by the GLSL tables to render -// PTex texels -bool -PtexTextureLoader::GenerateBuffers( ) -{ - if (_pages.size()==0) return false; - - // populate the page index lookup texture ------------------------ - _indexBuffer = new unsigned int[ _blocks.size() ]; - for (unsigned long int i=0; i<_pages.size(); ++i) { - page * p = _pages[i]; - for (page::blist::iterator j=p->blocks.begin(); j!=p->blocks.end(); ++j) - _indexBuffer[ (*j)->idx ] = i; - } - - // populate the layout lookup texture ---------------------------- - float * lptr = _layoutBuffer = new float[ 4 * _blocks.size() ]; - for (unsigned long int i=0; i<_blocks.size(); ++ i) { - // normalize coordinates by pagesize resolution ! 
- *lptr++ = (float) _blocks[i].u / (float) _pagesize; - *lptr++ = (float) _blocks[i].v / (float) _pagesize; - *lptr++ = (float) _blocks[i].current.u() / (float) _pagesize; - *lptr++ = (float) _blocks[i].current.v() / (float) _pagesize; - } - - // populate the texels ------------------------------------------- - int stride = _bpp * _pagesize, - pagestride = stride * _pagesize; - - unsigned char * pptr = _texelBuffer = new unsigned char[ pagestride * _pages.size() ]; - - for (unsigned long int i=0; i<_pages.size(); i++) { - - page * p = _pages[i]; - - for (page::blist::iterator b=p->blocks.begin(); b!=p->blocks.end(); ++b) { - _ptex->getData( (*b)->idx, pptr + stride*(*b)->v + _bpp*(*b)->u, stride, (*b)->current ); - - if(GetGutterWidth() > 0) - guttering(_ptex, *b, pptr, _bpp, _pagesize, stride, GetGutterWidth()); - } - - pptr += pagestride; - } - - return true; -} - -void -PtexTextureLoader::ClearBuffers( ) -{ delete [] _indexBuffer; - delete [] _layoutBuffer; - delete [] _texelBuffer; -} - -// returns a ratio of texels wasted in the final GPU texture : anything under 5% -// is pretty good compared to our previous solution... 
-float -PtexTextureLoader::EvaluateWaste( ) const -{ - unsigned long int wasted=0; - for( unsigned long int i=0; i<_pages.size(); i++ ) { - page * p = _pages[i]; - for( page::slist::iterator s=p->slots.begin(); s!=p->slots.end(); ++s ) - wasted += s->ures * s->vres; - } - return (float)((double)wasted/(double)_txc); -} - -void -PtexTextureLoader::ClearPages( ) -{ for( unsigned long int i=0; i<_pages.size(); i++ ) - delete _pages[i]; - _pages.clear(); -} - -void -PtexTextureLoader::PrintBlocks() const -{ for( unsigned long int i=0; i<_blocks.size(); ++i ) - std::cout<<_blocks[i]<u<<" "<v<<" "<ures<<" "<vres<<"} "; - s<<" }\n"; - - s<<" blocks {"; - for (PtexTextureLoader::page::blist::const_iterator i=p.blocks.begin(); i!=p.blocks.end(); ++i) - s<<" "<< **i; - s<<" }\n"; - - s<<"}"; - return s; -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv - diff --git a/opensubdiv/osd/ptexTextureLoader.h b/opensubdiv/osd/ptexTextureLoader.h deleted file mode 100644 index 57df7a1d..00000000 --- a/opensubdiv/osd/ptexTextureLoader.h +++ /dev/null @@ -1,176 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. 
-// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_PTEX_TEXTURE_LOADER_H -#define OSD_PTEX_TEXTURE_LOADER_H - -#include "../version.h" - -#include - -class PtexTexture; - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -// Ptex reader helper - manages up/down sizing and texel packing of blocks into -// texel pages and generate the GL texture buffers for rendering : -// -// Pages table : maps the face (quad) to a page based on gl_PrimitiveID -// -// face idx = 1 -// V -// 0 1 2 ... -// |----------|----------|----------|-------- -// | page idx | page idx | page idx | ... -// |----------|----------|----------|-------- -// -// Layout table : coordinates of the gprim in the page -// -// - layout coords = vec4 normalized(top left (u,v), ures, vres)) -// -// face idx = 1 -// V -// 0 1 2 ... -// |--------|--------|--------|-------- -// | layout | layout | layout | ... -// |--------|--------|--------|-------- -// -// Texels buffer : the packed texels -// -// page 0 page 1 -// |------------|-------------||------------|-------------||------ -// |............|.............||............|.............|| -// |............|.............||............|.............|| -// |............|.............||............|..... ( X ) .|| -// |.... B 0 ...|.... B 1 ....||.... 
B 3 ...|.............|| -// |............|.............||............|.............|| -// |............|.............||............|.............|| -// |............|.............||............|.............|| -// |------------|-------------||------------|.... B 5 ....|| -// |..........................||............|.............|| -// |..........................||............|.............|| -// |..........................||............|.............|| -// |.......... B 2 ...........||.... B 4 ...|.............|| -// |..........................||............|.............|| -// |..........................||............|.............|| -// |..........................||............|.............|| -// |--------------------------||--------------------------||------- -// -// GLSL shader computes texel coordinates with : -// * vec3 ( X ) = ( layout.u + X, layout.v + Y, page idx ) -// - -class PtexTextureLoader { -public: - struct block; - struct page; - - PtexTextureLoader( PtexTexture *ptex, int gutterWidth, int pageMargin ); - - ~PtexTextureLoader(); - - unsigned short GetPageSize( ) const { - return _pagesize; - } - - unsigned long int GetNumBlocks( ) const; - - unsigned long int GetNumPages( ) const; - - unsigned int * GetIndexBuffer( ) const { - return _indexBuffer; - } - - const float * GetLayoutBuffer( ) const { - return _layoutBuffer; - } - - const unsigned char * GetTexelBuffer( ) const { - return _texelBuffer; - } - - unsigned long int GetUncompressedSize() const { - return _txc * _bpp; - } - - unsigned long int GetNativeUncompressedSize() const { - return _txn * _bpp; - } - - int GetGutterWidth() const { return _gutterWidth; } - - int GetPageMargin() const { return _pageMargin; } - - void OptimizeResolution( unsigned long int memrec ); - - void OptimizePacking( int maxnumpages ); - - bool GenerateBuffers( ); - - float EvaluateWaste( ) const; - - void ClearPages( ); - - void ClearBuffers(); - - void PrintBlocks() const; - - void PrintPages() const; - 
-protected: - - friend struct block; - - PtexTexture * _ptex; - -private: - - int _bpp; // bits per pixel - - unsigned long int _txc, // texel count for current resolution - _txn; // texel count for native resolution - - std::vector _blocks; - - std::vector _pages; - unsigned short _pagesize; - - unsigned int * _indexBuffer; - float * _layoutBuffer; - unsigned char * _texelBuffer; - - int _gutterWidth, _pageMargin; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_PTEX_TEXTURE_LOADER_H diff --git a/opensubdiv/osd/tbbComputeController.cpp b/opensubdiv/osd/tbbComputeController.cpp deleted file mode 100644 index 75ddcf05..00000000 --- a/opensubdiv/osd/tbbComputeController.cpp +++ /dev/null @@ -1,118 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include - -#include "../far/stencilTables.h" -#include "../osd/cpuComputeContext.h" -#include "../osd/tbbComputeController.h" -#include "../osd/tbbKernel.h" - -#ifdef OPENSUBDIV_HAS_TBB - #include -#endif - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -TbbComputeController::TbbComputeController(int numThreads) - : _numThreads(numThreads) { - - if(_numThreads == -1) - tbb::task_scheduler_init init; - else - tbb::task_scheduler_init init(numThreads); -} - -void -TbbComputeController::ApplyStencilTableKernel( - ComputeContext const *context) const { - - assert(context); - - Far::StencilTables const * vertexStencils = context->GetVertexStencilTables(); - - if (vertexStencils and _currentBindState.vertexBuffer) { - - int start = 0; - int end = vertexStencils->GetNumStencils(); - - VertexBufferDescriptor const & desc = _currentBindState.vertexDesc; - - float const * srcBuffer = _currentBindState.vertexBuffer + desc.offset; - - float * destBuffer = _currentBindState.vertexBuffer + desc.offset + - vertexStencils->GetNumControlVertices() * desc.stride; - - if (end > start) { - TbbComputeStencils(_currentBindState.vertexDesc, - srcBuffer, destBuffer, - &vertexStencils->GetSizes().at(0), - &vertexStencils->GetOffsets().at(0), - &vertexStencils->GetControlIndices().at(0), - &vertexStencils->GetWeights().at(0), - start, - end); - } - } - - Far::StencilTables const * varyingStencils = context->GetVaryingStencilTables(); - - if (varyingStencils and _currentBindState.varyingBuffer) { - - int start = 0; - int end = varyingStencils->GetNumStencils(); - - VertexBufferDescriptor const & desc = _currentBindState.varyingDesc; - - float const * srcBuffer = _currentBindState.varyingBuffer + desc.offset; - - float * destBuffer = _currentBindState.varyingBuffer + desc.offset + - varyingStencils->GetNumControlVertices() * desc.stride; - - if (end > start) { - TbbComputeStencils(_currentBindState.varyingDesc, - srcBuffer, destBuffer, - 
&varyingStencils->GetSizes().at(0), - &varyingStencils->GetOffsets().at(0), - &varyingStencils->GetControlIndices().at(0), - &varyingStencils->GetWeights().at(0), - start, - end); - } - } -} - -void -TbbComputeController::Synchronize() { - // XXX: -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv - diff --git a/opensubdiv/osd/tbbComputeController.h b/opensubdiv/osd/tbbComputeController.h deleted file mode 100644 index 6bb13d87..00000000 --- a/opensubdiv/osd/tbbComputeController.h +++ /dev/null @@ -1,177 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef OSD_TBB_COMPUTE_CONTROLLER_H -#define OSD_TBB_COMPUTE_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuComputeContext.h" -#include "../osd/vertexDescriptor.h" - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// \brief Compute controller for launching TBB subdivision kernels. 
-/// -/// TbbComputeController is a compute controller class to launch TBB -/// threaded subdivision kernels. It requires CpuVertexBufferInterface -/// as arguments of Refine function. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class TbbComputeController { -public: - typedef CpuComputeContext ComputeContext; - - /// Constructor. - /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - explicit TbbComputeController(int numThreads=-1); - - - /// Execute subdivision kernels and apply to given vertex buffers. - /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - /// @param vertexDesc The descriptor of vertex elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - /// @param varyingBuffer Vertex-interpolated data buffer - /// - /// @param varyingDesc The descriptor of varying elements to be refined. - /// if it's null, all primvars in the vertex buffer - /// will be refined. - /// - template - void Compute( CpuComputeContext const * context, - VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc=NULL, - VertexBufferDescriptor const * varyingDesc=NULL ){ - - bind(vertexBuffer, varyingBuffer, vertexDesc, varyingDesc); - - ApplyStencilTableKernel(context); - - unbind(); - } - - /// Execute subdivision kernels and apply to given vertex buffers. 
- /// - /// @param context The CpuContext to apply refinement operations to - /// - /// @param vertexBuffer Vertex-interpolated data buffer - /// - template - void Compute(CpuComputeContext const * context, - VERTEX_BUFFER *vertexBuffer) { - - Compute(context, vertexBuffer, (VERTEX_BUFFER*)0); - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - void ApplyStencilTableKernel(ComputeContext const *context) const; - - template - void bind( VERTEX_BUFFER * vertexBuffer, - VARYING_BUFFER * varyingBuffer, - VertexBufferDescriptor const * vertexDesc, - VertexBufferDescriptor const * varyingDesc ) { - - // if the vertex buffer descriptor is specified, use it. - // otherwise, assumes the data is tightly packed in the vertex buffer. - if (vertexDesc) { - _currentBindState.vertexDesc = *vertexDesc; - } else { - int numElements = vertexBuffer ? vertexBuffer->GetNumElements() : 0; - _currentBindState.vertexDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - if (varyingDesc) { - _currentBindState.varyingDesc = *varyingDesc; - } else { - int numElements = varyingBuffer ? varyingBuffer->GetNumElements() : 0; - _currentBindState.varyingDesc = - VertexBufferDescriptor(0, numElements, numElements); - } - - _currentBindState.vertexBuffer = vertexBuffer ? - vertexBuffer->BindCpuBuffer(): 0; - - _currentBindState.varyingBuffer = varyingBuffer ? - varyingBuffer->BindCpuBuffer() : 0; - } - - void unbind() { - _currentBindState.Reset(); - } - -private: - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of the vertex buffers. 
- struct BindState { - - BindState() : vertexBuffer(0), varyingBuffer(0) { } - - void Reset() { - vertexBuffer = varyingBuffer = 0; - vertexDesc.Reset(); - varyingDesc.Reset(); - } - - float * vertexBuffer, - * varyingBuffer; - - VertexBufferDescriptor vertexDesc, - varyingDesc; - }; - - BindState _currentBindState; - int _numThreads; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // OSD_TBB_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/tbbEvalStencilsController.cpp b/opensubdiv/osd/tbbEvalStencilsController.cpp deleted file mode 100644 index ecaffaf1..00000000 --- a/opensubdiv/osd/tbbEvalStencilsController.cpp +++ /dev/null @@ -1,200 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. 
-// - -#include "../osd/tbbEvalStencilsController.h" - -#include -#include - -#include - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -#define grain_size 200 - -TbbEvalStencilsController::TbbEvalStencilsController(int numThreads) { - - _numThreads = numThreads > 0 ? numThreads : tbb::task_scheduler_init::automatic; - - tbb::task_scheduler_init init(numThreads); -} - -TbbEvalStencilsController::~TbbEvalStencilsController() { -} - - -class StencilKernel { - -public: - enum Mode { UNDEFINED, POINT, U_DERIV, V_DERIV }; - - StencilKernel( Far::LimitStencilTables const * stencils, - VertexBufferDescriptor ctrlDesc, - float const * ctrlData ) : - _stencils(stencils), - _mode(UNDEFINED), - _ctrlDesc(ctrlDesc), - _length(0), - _outStride(0), - _outData(0) { - - _ctrlData = ctrlData + ctrlDesc.offset; - } - - bool SetOutput(Mode mode, VertexBufferDescriptor outDesc, float * outData) { - - if (_ctrlDesc.CanEval(outDesc)) { - _mode = mode; - _length = outDesc.length; - _outStride = outDesc.stride; - _outData = outData + outDesc.offset; - return true; - } - return false; - } - - void operator() (tbb::blocked_range const &r) const { - - assert(_stencils and _ctrlData and _length and _outStride and _outData); - - Far::Index offset = _stencils->GetOffsets()[r.begin()]; - - unsigned char const * sizes = &_stencils->GetSizes()[r.begin()]; - Far::Index const * index = &_stencils->GetControlIndices()[offset]; - - float const * weight; - - switch (_mode) { - case POINT : weight = &_stencils->GetWeights()[offset]; break; - case U_DERIV : weight = &_stencils->GetDuWeights()[offset]; break; - case V_DERIV : weight = &_stencils->GetDvWeights()[offset]; break; - default: - return; - } - assert( weight); - - float * out = _outData + r.begin() * _outStride; - - for (int i=r.begin(); iGetStencilTables(); - if (not stencils) - return 0; - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return 0; - - StencilKernel kernel( stencils, 
_currentBindState.controlDataDesc, - _currentBindState.controlData ); - - - if (not kernel.SetOutput( StencilKernel::POINT, - _currentBindState.outputDataDesc, - _currentBindState.outputData )) - return 0; - - tbb::blocked_range range(0, nstencils, grain_size); - - tbb::parallel_for(range, kernel); - - return nstencils; -} - -int -TbbEvalStencilsController::_UpdateDerivs( CpuEvalStencilsContext * context ) { - - Far::LimitStencilTables const * stencils = context->GetStencilTables(); - if (not stencils) - return 0; - - int nstencils = stencils->GetNumStencils(); - if (not nstencils) - return 0; - - tbb::blocked_range range(0, nstencils, grain_size); - - StencilKernel kernel( stencils, _currentBindState.controlDataDesc, - _currentBindState.controlData ); - - if (not kernel.SetOutput( StencilKernel::U_DERIV, - _currentBindState.outputDuDesc, - _currentBindState.outputUDeriv ) ) - return 0; - - tbb::parallel_for(range, kernel); - - if (not kernel.SetOutput( StencilKernel::V_DERIV, - _currentBindState.outputDvDesc, - _currentBindState.outputVDeriv ) ) - return 0; - - tbb::parallel_for(range, kernel); - - return nstencils; -} - -void -TbbEvalStencilsController::Synchronize() { -} - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -} // end namespace OpenSubdiv diff --git a/opensubdiv/osd/tbbEvalStencilsController.h b/opensubdiv/osd/tbbEvalStencilsController.h deleted file mode 100644 index ce2cac0c..00000000 --- a/opensubdiv/osd/tbbEvalStencilsController.h +++ /dev/null @@ -1,216 +0,0 @@ -// -// Copyright 2013 Pixar -// -// Licensed under the Apache License, Version 2.0 (the "Apache License") -// with the following modification; you may not use this file except in -// compliance with the Apache License and the following modification to it: -// Section 6. Trademarks. is deleted and replaced with: -// -// 6. Trademarks. 
This License does not grant permission to use the trade -// names, trademarks, service marks, or product names of the Licensor -// and its affiliates, except as required to comply with Section 4(c) of -// the License and to reproduce the content of the NOTICE file. -// -// You may obtain a copy of the Apache License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the Apache License with the above modification is -// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the Apache License for the specific -// language governing permissions and limitations under the Apache License. -// - -#ifndef FAR_TBB_EVALSTENCILS_CONTROLLER_H -#define FAR_TBB_EVALSTENCILS_CONTROLLER_H - -#include "../version.h" - -#include "../osd/cpuEvalStencilsContext.h" - - -namespace OpenSubdiv { -namespace OPENSUBDIV_VERSION { - -namespace Osd { - -/// -/// \brief CPU stencils evaluation controller -/// -/// CpuStencilsController is a compute controller class to launch -/// single threaded CPU stencil evalution kernels. -/// -/// Controller entities execute requests from Context instances that they share -/// common interfaces with. Controllers are attached to discrete compute devices -/// and share the devices resources with Context entities. -/// -class TbbEvalStencilsController { -public: - - /// \brief Constructor. - /// - /// @param numThreads specifies how many openmp parallel threads to use. - /// -1 attempts to use all available processors. - /// - TbbEvalStencilsController(int numThreads=-1); - - /// \brief Destructor. 
- ~TbbEvalStencilsController(); - - - /// \brief Applies stencil weights to the control vertex data - /// - /// Applies the stencil weights to the control vertex data to evaluate the - /// interpolated limit positions at the parametric locations of the stencils - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDataDesc vertex buffer descriptor for the output vertex data - /// - /// @param outputData output vertex buffer for the interpolated data - /// - template - int UpdateValues( CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDataDesc, OUTPUT_BUFFER *outputData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputData( outputDataDesc, outputData ); - - int n = _UpdateValues( context ); - - unbind(); - - return n; - } - - /// \brief Applies derivative stencil weights to the control vertex data - /// - /// Computes the U and V derivative stencils to the control vertex data at - /// the parametric locations contained in each stencil - /// - /// @param context the CpuEvalStencilsContext with the stencil weights - /// - /// @param controlDataDesc vertex buffer descriptor for the control vertex data - /// - /// @param controlVertices vertex buffer with the control vertices data - /// - /// @param outputDuDesc vertex buffer descriptor for the U derivative output data - /// - /// @param outputDuData output vertex buffer for the U derivative data - /// - /// @param outputDvDesc vertex buffer descriptor for the V deriv output data - /// - /// @param outputDvData output vertex buffer for the V derivative data - /// - template - int UpdateDerivs( 
CpuEvalStencilsContext * context, - VertexBufferDescriptor const & controlDataDesc, CONTROL_BUFFER *controlVertices, - VertexBufferDescriptor const & outputDuDesc, OUTPUT_BUFFER *outputDuData, - VertexBufferDescriptor const & outputDvDesc, OUTPUT_BUFFER *outputDvData ) { - - if (not context->GetStencilTables()->GetNumStencils()) - return 0; - - bindControlData( controlDataDesc, controlVertices ); - - bindOutputDerivData( outputDuDesc, outputDuData, outputDvDesc, outputDvData ); - - int n = _UpdateDerivs( context ); - - unbind(); - - return n; - } - - /// Waits until all running subdivision kernels finish. - void Synchronize(); - -protected: - - /// \brief Binds control vertex data buffer - template - void bindControlData(VertexBufferDescriptor const & controlDataDesc, VERTEX_BUFFER *controlData ) { - - _currentBindState.controlData = controlData ? controlData->BindCpuBuffer() : 0; - _currentBindState.controlDataDesc = controlDataDesc; - - } - - /// \brief Binds output vertex data buffer - template - void bindOutputData( VertexBufferDescriptor const & outputDataDesc, VERTEX_BUFFER *outputData ) { - - _currentBindState.outputData = outputData ? outputData->BindCpuBuffer() : 0; - _currentBindState.outputDataDesc = outputDataDesc; - } - - /// \brief Binds output derivative vertex data buffer - template - void bindOutputDerivData( VertexBufferDescriptor const & outputDuDesc, VERTEX_BUFFER *outputDu, - VertexBufferDescriptor const & outputDvDesc, VERTEX_BUFFER *outputDv ) { - - _currentBindState.outputUDeriv = outputDu ? outputDu ->BindCpuBuffer() : 0; - _currentBindState.outputVDeriv = outputDv ? outputDv->BindCpuBuffer() : 0; - _currentBindState.outputDuDesc = outputDuDesc; - _currentBindState.outputDvDesc = outputDvDesc; - } - - /// \brief Unbinds any previously bound vertex and varying data buffers. 
- void unbind() { - _currentBindState.Reset(); - } - -private: - - int _UpdateValues( CpuEvalStencilsContext * context ); - int _UpdateDerivs( CpuEvalStencilsContext * context ); - - int _numThreads; - - // Bind state is a transitional state during refinement. - // It doesn't take an ownership of vertex buffers. - struct BindState { - - BindState() : controlData(0), outputData(0), outputUDeriv(0), outputVDeriv(0) { } - - void Reset() { - controlData = outputData = outputUDeriv = outputVDeriv = NULL; - controlDataDesc.Reset(); - outputDataDesc.Reset(); - outputDuDesc.Reset(); - outputDvDesc.Reset(); - } - - // transient mesh data - VertexBufferDescriptor controlDataDesc, - outputDataDesc, - outputDuDesc, - outputDvDesc; - - float * controlData, - * outputData, - * outputUDeriv, - * outputVDeriv; - }; - - BindState _currentBindState; -}; - -} // end namespace Osd - -} // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - -} // end namespace OpenSubdiv - -#endif // FAR_TBB_EVALSTENCILS_CONTROLLER_H diff --git a/opensubdiv/osd/computeController.h b/opensubdiv/osd/tbbEvaluator.cpp similarity index 54% rename from opensubdiv/osd/computeController.h rename to opensubdiv/osd/tbbEvaluator.cpp index b81ccaf5..71fc1528 100644 --- a/opensubdiv/osd/computeController.h +++ b/opensubdiv/osd/tbbEvaluator.cpp @@ -1,5 +1,5 @@ // -// Copyright 2013 Pixar +// Copyright 2015 Pixar // // Licensed under the Apache License, Version 2.0 (the "Apache License") // with the following modification; you may not use this file except in @@ -22,46 +22,51 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_COMPUTE_CONTROLLER_H -#define OSD_COMPUTE_CONTROLLER_H +#include "../osd/tbbEvaluator.h" +#include "../osd/tbbKernel.h" -#include "../version.h" +#include namespace OpenSubdiv { namespace OPENSUBDIV_VERSION { namespace Osd { -/*! 
- \page sequence_page API sequence diagrams +/* static */ +bool +TbbEvaluator::EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char * sizes, + const int * offsets, + const int * indices, + const float * weights, + int start, int end) { + if (end <= start) return true; - This section describes the typical sequence of initialization and drawing - animated prims using OpenSubdiv. + TbbEvalStencils(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights, start, end); - \section init_sec Initialize + return true; +} - \image html OsdCreateSequence.png +/* static */ +void +TbbEvaluator::Synchronize(void *) { +} - \section draw_sec Refine and Draw - - \image html OsdRefineDrawSequence.png - - */ - -// XXX: do we really need this base class? -class ComputeController { -public: - virtual ~ComputeController() {} - -protected: - ComputeController() {} -}; +/* static */ +void +TbbEvaluator::SetNumThreads(int numThreads) { + if (numThreads == -1) { + tbb::task_scheduler_init init; + } else { + tbb::task_scheduler_init init(numThreads); + } +} } // end namespace Osd } // end namespace OPENSUBDIV_VERSION -using namespace OPENSUBDIV_VERSION; - } // end namespace OpenSubdiv - -#endif // OSD_COMPUTE_CONTROLLER_H diff --git a/opensubdiv/osd/tbbEvaluator.h b/opensubdiv/osd/tbbEvaluator.h new file mode 100644 index 00000000..8521cc82 --- /dev/null +++ b/opensubdiv/osd/tbbEvaluator.h @@ -0,0 +1,112 @@ +// +// Copyright 2015 Pixar +// +// Licensed under the Apache License, Version 2.0 (the "Apache License") +// with the following modification; you may not use this file except in +// compliance with the Apache License and the following modification to it: +// Section 6. Trademarks. is deleted and replaced with: +// +// 6. Trademarks. 
This License does not grant permission to use the trade +// names, trademarks, service marks, or product names of the Licensor +// and its affiliates, except as required to comply with Section 4(c) of +// the License and to reproduce the content of the NOTICE file. +// +// You may obtain a copy of the Apache License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the Apache License with the above modification is +// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the Apache License for the specific +// language governing permissions and limitations under the Apache License. +// + +#ifndef OPENSUBDIV_OSD_TBB_EVALUATOR_H +#define OPENSUBDIV_OSD_TBB_EVALUATOR_H + +#include "../version.h" +#include "../osd/vertexDescriptor.h" + +#include + +namespace OpenSubdiv { +namespace OPENSUBDIV_VERSION { + +namespace Osd { + +class TbbEvaluator { +public: + /// \brief Generic static stencil eval function. This function has a same + /// signature as other device kernels have so that it can be called + /// transparently from OsdMesh template interface. + /// + /// @param srcBuffer Input primvar buffer. + /// must have BindCpuBuffer() method returning a + /// const float pointer for read + /// + /// @param srcDesc vertex buffer descriptor for the input buffer + /// + /// @param dstBuffer Output primvar buffer + /// must have BindCpuBuffer() method returning a + /// float pointer for write + /// + /// @param dstDesc vertex buffer descriptor for the output buffer + /// + /// @param stencilTable stencil table to be applied. 
+ /// + /// @param instance not used in the tbb kernel + /// (declared as a typed pointer to prevent + /// undesirable template resolution) + /// + /// @param deviceContext not used in the tbb kernel + /// + template + static bool EvalStencils(VERTEX_BUFFER *srcVertexBuffer, + VertexBufferDescriptor const &srcDesc, + VERTEX_BUFFER *dstVertexBuffer, + VertexBufferDescriptor const &dstDesc, + STENCIL_TABLE const *stencilTable, + TbbEvaluator const *instance = NULL, + void *deviceContext = NULL) { + (void)instance; // unused + (void)deviceContext; // unused + + return EvalStencils(srcVertexBuffer->BindCpuBuffer(), + srcDesc, + dstVertexBuffer->BindCpuBuffer(), + dstDesc, + &stencilTable->GetSizes()[0], + &stencilTable->GetOffsets()[0], + &stencilTable->GetControlIndices()[0], + &stencilTable->GetWeights()[0], + /*start = */ 0, + /*end = */ stencilTable->GetNumStencils()); + } + + static bool EvalStencils(const float *src, + VertexBufferDescriptor const &srcDesc, + float *dst, + VertexBufferDescriptor const &dstDesc, + const unsigned char *sizes, + const int *offsets, + const int *indices, + const float *weights, + int start, + int end); + + static void Synchronize(void *deviceContext = NULL); + + static void SetNumThreads(int numThreads); +}; + + +} // end namespace Osd + +} // end namespace OPENSUBDIV_VERSION +using namespace OPENSUBDIV_VERSION; + +} // end namespace OpenSubdiv + + +#endif // OPENSUBDIV_OSD_TBB_EVALUATOR_H diff --git a/opensubdiv/osd/tbbKernel.cpp b/opensubdiv/osd/tbbKernel.cpp index cd01030f..cf82cdb3 100644 --- a/opensubdiv/osd/tbbKernel.cpp +++ b/opensubdiv/osd/tbbKernel.cpp @@ -74,9 +74,9 @@ copy(float *dst, int dstIndex, const float *src, class TBBStencilKernel { - VertexBufferDescriptor _vertexDesc; + VertexBufferDescriptor _srcDesc; + VertexBufferDescriptor _dstDesc; float const * _vertexSrc; - float * _vertexDst; unsigned char const * _sizes; @@ -86,19 +86,24 @@ class TBBStencilKernel { public: - TBBStencilKernel(VertexBufferDescriptor 
vertexDesc, float const * vertexSrc, - float * vertexDst, unsigned char const * sizes, int const * offsets, - int const * indices, float const * weights ) : - _vertexDesc(vertexDesc), - _vertexSrc(vertexSrc), - _vertexDst(vertexDst), + TBBStencilKernel(float const *src, + VertexBufferDescriptor srcDesc, + float *dst, + VertexBufferDescriptor dstDesc, + unsigned char const * sizes, int const * offsets, + int const * indices, float const * weights) : + _srcDesc(srcDesc), + _dstDesc(dstDesc), + _vertexSrc(src), + _vertexDst(dst), _sizes(sizes), _offsets(offsets), _indices(indices), _weights(weights) { } TBBStencilKernel(TBBStencilKernel const & other) { - _vertexDesc = other._vertexDesc; + _srcDesc = other._srcDesc; + _dstDesc = other._dstDesc; _sizes = other._sizes; _offsets = other._offsets; _indices = other._indices; @@ -110,14 +115,14 @@ public: void operator() (tbb::blocked_range const &r) const { #define USE_SIMD #ifdef USE_SIMD - if (_vertexDesc.length==4 and _vertexDesc.stride==4) { + if (_srcDesc.length==4 and _srcDesc.stride==4 and _dstDesc.stride==4) { // SIMD fast path for aligned primvar data (4 floats) int offset = _offsets[r.begin()]; ComputeStencilKernel<4>(_vertexSrc, _vertexDst, _sizes, _indices+offset, _weights+offset, r.begin(), r.end()); - } else if (_vertexDesc.length==8 and _vertexDesc.stride==4) { + } else if (_srcDesc.length==8 and _srcDesc.stride==4 and _dstDesc.stride==4) { // SIMD fast path for aligned primvar data (8 floats) int offset = _offsets[r.begin()]; @@ -127,7 +132,7 @@ public: } else { #else { -#endif +#endif unsigned char const * sizes = _sizes; int const * indices = _indices; float const * weights = _weights; @@ -139,36 +144,43 @@ public: } // Slow path for non-aligned data - float * result = (float*)alloca(_vertexDesc.length * sizeof(float)); + float * result = (float*)alloca(_srcDesc.length * sizeof(float)); for (int i=r.begin(); i=0 and start 0) { + sizes += start; + indices += offsets[start]; + weights += offsets[start]; + } 
+ src += srcDesc.offset; + dst += dstDesc.offset; - TBBStencilKernel kernel(vertexDesc, vertexSrc, vertexDst, - sizes, offsets, indices, weights); + TBBStencilKernel kernel(src, srcDesc, dst, dstDesc, + sizes, offsets, indices, weights); tbb::blocked_range range(start, end, grain_size); diff --git a/opensubdiv/osd/tbbKernel.h b/opensubdiv/osd/tbbKernel.h index e1524143..17b5f3c3 100644 --- a/opensubdiv/osd/tbbKernel.h +++ b/opensubdiv/osd/tbbKernel.h @@ -22,8 +22,8 @@ // language governing permissions and limitations under the Apache License. // -#ifndef OSD_TBB_KERNEL_H -#define OSD_TBB_KERNEL_H +#ifndef OPENSUBDIV_OSD_TBB_KERNEL_H +#define OPENSUBDIV_OSD_TBB_KERNEL_H #include "../version.h" @@ -35,14 +35,15 @@ namespace Osd { struct VertexBufferDescriptor; void -TbbComputeStencils(VertexBufferDescriptor const &vertexDesc, - float const * vertexSrc, - float * vertexDst, - unsigned char const * sizes, - int const * offsets, - int const * indices, - float const * weights, - int start, int end); +TbbEvalStencils(float const * src, + VertexBufferDescriptor const &srcDesc, + float * dst, + VertexBufferDescriptor const &dstDesc, + unsigned char const * sizes, + int const * offsets, + int const * indices, + float const * weights, + int start, int end); } // end namespace Osd @@ -51,4 +52,4 @@ using namespace OPENSUBDIV_VERSION; } // end namespace OpenSubdiv -#endif // OSD_TBB_KERNEL_H +#endif // OPENSUBDIV_OSD_TBB_KERNEL_H diff --git a/regression/osd_regression/main.cpp b/regression/osd_regression/main.cpp index 444ea155..552c60da 100644 --- a/regression/osd_regression/main.cpp +++ b/regression/osd_regression/main.cpp @@ -46,10 +46,9 @@ GLFWwindow* g_window=0; #include #include -#include +#include #include -#include -#include + #include @@ -269,17 +268,10 @@ static int checkMeshCPU( FarTopologyRefiner *refiner, const std::vector& coarseverts, xyzmesh * refmesh) { - - static Osd::CpuComputeController *controller = - new Osd::CpuComputeController(); - Far::StencilTables 
const *vertexStencils; Far::StencilTables const *varyingStencils; buildStencilTables(*refiner, &vertexStencils, &varyingStencils); - Osd::CpuComputeContext *context = Osd::CpuComputeContext::Create( - vertexStencils, varyingStencils); - assert(coarseverts.size() == (size_t)refiner->GetNumVerticesTotal()); @@ -288,12 +280,16 @@ checkMeshCPU( FarTopologyRefiner *refiner, vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() ); - controller->Compute( context, vb ); + Osd::CpuEvaluator::EvalStencils( + vb, + Osd::VertexBufferDescriptor(0, 3, 3), + vb, + Osd::VertexBufferDescriptor(refiner->GetNumVertices(0)*3, 3, 3), + vertexStencils); int result = checkVertexBuffer(*refiner, refmesh, vb->BindCpuBuffer(), vb->GetNumElements()); - delete context; delete vertexStencils; delete varyingStencils; delete vb; @@ -306,28 +302,26 @@ static int checkMeshCPUGL(FarTopologyRefiner *refiner, const std::vector& coarseverts, xyzmesh * refmesh) { - - static Osd::CpuComputeController *controller = - new Osd::CpuComputeController(); - + Far::StencilTables const *vertexStencils; Far::StencilTables const *varyingStencils; buildStencilTables(*refiner, &vertexStencils, &varyingStencils); - Osd::CpuComputeContext *context = Osd::CpuComputeContext::Create( - vertexStencils, varyingStencils); - Osd::CpuGLVertexBuffer *vb = Osd::CpuGLVertexBuffer::Create(3, refiner->GetNumVerticesTotal()); vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size() ); - controller->Compute( context, vb ); - + Osd::CpuEvaluator::EvalStencils( + vb, + Osd::VertexBufferDescriptor(0, 3, 3), + vb, + Osd::VertexBufferDescriptor(refiner->GetNumVertices(0)*3, 3, 3), + vertexStencils); + int result = checkVertexBuffer(*refiner, refmesh, vb->BindCpuBuffer(), vb->GetNumElements()); - delete context; delete vertexStencils; delete varyingStencils; delete vb; diff --git a/tutorials/osd/tutorial_0/osd_tutorial_0.cpp b/tutorials/osd/tutorial_0/osd_tutorial_0.cpp index 2b7ee656..94a77040 100644 --- 
a/tutorials/osd/tutorial_0/osd_tutorial_0.cpp +++ b/tutorials/osd/tutorial_0/osd_tutorial_0.cpp @@ -32,8 +32,7 @@ #include #include -#include -#include +#include #include #include @@ -73,11 +72,10 @@ int main(int, char **) { nCoarseVerts=0, nRefinedVerts=0; - Osd::CpuComputeContext * context=0; - // // Setup phase // + Far::StencilTables const * stencilTables = NULL; { // Setup Context Far::TopologyRefiner const * refiner = createTopologyRefiner(maxlevel); @@ -87,24 +85,15 @@ int main(int, char **) { options.generateOffsets=true; options.generateIntermediateLevels=false; - Far::StencilTables const * stencilTables = - Far::StencilTablesFactory::Create(*refiner, options); - - // Create an Osd Compute Context from the stencil tables - context = Osd::CpuComputeContext::Create(stencilTables, - /*vayingStencil=*/NULL); + stencilTables = Far::StencilTablesFactory::Create(*refiner, options); nCoarseVerts = refiner->GetNumVertices(0); nRefinedVerts = stencilTables->GetNumStencils(); // We are done with Far: cleanup tables delete refiner; - delete stencilTables; } - // Setup Controller - Osd::CpuComputeController controller; - // Setup a buffer for vertex primvar data: Osd::CpuVertexBuffer * vbuffer = Osd::CpuVertexBuffer::Create(3, nCoarseVerts + nRefinedVerts); @@ -117,8 +106,14 @@ int main(int, char **) { // and update every time control data changes vbuffer->UpdateData(g_verts, 0, nCoarseVerts); + + Osd::VertexBufferDescriptor srcDesc(0, 3, 3); + Osd::VertexBufferDescriptor dstDesc(nCoarseVerts*3, 3, 3); + // Launch the computation - controller.Compute(context, vbuffer); + Osd::CpuEvaluator::EvalStencils(vbuffer, srcDesc, + vbuffer, dstDesc, + stencilTables); } { // Visualization with Maya : print a MEL script that generates particles @@ -133,8 +128,8 @@ int main(int, char **) { printf("-c 1;\n"); } + delete stencilTables; delete vbuffer; - delete context; } //------------------------------------------------------------------------------