OpenSubdiv/examples/glShareTopology/glShareTopology.cpp

1727 lines
55 KiB
C++
Raw Normal View History

//
// Copyright 2014 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#if defined(__APPLE__)
#if defined(OSD_USES_GLEW)
#include <GL/glew.h>
#else
#include <OpenGL/gl3.h>
#endif
#define GLFW_INCLUDE_GL3
#define GLFW_NO_GLU
#else
#include <stdlib.h>
#include <GL/glew.h>
#if defined(WIN32)
#include <GL/wglew.h>
#endif
#endif
#include <GLFW/glfw3.h>
GLFWwindow* g_window=0;
GLFWmonitor* g_primary=0;
#include <osd/glDrawContext.h>
#include <osd/glDrawRegistry.h>
#include <osd/glMesh.h>
#include <far/error.h>
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <far/stencilTables.h>
#include <far/ptexIndices.h>
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/mesh.h>
#include <osd/glVertexBuffer.h>
#include <osd/cpuGLVertexBuffer.h>
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/cpuEvaluator.h>
#ifdef OPENSUBDIV_HAS_OPENMP
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/ompEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_TBB
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/tbbEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clGLVertexBuffer.h>
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/clEvaluator.h>
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaGLVertexBuffer.h>
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/cudaEvaluator.h>
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/glXFBEvaluator.h>
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
#include <osd/glComputeEvaluator.h>
#endif
#include <common/vtr_utils.h>
#include <shapes/catmark_cube.h>
#include <shapes/catmark_bishop.h>
#include <shapes/catmark_pawn.h>
#include <shapes/catmark_rook.h>
#include "../common/stopwatch.h"
#include "../common/simple_math.h"
#include "../common/gl_hud.h"
static const char *shaderSource =
#include "shader.gen.h"
;
#include <cfloat>
#include <vector>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace OpenSubdiv;
// ---------------------------------------------------------------------------
class InstancesBase {
public:
virtual ~InstancesBase() {}
virtual void UpdateVertexBuffer(int instance, std::vector<float> const &src) = 0;
virtual void UpdateVaryingBuffer(int instance, std::vector<float> const &src) = 0;
virtual GLuint BindVertexBuffer() = 0;
virtual GLuint BindVaryingBuffer() = 0;
Osd::VertexBufferDescriptor const &GetVertexDesc() const {
return _vertexDesc;
}
Osd::VertexBufferDescriptor const &GetVaryingDesc() const {
return _varyingDesc;
}
protected:
InstancesBase(Osd::VertexBufferDescriptor const &vertexDesc,
Osd::VertexBufferDescriptor const &varyingDesc,
int numVertices) :
_vertexDesc(vertexDesc),
_varyingDesc(varyingDesc),
_numVertices(numVertices) {
}
int getNumVertices() const { return _numVertices; }
private:
Osd::VertexBufferDescriptor _vertexDesc;
Osd::VertexBufferDescriptor _varyingDesc;
int _numVertices; // # of vertices of single instance
};
template <class VERTEX_BUFFER, class DEVICE_CONTEXT>
class Instances : public InstancesBase {
public:
Instances(int numInstances,
Osd::VertexBufferDescriptor const &vertexDesc,
Osd::VertexBufferDescriptor const &varyingDesc,
bool interleaved,
int numVertices,
DEVICE_CONTEXT *deviceContext) :
InstancesBase(vertexDesc, varyingDesc, numVertices),
_vertexBuffer(NULL), _varyingBuffer(NULL), _interleaved(interleaved),
_deviceContext(deviceContext) {
if (interleaved) {
assert(vertexDesc.stride == varyingDesc.stride);
_vertexBuffer = createVertexBuffer(
vertexDesc.stride, numInstances * numVertices);
} else {
if (vertexDesc.stride > 0) {
_vertexBuffer = createVertexBuffer(
vertexDesc.stride, numInstances * numVertices);
}
if (varyingDesc.stride > 0) {
_varyingBuffer = createVertexBuffer(
varyingDesc.stride, numInstances * numVertices);
}
}
}
virtual ~Instances() {
delete _vertexBuffer;
delete _varyingBuffer;
}
virtual void UpdateVertexBuffer(int instance, std::vector<float> const &src) {
updateVertexBuffer(_vertexBuffer, &src[0], instance * getNumVertices(),
(int)src.size()/_vertexBuffer->GetNumElements());
}
virtual void UpdateVaryingBuffer(int instance, std::vector<float> const &src) {
updateVertexBuffer(_varyingBuffer, &src[0], instance * getNumVertices(),
(int)src.size()/_varyingBuffer->GetNumElements());
}
virtual GLuint BindVertexBuffer() {
return _vertexBuffer->BindVBO();
}
virtual GLuint BindVaryingBuffer() {
return _varyingBuffer->BindVBO();
}
VERTEX_BUFFER *createVertexBuffer(int numElements, int numVertices) {
return VERTEX_BUFFER::Create(numElements, numVertices, _deviceContext);
}
void updateVertexBuffer(VERTEX_BUFFER *vertexBuffer,
const float *src, int startVertex,
int numVertices) {
vertexBuffer->UpdateData(src, startVertex, numVertices, _deviceContext);
}
VERTEX_BUFFER *GetVertexBuffer() const { return _vertexBuffer; }
VERTEX_BUFFER *GetVaryingBuffer() const { return _interleaved ? _vertexBuffer :_varyingBuffer; }
private:
VERTEX_BUFFER *_vertexBuffer;
VERTEX_BUFFER *_varyingBuffer;
bool _interleaved;
DEVICE_CONTEXT *_deviceContext;
};
// ---------------------------------------------------------------------------
class TopologyBase {
public:
virtual ~TopologyBase() {
delete _drawContext;
}
virtual void Refine(InstancesBase *instance, int numInstances) = 0;
virtual InstancesBase *CreateInstances(
int numInstances,
Osd::VertexBufferDescriptor const &vertexDesc,
Osd::VertexBufferDescriptor const &varyingDesc,
bool interleaved) = 0;
virtual void UpdateVertexTexture(InstancesBase *instances) = 0;
virtual void Synchronize() = 0;
Osd::GLDrawContext *GetDrawContext() const {
return _drawContext;
}
void SetRestPosition(std::vector<float> const &restPosition) {
_restPosition = restPosition;
}
std::vector<float> const &GetRestPosition() const {
return _restPosition;
}
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
int GetNumVertices() const { // total (control + refined)
return _numVertices;
}
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
int GetNumControlVertices() const {
return _numControlVertices;
}
protected:
TopologyBase(Far::PatchTables const * patchTables) {
_drawContext = Osd::GLDrawContext::Create(patchTables);
}
int _numVertices;
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
int _numControlVertices;
private:
Osd::GLDrawContext *_drawContext;
std::vector<float> _restPosition;
};
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
template <class EVALUATOR,
class VERTEX_BUFFER,
class STENCIL_TABLES,
class DEVICE_CONTEXT=void>
class Topology : public TopologyBase {
public:
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
typedef EVALUATOR Evaluator;
typedef STENCIL_TABLES StencilTables;
typedef DEVICE_CONTEXT DeviceContext;
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
typedef Osd::EvaluatorCacheT<Evaluator> EvaluatorCache;
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
Topology(Far::PatchTables const * patchTables,
Far::StencilTables const * vertexStencils, //XXX: takes ownership
Far::StencilTables const * varyingStencils,
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
int numControlVertices,
EvaluatorCache * evaluatorCache = NULL,
DeviceContext * deviceContext = NULL)
: TopologyBase(patchTables),
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
_evaluatorCache(evaluatorCache),
_deviceContext(deviceContext) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
_numControlVertices = numControlVertices;
_numVertices = numControlVertices + vertexStencils->GetNumStencils();
_vertexStencils = Osd::convertToCompatibleStencilTables<StencilTables>(
vertexStencils, deviceContext);
_varyingStencils = Osd::convertToCompatibleStencilTables<StencilTables>(
varyingStencils, deviceContext);
}
~Topology() {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
delete _vertexStencils;
delete _varyingStencils;
}
void Refine(InstancesBase *instance, int numInstances) {
Osd::VertexBufferDescriptor const &globalVertexDesc =
instance->GetVertexDesc();
Osd::VertexBufferDescriptor const &globalVaryingDesc =
instance->GetVaryingDesc();
Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *typedInstance =
static_cast<Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *>(instance);
for (int i = 0; i < numInstances; ++i) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
Osd::VertexBufferDescriptor vertexSrcDesc(
globalVertexDesc.offset + _numVertices*i*globalVertexDesc.stride,
globalVertexDesc.length,
globalVertexDesc.stride);
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
Osd::VertexBufferDescriptor vertexDstDesc(
globalVertexDesc.offset + (_numVertices*i + _numControlVertices)*globalVertexDesc.stride,
globalVertexDesc.length,
globalVertexDesc.stride);
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
// vertex
Evaluator const *evalInstance = Osd::GetEvaluator<Evaluator>(
_evaluatorCache, vertexSrcDesc, vertexDstDesc, _deviceContext);
Evaluator::EvalStencils(typedInstance->GetVertexBuffer(), vertexSrcDesc,
typedInstance->GetVertexBuffer(), vertexDstDesc,
_vertexStencils,
evalInstance,
_deviceContext);
// varying
if (_varyingStencils) {
Osd::VertexBufferDescriptor varyingSrcDesc(
globalVaryingDesc.offset + _numVertices*i*globalVaryingDesc.stride,
globalVaryingDesc.length,
globalVaryingDesc.stride);
Osd::VertexBufferDescriptor varyingDstDesc(
globalVaryingDesc.offset + (_numVertices*i + _numControlVertices)*globalVaryingDesc.stride,
globalVaryingDesc.length,
globalVaryingDesc.stride);
evalInstance = Osd::GetEvaluator<Evaluator>(
_evaluatorCache, varyingSrcDesc, varyingDstDesc, _deviceContext);
if (typedInstance->GetVaryingBuffer()) {
// non interleaved
Evaluator::EvalStencils(
typedInstance->GetVaryingBuffer(), varyingSrcDesc,
typedInstance->GetVaryingBuffer(), varyingDstDesc,
_varyingStencils,
evalInstance,
_deviceContext);
} else {
// interleaved
Evaluator::EvalStencils(
typedInstance->GetVertexBuffer(), varyingSrcDesc,
typedInstance->GetVertexBuffer(), varyingDstDesc,
_varyingStencils,
evalInstance,
_deviceContext);
}
}
}
}
virtual InstancesBase *CreateInstances(
int numInstances,
Osd::VertexBufferDescriptor const &vertexDesc,
Osd::VertexBufferDescriptor const &varyingDesc,
bool interleaved) {
return new Instances<VERTEX_BUFFER, DEVICE_CONTEXT>(
numInstances, vertexDesc, varyingDesc,
interleaved, _numVertices, _deviceContext);
}
virtual void Synchronize() {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
Evaluator::Synchronize(_deviceContext);
}
virtual void UpdateVertexTexture(InstancesBase *instances) {
Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *typedInstance =
static_cast<Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *>(instances);
GetDrawContext()->UpdateVertexTexture(typedInstance->GetVertexBuffer());
}
private:
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
StencilTables const *_vertexStencils;
StencilTables const *_varyingStencils;
EvaluatorCache * _evaluatorCache;
DeviceContext *_deviceContext;
};
TopologyBase *g_topology = NULL;
InstancesBase *g_instances = NULL;
enum KernelType { kCPU = 0,
kOPENMP = 1,
kTBB = 2,
kCUDA = 3,
kCL = 4,
kGLSL = 5,
kGLSLCompute = 6 };
enum DisplayStyle { kWire = 0,
kShaded,
kWireShaded,
kVarying,
kVaryingInterleaved };
enum HudCheckBox { kHUD_CB_FREEZE };
// GUI variables
int g_displayStyle = kShaded,
g_adaptive = 0,
g_mbutton[3] = {0, 0, 0},
g_freeze = 0,
g_running = 1;
float g_rotate[2] = {0, 0},
g_dolly = 5,
g_pan[2] = {0, 0},
g_center[3] = {0, 0, 0},
g_size = 0;
int g_prev_x = 0,
g_prev_y = 0;
int g_width = 1024,
g_height = 1024;
GLhud g_hud;
// performance
float g_cpuTime = 0;
float g_gpuTime = 0;
Stopwatch g_fpsTimer;
int g_level = 2;
int g_tessLevel = 1;
int g_tessLevelMin = 1;
int g_numInstances = 25;
int g_frame = 0;
int g_kernel = kCPU;
GLuint g_transformUB = 0,
g_transformBinding = 0,
g_tessellationUB = 0,
g_tessellationBinding = 0,
g_lightingUB = 0,
g_lightingBinding = 0;
struct Transform {
float ModelViewMatrix[16];
float ProjectionMatrix[16];
float ModelViewProjectionMatrix[16];
} g_transformData;
GLuint g_queries[2] = {0, 0};
GLuint g_vao = 0;
static void
checkGLErrors(std::string const & where = "") {
GLuint err;
while ((err = glGetError()) != GL_NO_ERROR) {
std::cerr << "GL error: "
<< (where.empty() ? "" : where + " ")
<< err << "\n";
}
}
//------------------------------------------------------------------------------
struct SimpleShape {
std::string name;
Scheme scheme;
std::string data;
SimpleShape() { }
SimpleShape( std::string const & idata, char const * iname, Scheme ischeme )
: name(iname), scheme(ischeme), data(idata) { }
};
//------------------------------------------------------------------------------
static void
updateGeom() {
std::vector<float> const &restPosition = g_topology->GetRestPosition();
int nverts = (int)restPosition.size()/3;
int numVertexElements = (g_displayStyle == kVaryingInterleaved ? 7 : 3);
int numVaryingElements = (g_displayStyle == kVarying ? 4 : 0);
std::vector<float> vertex(numVertexElements * nverts);
std::vector<float> varying(numVaryingElements * nverts);
int column = (int)ceil(sqrt((float)g_numInstances));
for (int i = 0; i < g_numInstances; ++i) {
float *d = &vertex[0];
const float *p = &restPosition[0];
for (int j = 0; j < nverts; ++j) {
*d++ = p[0] + i%column - 0.5f*(column-1);
*d++ = p[1] + i/column - 0.5f*(column-1);
*d++ = p[2] * (float)(1+sin(0.1f*g_frame + i));
p += 3;
if (g_displayStyle == kVaryingInterleaved) {
*d++ = (1+(float)sin(0.1f*g_frame + i)) * 0.5f;
*d++ = 1;
*d++ = 1;
*d++ = 1.0;
}
}
g_instances->UpdateVertexBuffer(i, vertex);
if (g_displayStyle == kVarying) {
float *d = &varying[0];
for (int j = 0; j < nverts; ++j) {
*d++ = 1;
*d++ = (1+(float)sin(0.1f*g_frame + i)) * 0.5f;
*d++ = 1;
*d++ = 1.0;
}
g_instances->UpdateVaryingBuffer(i, varying);
}
}
}
static void
refine() {
Stopwatch s;
s.Start();
g_topology->Refine(g_instances, g_numInstances);
s.Stop();
g_cpuTime = float(s.GetElapsed() * 1000.0f);
s.Start();
g_topology->Synchronize();
s.Stop();
g_gpuTime = float(s.GetElapsed() * 1000.0f);
s.Stop();
}
//------------------------------------------------------------------------------
static TopologyBase *
createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark ) {
checkGLErrors("create osd enter");
Shape * shape = Shape::parseObj(shapeStr.c_str(), scheme);
std::vector<float> restPosition(shape->verts);
Far::TopologyRefiner * refiner = 0;
{
Sdc::SchemeType type = GetSdcType(*shape);
Sdc::Options options = GetSdcOptions(*shape);
refiner = Far::TopologyRefinerFactory<Shape>::Create(*shape,
Far::TopologyRefinerFactory<Shape>::Options(type, options));
assert(refiner);
}
// material assignment
std::vector<int> idsOnPtexFaces;
{
int numFaces = refiner->GetNumFaces(0);
// first, assign material ID to each coarse face
std::vector<int> idsOnCoarseFaces;
for (int i = 0; i < numFaces; ++i) {
int materialID = i%6;
idsOnCoarseFaces.push_back(materialID);
}
// create ptex index to coarse face index mapping
Far::PtexIndices ptexIndices(*refiner);
int numPtexFaces = ptexIndices.GetNumFaces();
// XXX: duped logic to simpleHbr
std::vector<int> ptexIndexToFaceMapping(numPtexFaces);
int ptexIndex = 0;
for (int face=0; face < numFaces; ++face) {
ptexIndexToFaceMapping[ptexIndex++] = face;
Far::ConstIndexArray fverts = refiner->GetFaceVertices(0, face);
if ( (scheme==kCatmark or scheme==kBilinear) and fverts.size() != 4 ) {
for (int j = 0; j < (fverts.size()-1); ++j) {
ptexIndexToFaceMapping[ptexIndex++] = face;
}
}
}
// convert ID array from coarse face index space to ptex index space
for (int i = 0; i < numPtexFaces; ++i) {
idsOnPtexFaces.push_back(idsOnCoarseFaces[ptexIndexToFaceMapping[i]]);
}
}
// Adaptive refinement currently supported only for catmull-clark scheme
bool doAdaptive = (g_adaptive!=0 and scheme==kCatmark);
if (doAdaptive) {
Far::TopologyRefiner::AdaptiveOptions options(level);
refiner->RefineAdaptive(options);
} else {
Far::TopologyRefiner::UniformOptions options(level);
options.fullTopologyInLastLevel = true;
refiner->RefineUniform(options);
}
Far::StencilTables const * vertexStencils=0, * varyingStencils=0;
{
Far::StencilTablesFactory::Options options;
options.generateOffsets = true;
options.generateIntermediateLevels = doAdaptive ? true : false;
vertexStencils = Far::StencilTablesFactory::Create(*refiner, options);
if (g_displayStyle==kVarying or g_displayStyle==kVaryingInterleaved) {
varyingStencils = Far::StencilTablesFactory::Create(*refiner, options);
}
assert(vertexStencils);
}
Far::PatchTables const * patchTables = NULL;
{
Far::PatchTablesFactory::Options poptions(level);
patchTables = Far::PatchTablesFactory::Create(*refiner, poptions);
}
// append gregory vertices into stencils
{
if (Far::StencilTables const *vertexStencilsWithEndCap =
Far::StencilTablesFactory::AppendEndCapStencilTables(
*refiner,
vertexStencils,
patchTables->GetEndCapVertexStencilTables())) {
delete vertexStencils;
vertexStencils = vertexStencilsWithEndCap;
}
if (varyingStencils) {
if (Far::StencilTables const *varyingStencilsWithEndCap =
Far::StencilTablesFactory::AppendEndCapStencilTables(
*refiner,
varyingStencils,
patchTables->GetEndCapVaryingStencilTables())) {
delete varyingStencils;
varyingStencils = varyingStencilsWithEndCap;
}
}
}
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
int numControlVertices = refiner->GetNumVertices(0);
// create partitioned patcharray
TopologyBase *topology = NULL;
if (g_kernel == kCPU) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
topology = new Topology<Osd::CpuEvaluator,
Osd::CpuGLVertexBuffer,
Far::StencilTables>(
patchTables,
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
vertexStencils, varyingStencils,
numControlVertices);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (g_kernel == kOPENMP) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
topology = new Topology<Osd::OmpEvaluator,
Osd::CpuGLVertexBuffer,
Far::StencilTables>(
patchTables,
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (g_kernel == kTBB) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
topology = new Topology<Osd::TbbEvaluator,
Osd::CpuGLVertexBuffer,
Far::StencilTables>(
patchTables,
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (g_kernel == kCUDA) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
topology = new Topology<Osd::CudaEvaluator,
Osd::CudaGLVertexBuffer,
Osd::CudaStencilTables>(
patchTables,
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if (g_kernel == kCL) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
static Osd::EvaluatorCacheT<Osd::CLEvaluator> clEvaluatorCache;
topology = new Topology<Osd::CLEvaluator,
Osd::CLGLVertexBuffer,
Osd::CLStencilTables,
CLDeviceContext>(
patchTables,
vertexStencils, varyingStencils,
numControlVertices,
&clEvaluatorCache,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if (g_kernel == kGLSL) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
static Osd::EvaluatorCacheT<Osd::GLXFBEvaluator> glXFBEvaluatorCache;
topology = new Topology<Osd::GLXFBEvaluator,
Osd::GLVertexBuffer,
Osd::GLStencilTablesTBO>(
patchTables,
vertexStencils, varyingStencils,
numControlVertices);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if (g_kernel == kGLSLCompute) {
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
static Osd::EvaluatorCacheT<Osd::GLComputeEvaluator> glComputeEvaluatorCache;
topology = new Topology<Osd::GLComputeEvaluator,
Osd::GLVertexBuffer,
Osd::GLStencilTablesSSBO>(
patchTables,
vertexStencils, varyingStencils,
numControlVertices);
#endif
} else {
}
delete refiner;
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
// XXX: Weired API. think again..
/// delete vertexStencils;
/// delete varyingStencils;
delete patchTables;
// centering rest position
float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX};
float max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
float center[3];
for (size_t i=0; i < restPosition.size()/3; ++i) {
for (int j=0; j<3; ++j) {
float v = restPosition[i*3+j];
min[j] = std::min(min[j], v);
max[j] = std::max(max[j], v);
}
}
for (int j=0; j<3; ++j) center[j] = (min[j] + max[j]) * 0.5f;
for (size_t i=0; i < restPosition.size()/3; ++i) {
restPosition[i*3+0] -= center[0];
restPosition[i*3+1] -= center[1];
restPosition[i*3+2] -= min[2];
}
// save rest position
topology->SetRestPosition(restPosition);
return topology;
}
//------------------------------------------------------------------------------
static void
fitFrame() {
g_pan[0] = g_pan[1] = 0;
g_dolly = g_size;
}
//------------------------------------------------------------------------------
union Effect {
Effect(int displayStyle_) : value(0) {
displayStyle = displayStyle_;
}
struct {
unsigned int displayStyle:3;
};
int value;
bool operator < (const Effect &e) const {
return value < e.value;
}
};
struct EffectDesc {
EffectDesc(OpenSubdiv::Far::PatchDescriptor desc,
Effect effect) : desc(desc), effect(effect),
maxValence(0), numElements(0) { }
OpenSubdiv::Far::PatchDescriptor desc;
Effect effect;
int maxValence;
int numElements;
bool operator < (const EffectDesc &e) const {
return desc < e.desc || (desc == e.desc &&
(maxValence < e.maxValence || ((maxValence == e.maxValence) &&
(effect < e.effect))));
}
};
class EffectDrawRegistry : public Osd::GLDrawRegistry<EffectDesc> {
protected:
virtual ConfigType *
_CreateDrawConfig(EffectDesc const & desc, SourceConfigType const * sconfig);
virtual SourceConfigType *
_CreateDrawSourceConfig(EffectDesc const & desc);
};
EffectDrawRegistry::SourceConfigType *
EffectDrawRegistry::_CreateDrawSourceConfig(EffectDesc const & effectDesc) {
Effect effect = effectDesc.effect;
SourceConfigType * sconfig =
BaseRegistry::_CreateDrawSourceConfig(effectDesc.desc);
assert(sconfig);
#if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0)
const char *glslVersion = "#version 400\n";
#else
const char *glslVersion = "#version 330\n";
#endif
// legacy gregory patch requires OSD_MAX_VALENCE and OSD_NUM_ELEMENTS defined
if (effectDesc.desc.GetType() == Far::PatchDescriptor::GREGORY or
effectDesc.desc.GetType() == Far::PatchDescriptor::GREGORY_BOUNDARY) {
std::ostringstream ss;
ss << effectDesc.maxValence;
sconfig->commonShader.AddDefine("OSD_MAX_VALENCE", ss.str());
ss.str("");
ss << effectDesc.numElements;
sconfig->commonShader.AddDefine("OSD_NUM_ELEMENTS", ss.str());
}
if (effectDesc.desc.GetType() == Far::PatchDescriptor::QUADS or
effectDesc.desc.GetType() == Far::PatchDescriptor::TRIANGLES) {
sconfig->vertexShader.source = shaderSource;
sconfig->vertexShader.version = glslVersion;
sconfig->vertexShader.AddDefine("VERTEX_SHADER");
} else {
sconfig->geometryShader.AddDefine("SMOOTH_NORMALS");
}
sconfig->geometryShader.source = shaderSource;
sconfig->geometryShader.version = glslVersion;
sconfig->geometryShader.AddDefine("GEOMETRY_SHADER");
sconfig->fragmentShader.source = shaderSource;
sconfig->fragmentShader.version = glslVersion;
sconfig->fragmentShader.AddDefine("FRAGMENT_SHADER");
if (effectDesc.desc.GetType() == Far::PatchDescriptor::QUADS) {
// uniform catmark, bilinear
sconfig->geometryShader.AddDefine("PRIM_QUAD");
sconfig->fragmentShader.AddDefine("PRIM_QUAD");
sconfig->commonShader.AddDefine("UNIFORM_SUBDIVISION");
} else if (effectDesc.desc.GetType() == Far::PatchDescriptor::TRIANGLES) {
// uniform loop
sconfig->geometryShader.AddDefine("PRIM_TRI");
sconfig->fragmentShader.AddDefine("PRIM_TRI");
sconfig->commonShader.AddDefine("LOOP");
sconfig->commonShader.AddDefine("UNIFORM_SUBDIVISION");
} else {
// adaptive
sconfig->vertexShader.source = shaderSource + sconfig->vertexShader.source;
sconfig->tessControlShader.source = shaderSource + sconfig->tessControlShader.source;
sconfig->tessEvalShader.source = shaderSource + sconfig->tessEvalShader.source;
sconfig->geometryShader.AddDefine("PRIM_TRI");
sconfig->fragmentShader.AddDefine("PRIM_TRI");
}
switch (effect.displayStyle) {
case kWire:
sconfig->commonShader.AddDefine("GEOMETRY_OUT_WIRE");
break;
case kWireShaded:
sconfig->commonShader.AddDefine("GEOMETRY_OUT_LINE");
break;
case kShaded:
sconfig->commonShader.AddDefine("GEOMETRY_OUT_FILL");
break;
case kVarying:
sconfig->commonShader.AddDefine("VARYING_COLOR");
sconfig->commonShader.AddDefine("GEOMETRY_OUT_FILL");
break;
case kVaryingInterleaved:
sconfig->commonShader.AddDefine("VARYING_COLOR");
sconfig->commonShader.AddDefine("GEOMETRY_OUT_FILL");
break;
}
return sconfig;
}
EffectDrawRegistry::ConfigType *
EffectDrawRegistry::_CreateDrawConfig(
DescType const & desc,
SourceConfigType const * sconfig) {
ConfigType * config = BaseRegistry::_CreateDrawConfig(desc.desc, sconfig);
assert(config);
GLuint uboIndex;
// XXXdyu can use layout(binding=) with GLSL 4.20 and beyond
g_transformBinding = 0;
uboIndex = glGetUniformBlockIndex(config->program, "Transform");
if (uboIndex != GL_INVALID_INDEX)
glUniformBlockBinding(config->program, uboIndex, g_transformBinding);
g_tessellationBinding = 1;
uboIndex = glGetUniformBlockIndex(config->program, "Tessellation");
if (uboIndex != GL_INVALID_INDEX)
glUniformBlockBinding(config->program, uboIndex, g_tessellationBinding);
g_lightingBinding = 2;
uboIndex = glGetUniformBlockIndex(config->program, "Lighting");
if (uboIndex != GL_INVALID_INDEX)
glUniformBlockBinding(config->program, uboIndex, g_lightingBinding);
GLint loc;
#if not defined(GL_ARB_separate_shader_objects) || defined(GL_VERSION_4_1)
glUseProgram(config->program);
if ((loc = glGetUniformLocation(config->program, "OsdVertexBuffer")) != -1) {
glUniform1i(loc, 0); // GL_TEXTURE0
}
if ((loc = glGetUniformLocation(config->program, "OsdValenceBuffer")) != -1) {
glUniform1i(loc, 1); // GL_TEXTURE1
}
if ((loc = glGetUniformLocation(config->program, "OsdQuadOffsetBuffer")) != -1) {
glUniform1i(loc, 2); // GL_TEXTURE2
}
if ((loc = glGetUniformLocation(config->program, "OsdPatchParamBuffer")) != -1) {
glUniform1i(loc, 3); // GL_TEXTURE3
}
if ((loc = glGetUniformLocation(config->program, "OsdFVarDataBuffer")) != -1) {
glUniform1i(loc, 4); // GL_TEXTURE4
}
#else
if ((loc = glGetUniformLocation(config->program, "OsdVertexBuffer")) != -1) {
glProgramUniform1i(config->program, loc, 0); // GL_TEXTURE0
}
if ((loc = glGetUniformLocation(config->program, "OsdValenceBuffer")) != -1) {
glProgramUniform1i(config->program, loc, 1); // GL_TEXTURE1
}
if ((loc = glGetUniformLocation(config->program, "OsdQuadOffsetBuffer")) != -1) {
glProgramUniform1i(config->program, loc, 2); // GL_TEXTURE2
}
if ((loc = glGetUniformLocation(config->program, "OsdPatchParamBuffer")) != -1) {
glProgramUniform1i(config->program, loc, 3); // GL_TEXTURE3
}
if ((loc = glGetUniformLocation(config->program, "OsdFVarDataBuffer")) != -1) {
glProgramUniform1i(config->program, loc, 4); // GL_TEXTURE4
}
#endif
return config;
}
EffectDrawRegistry effectRegistry;
static Effect
GetEffect() {
return Effect(g_displayStyle);
}
//------------------------------------------------------------------------------
static GLuint
bindProgram(Effect effect, Osd::DrawContext::PatchArray const & patch) {
EffectDesc effectDesc(patch.GetDescriptor(), effect);
// only legacy gregory needs maxValence and numElements
int maxValence = g_topology->GetDrawContext()->GetMaxValence();
int numElements = (g_displayStyle == kVaryingInterleaved ? 7 : 3);
typedef OpenSubdiv::Far::PatchDescriptor Descriptor;
if (patch.GetDescriptor().GetType() == Descriptor::GREGORY or
patch.GetDescriptor().GetType() == Descriptor::GREGORY_BOUNDARY) {
effectDesc.maxValence = maxValence;
effectDesc.numElements = numElements;
}
EffectDrawRegistry::ConfigType *
config = effectRegistry.GetDrawConfig(effectDesc);
GLuint program = config->program;
glUseProgram(program);
if (! g_transformUB) {
glGenBuffers(1, &g_transformUB);
glBindBuffer(GL_UNIFORM_BUFFER, g_transformUB);
glBufferData(GL_UNIFORM_BUFFER,
sizeof(g_transformData), NULL, GL_STATIC_DRAW);
};
glBindBuffer(GL_UNIFORM_BUFFER, g_transformUB);
glBufferSubData(GL_UNIFORM_BUFFER,
0, sizeof(g_transformData), &g_transformData);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
glBindBufferBase(GL_UNIFORM_BUFFER, g_transformBinding, g_transformUB);
// Update and bind tessellation state
struct Tessellation {
float TessLevel;
} tessellationData;
tessellationData.TessLevel = static_cast<float>(1 << g_tessLevel);
if (! g_tessellationUB) {
glGenBuffers(1, &g_tessellationUB);
glBindBuffer(GL_UNIFORM_BUFFER, g_tessellationUB);
glBufferData(GL_UNIFORM_BUFFER,
sizeof(tessellationData), NULL, GL_STATIC_DRAW);
};
glBindBuffer(GL_UNIFORM_BUFFER, g_tessellationUB);
glBufferSubData(GL_UNIFORM_BUFFER,
0, sizeof(tessellationData), &tessellationData);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
glBindBufferBase(GL_UNIFORM_BUFFER, g_tessellationBinding, g_tessellationUB);
// Update and bind lighting state
struct Lighting {
struct Light {
float position[4];
float ambient[4];
float diffuse[4];
float specular[4];
} lightSource[2];
} lightingData = {
{{ { 0.5, 0.2f, 1.0f, 0.0f },
{ 0.1f, 0.1f, 0.1f, 1.0f },
{ 0.7f, 0.7f, 0.7f, 1.0f },
{ 0.8f, 0.8f, 0.8f, 1.0f } },
{ { -0.8f, 0.4f, -1.0f, 0.0f },
{ 0.0f, 0.0f, 0.0f, 1.0f },
{ 0.5f, 0.5f, 0.5f, 1.0f },
{ 0.8f, 0.8f, 0.8f, 1.0f } }}
};
if (! g_lightingUB) {
glGenBuffers(1, &g_lightingUB);
glBindBuffer(GL_UNIFORM_BUFFER, g_lightingUB);
glBufferData(GL_UNIFORM_BUFFER,
sizeof(lightingData), NULL, GL_STATIC_DRAW);
};
glBindBuffer(GL_UNIFORM_BUFFER, g_lightingUB);
glBufferSubData(GL_UNIFORM_BUFFER,
0, sizeof(lightingData), &lightingData);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
glBindBufferBase(GL_UNIFORM_BUFFER, g_lightingBinding, g_lightingUB);
Osd::GLDrawContext *drawContext = g_topology->GetDrawContext();
if (drawContext->GetVertexTextureBuffer()) {
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_BUFFER,
drawContext->GetVertexTextureBuffer());
}
if (drawContext->GetVertexValenceTextureBuffer()) {
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_BUFFER,
drawContext->GetVertexValenceTextureBuffer());
}
if (drawContext->GetQuadOffsetsTextureBuffer()) {
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_BUFFER,
drawContext->GetQuadOffsetsTextureBuffer());
}
if (drawContext->GetPatchParamTextureBuffer()) {
glActiveTexture(GL_TEXTURE3);
glBindTexture(GL_TEXTURE_BUFFER,
drawContext->GetPatchParamTextureBuffer());
}
if (drawContext->GetFvarDataTextureBuffer()) {
glActiveTexture(GL_TEXTURE4);
glBindTexture(GL_TEXTURE_BUFFER,
drawContext->GetFvarDataTextureBuffer());
}
glActiveTexture(GL_TEXTURE0);
return program;
}
//------------------------------------------------------------------------------
static int
drawPatches(Osd::DrawContext::PatchArrayVector const &patches,
int instanceIndex,
GLfloat const *color) {
int numDrawCalls = 0;
for (int i=0; i<(int)patches.size(); ++i) {
Osd::DrawContext::PatchArray const & patch = patches[i];
Far::PatchDescriptor desc = patch.GetDescriptor();
Far::PatchDescriptor::Type patchType = desc.GetType();
GLenum primType;
switch(patchType) {
case Far::PatchDescriptor::QUADS:
primType = GL_LINES_ADJACENCY;
break;
case Far::PatchDescriptor::TRIANGLES:
primType = GL_TRIANGLES;
break;
default:
#if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0)
primType = GL_PATCHES;
glPatchParameteri(GL_PATCH_VERTICES, desc.GetNumControlVertices());
#else
primType = GL_POINTS;
#endif
}
#if defined(GL_ARB_tessellation_shader) || defined(GL_VERSION_4_0)
GLuint program = bindProgram(GetEffect(), patch);
GLuint uniformColor =
glGetUniformLocation(program, "diffuseColor");
glProgramUniform4f(program, uniformColor, color[0], color[1], color[2], 1);
GLuint uniformGregoryQuadOffsetBase =
glGetUniformLocation(program, "GregoryQuadOffsetBase");
GLuint uniformPrimitiveIdBase =
glGetUniformLocation(program, "PrimitiveIdBase");
glProgramUniform1i(program, uniformGregoryQuadOffsetBase,
patch.GetQuadOffsetIndex());
glProgramUniform1i(program, uniformPrimitiveIdBase,
patch.GetPatchIndex());
#else
GLuint program = bindProgram(GetEffect(), patch);
GLint uniformPrimitiveIdBase =
glGetUniformLocation(program, "PrimitiveIdBase");
if (uniformPrimitiveIdBase != -1)
glUniform1i(uniformPrimitiveIdBase, patch.GetPatchIndex());
#endif
GLvoid *indices = (void *)(patch.GetVertIndex() * sizeof(unsigned int));
int baseVertex = g_topology->GetNumVertices() * instanceIndex;
glProgramUniform1i(program, glGetUniformLocation(program, "BaseVertex"),
baseVertex);
glDrawElementsBaseVertex(primType,
patch.GetNumIndices(),
GL_UNSIGNED_INT,
indices,
baseVertex);
++numDrawCalls;
}
return numDrawCalls;
}
//------------------------------------------------------------------------------
static void
display() {
g_hud.GetFrameBuffer()->Bind();
Stopwatch s;
s.Start();
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glViewport(0, 0, g_width, g_height);
// prepare view matrix
double aspect = g_width/(double)g_height;
identity(g_transformData.ModelViewMatrix);
translate(g_transformData.ModelViewMatrix, -g_pan[0], -g_pan[1], -g_dolly);
rotate(g_transformData.ModelViewMatrix, g_rotate[1], 1, 0, 0);
rotate(g_transformData.ModelViewMatrix, g_rotate[0], 0, 1, 0);
rotate(g_transformData.ModelViewMatrix, -90, 1, 0, 0);
translate(g_transformData.ModelViewMatrix,
-g_center[0], -g_center[1], -g_center[2]);
perspective(g_transformData.ProjectionMatrix,
45.0f, (float)aspect, 0.01f, 500.0f);
multMatrix(g_transformData.ModelViewProjectionMatrix,
g_transformData.ModelViewMatrix,
g_transformData.ProjectionMatrix);
glEnable(GL_DEPTH_TEST);
// make sure that the vertex buffer is interoped back as a GL resources.
g_instances->BindVertexBuffer();
glBindVertexArray(g_vao);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER,
g_topology->GetDrawContext()->GetPatchIndexBuffer());
if (g_displayStyle == kVarying) {
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, g_instances->BindVertexBuffer());
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 3, 0);
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, g_instances->BindVaryingBuffer());
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 4, 0);
} else if (g_displayStyle == kVaryingInterleaved) {
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
glBindBuffer(GL_ARRAY_BUFFER, g_instances->BindVertexBuffer());
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 7, 0);
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 7,
(void*)(sizeof(GLfloat)*3));
} else {
glEnableVertexAttribArray(0);
glBindBuffer(GL_ARRAY_BUFFER, g_instances->BindVertexBuffer());
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof (GLfloat) * 3, 0);
glDisableVertexAttribArray(1);
}
// update vertex buffer to texture for gregory patch drawing.
g_topology->UpdateVertexTexture(g_instances);
Osd::DrawContext::PatchArrayVector const & patches =
g_topology->GetDrawContext()->GetPatchArrays();
int numDrawCalls = 0;
// primitive counting
glBeginQuery(GL_PRIMITIVES_GENERATED, g_queries[0]);
#if defined(GL_VERSION_3_3)
glBeginQuery(GL_TIME_ELAPSED, g_queries[1]);
#endif
// draw instances with same topology
for (int i = 0; i < g_numInstances; ++i) {
GLfloat color[3] = {i/(float)g_numInstances, 0.5, 0.5};
numDrawCalls += drawPatches(patches, i, color);
}
glEndQuery(GL_PRIMITIVES_GENERATED);
#if defined(GL_VERSION_3_3)
glEndQuery(GL_TIME_ELAPSED);
#endif
glBindVertexArray(0);
glUseProgram(0);
s.Stop();
float drawCpuTime = float(s.GetElapsed() * 1000.0f);
GLuint numPrimsGenerated = 0;
GLuint timeElapsed = 0;
glGetQueryObjectuiv(g_queries[0], GL_QUERY_RESULT, &numPrimsGenerated);
#if defined(GL_VERSION_3_3)
glGetQueryObjectuiv(g_queries[1], GL_QUERY_RESULT, &timeElapsed);
#endif
float drawGpuTime = timeElapsed / 1000.0f / 1000.0f;
g_hud.GetFrameBuffer()->ApplyImageShader();
if (g_hud.IsVisible()) {
g_fpsTimer.Stop();
double fps = 1.0/g_fpsTimer.GetElapsed();
g_fpsTimer.Start();
g_hud.DrawString(10, -180, "Tess level : %d", g_tessLevel);
g_hud.DrawString(10, -160, "Primitives : %d", numPrimsGenerated);
g_hud.DrawString(10, -140, "Draw calls : %d", numDrawCalls);
g_hud.DrawString(10, -100, "GPU Compute : %.3f ms", g_gpuTime);
g_hud.DrawString(10, -80, "CPU Compute : %.3f ms", g_cpuTime);
g_hud.DrawString(10, -60, "GPU Draw : %.3f ms", drawGpuTime);
g_hud.DrawString(10, -40, "CPU Draw : %.3f ms", drawCpuTime);
g_hud.DrawString(10, -20, "FPS : %3.1f", fps);
g_hud.Flush();
}
glFinish();
//checkGLErrors("display leave");
}
//------------------------------------------------------------------------------
static void
motion(GLFWwindow *, double dx, double dy) {
int x=(int)dx, y=(int)dy;
if (g_mbutton[0] && !g_mbutton[1] && !g_mbutton[2]) {
// orbit
g_rotate[0] += x - g_prev_x;
g_rotate[1] += y - g_prev_y;
} else if (!g_mbutton[0] && !g_mbutton[1] && g_mbutton[2]) {
// pan
g_pan[0] -= g_dolly*(x - g_prev_x)/g_width;
g_pan[1] += g_dolly*(y - g_prev_y)/g_height;
} else if ((g_mbutton[0] && !g_mbutton[1] && g_mbutton[2]) or
(!g_mbutton[0] && g_mbutton[1] && !g_mbutton[2])) {
// dolly
g_dolly -= g_dolly*0.01f*(x - g_prev_x);
if(g_dolly <= 0.01) g_dolly = 0.01f;
}
g_prev_x = x;
g_prev_y = y;
}
//------------------------------------------------------------------------------
static void
mouse(GLFWwindow *, int button, int state, int /* mods */) {
if (button == 0 && state == GLFW_PRESS && g_hud.MouseClick(g_prev_x, g_prev_y))
return;
if (button < 3) {
g_mbutton[button] = (state == GLFW_PRESS);
}
}
//------------------------------------------------------------------------------
static void
uninitGL() {
glDeleteQueries(2, g_queries);
glDeleteVertexArrays(1, &g_vao);
if (g_instances)
delete g_instances;
if (g_topology)
delete g_topology;
}
//------------------------------------------------------------------------------
static void
reshape(GLFWwindow *, int width, int height) {
g_width = width;
g_height = height;
int windowWidth = g_width, windowHeight = g_height;
// window size might not match framebuffer size on a high DPI display
glfwGetWindowSize(g_window, &windowWidth, &windowHeight);
g_hud.Rebuild(windowWidth, windowHeight, width, height);
}
//------------------------------------------------------------------------------
void windowClose(GLFWwindow*) {
g_running = false;
}
static void
rebuildInstances() {
delete g_instances;
if (g_displayStyle == kVaryingInterleaved) {
g_instances = g_topology->CreateInstances(
g_numInstances,
Osd::VertexBufferDescriptor(0, 3, 7),
Osd::VertexBufferDescriptor(3, 4, 7),
true);
} else if (g_displayStyle == kVarying) {
g_instances = g_topology->CreateInstances(
g_numInstances,
Osd::VertexBufferDescriptor(0, 3, 3),
Osd::VertexBufferDescriptor(0, 4, 4),
false);
} else {
g_instances = g_topology->CreateInstances(
g_numInstances,
Osd::VertexBufferDescriptor(0, 3, 3),
Osd::VertexBufferDescriptor(0, 0, 0),
false);
}
updateGeom();
refine();
}
static void
rebuildOsdMesh() {
static SimpleShape g_modelCube =
SimpleShape(catmark_cube, "catmark_cube", kCatmark);
//static SimpleShape g_modelBishop =
// SimpleShape(catmark_bishop, "catmark_bishop", kCatmark);
static SimpleShape g_modelPawn =
SimpleShape(catmark_pawn, "catmark_pawn", kCatmark);
// static SimpleShape g_modelRook =
// SimpleShape(catmark_rook, "catmark_rook", kCatmark);
delete g_topology;
g_topology = createOsdMesh(g_modelPawn.data, g_level);
//g_topology = createOsdMesh(g_modelCube.data, g_level);
rebuildInstances();
}
//------------------------------------------------------------------------------
static void
keyboard(GLFWwindow *, int key, int /* scancode */, int event, int /* mods */) {
if (event == GLFW_RELEASE) return;
if (g_hud.KeyDown(tolower(key))) return;
if (key == 'G') {
g_frame++;
updateGeom();
refine();
}
switch (key) {
case 'Q': g_running = 0; break;
case 'F': fitFrame(); break;
case '+':
case '=': g_tessLevel++; break;
case '-': g_tessLevel = std::max(g_tessLevelMin, g_tessLevel-1); break;
case '.': g_numInstances++; rebuildInstances(); break;
case ',': g_numInstances = std::max(1, g_numInstances-1); rebuildInstances(); break;
case GLFW_KEY_ESCAPE: g_hud.SetVisible(!g_hud.IsVisible()); break;
}
}
//------------------------------------------------------------------------------
static void
callbackKernel(int k) {
g_kernel = k;
#ifdef OPENSUBDIV_HAS_OPENCL
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
if (g_clDeviceContext.Initialize() == false) {
printf("Error in initializing OpenCL\n");
exit(1);
}
}
#endif
#ifdef OPENSUBDIV_HAS_CUDA
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
if (g_cudaDeviceContext.Initialize() == false) {
printf("Error in initializing Cuda\n");
exit(1);
}
}
#endif
rebuildOsdMesh();
}
static void
callbackLevel(int l) {
g_level = l;
rebuildOsdMesh();
}
static void
callbackSlider(float value, int /* data */) {
g_numInstances = (int)value;
rebuildInstances();
}
static void
callbackDisplayStyle(int b) {
g_displayStyle = b;
Refurbish osd layer API. In OpenSubdiv 2.x, we encapsulated subdivision tables into compute context in osd layer since those tables are order-dependent and have to be applied in a certain manner. In 3.0, we adopted stencil table based refinement. It's more simple and such an encapsulation is no longer needed. Also 2.0 API has several ownership issues of GPU kernel caching, and forces unnecessary instantiation of controllers even though the cpu kernels typically don't need instances unlike GPU ones. This change completely revisit osd client facing APIs. All contexts and controllers were replaced with device-specific tables and evaluators. While we can still use consistent API across various device backends, unnecessary complexities have been removed. For example, cpu evaluator is just a set of static functions and also there's no need to replicate FarStencilTables to ComputeContext. Also the new API delegates the ownership of compiled GPU kernels to clients, for the better management of resources especially in multiple GPU environment. In addition to integrating ComputeController and EvalStencilController into a single function Evaluator::EvalStencils(), EvalLimit API is also added into Evaluator. This is working but still in progress, and we'll make a followup change for the complete implementation. -some naming convention changes: GLSLTransformFeedback to GLXFBEvaluator GLSLCompute to GLComputeEvaluator -move LimitLocation struct into examples/glEvalLimit. We're still discussing patch evaluation interface. Basically we'd like to tease all ptex-specific parametrization out of far/osd layer. TODO: -implments EvalPatches() in the right way -derivative evaluation API is still interim. -VertexBufferDescriptor needs a better API to advance its location -synchronization mechanism is not ideal (too global). -OsdMesh class is hacky. need to fix it.
2015-05-09 00:31:26 +00:00
rebuildOsdMesh();
}
static void
callbackAdaptive(bool checked, int /* a */) {
if (Osd::GLDrawContext::SupportsAdaptiveTessellation()) {
g_adaptive = checked;
rebuildOsdMesh();
}
}
static void
callbackCheckBox(bool checked, int button) {
switch (button) {
case kHUD_CB_FREEZE:
g_freeze = checked;
break;
}
}
static void
initHUD() {
int windowWidth = g_width, windowHeight = g_height,
frameBufferWidth = g_width, frameBufferHeight = g_height;
// window size might not match framebuffer size on a high DPI display
glfwGetWindowSize(g_window, &windowWidth, &windowHeight);
glfwGetFramebufferSize(g_window, &frameBufferWidth, &frameBufferHeight);
g_hud.Init(windowWidth, windowHeight, frameBufferWidth, frameBufferHeight);
g_hud.SetFrameBuffer(new GLFrameBuffer);
int shading_pulldown = g_hud.AddPullDown("Shading (W)", 10, 10, 250, callbackDisplayStyle, 'w');
g_hud.AddPullDownButton(shading_pulldown, "Wire", kWire, g_displayStyle==kWire);
g_hud.AddPullDownButton(shading_pulldown, "Shaded", kShaded, g_displayStyle==kShaded);
g_hud.AddPullDownButton(shading_pulldown, "Wire+Shaded", kWireShaded, g_displayStyle==kWireShaded);
g_hud.AddPullDownButton(shading_pulldown, "Varying", kVarying, g_displayStyle==kVarying);
g_hud.AddPullDownButton(shading_pulldown, "Varying(Interleaved)", kVaryingInterleaved, g_displayStyle==kVaryingInterleaved);
g_hud.AddCheckBox("Freeze (spc)", g_freeze != 0,
10, 150, callbackCheckBox, kHUD_CB_FREEZE, ' ');
int compute_pulldown = g_hud.AddPullDown("Compute (K)", 475, 10, 300, callbackKernel, 'k');
g_hud.AddPullDownButton(compute_pulldown, "CPU", kCPU);
#ifdef OPENSUBDIV_HAS_OPENMP
g_hud.AddPullDownButton(compute_pulldown, "OpenMP", kOPENMP);
#endif
#ifdef OPENSUBDIV_HAS_TBB
g_hud.AddPullDownButton(compute_pulldown, "TBB", kTBB);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
if (CLDeviceContext::HAS_CL_VERSION_1_1()) {
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
}
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
g_hud.AddPullDownButton(compute_pulldown, "GLSL TransformFeedback", kGLSL);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
// Must also check at run time for OpenGL 4.3
if (GLEW_VERSION_4_3) {
g_hud.AddPullDownButton(compute_pulldown, "GLSL Compute", kGLSLCompute);
}
#endif
g_hud.AddSlider("Prim counts", 1, 100, 25,
-200, 20, 20, false, callbackSlider, 0);
if (Osd::GLDrawContext::SupportsAdaptiveTessellation())
g_hud.AddCheckBox("Adaptive (`)", g_adaptive!=0, 10, 190, callbackAdaptive, 0, '`');
for (int i = 1; i < 11; ++i) {
char level[16];
sprintf(level, "Lv. %d", i);
g_hud.AddRadioButton(3, level, i==2, 10, 210+i*20, callbackLevel, i, '0'+(i%10));
}
g_hud.Rebuild(windowWidth, windowHeight, frameBufferWidth, frameBufferHeight);
}
//------------------------------------------------------------------------------
static void
initGL() {
glClearColor(0.1f, 0.1f, 0.1f, 0.0f);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
glCullFace(GL_BACK);
glEnable(GL_CULL_FACE);
glGenQueries(2, g_queries);
glGenVertexArrays(1, &g_vao);
}
//------------------------------------------------------------------------------
static void
idle() {
if (not g_freeze) {
++g_frame;
updateGeom();
refine();
}
}
//------------------------------------------------------------------------------
static void
callbackError(Far::ErrorType err, const char *message) {
printf("Error: %d\n", err);
printf("%s", message);
}
//------------------------------------------------------------------------------
static void
callbackErrorGLFW(int error, const char* description) {
fprintf(stderr, "GLFW Error (%d) : %s\n", error, description);
}
//------------------------------------------------------------------------------
static void
setGLCoreProfile() {
#define glfwOpenWindowHint glfwWindowHint
#define GLFW_OPENGL_VERSION_MAJOR GLFW_CONTEXT_VERSION_MAJOR
#define GLFW_OPENGL_VERSION_MINOR GLFW_CONTEXT_VERSION_MINOR
glfwOpenWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
#if not defined(__APPLE__)
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 4);
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 3);
#else
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2);
#endif
#else
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MAJOR, 3);
glfwOpenWindowHint(GLFW_OPENGL_VERSION_MINOR, 2);
#endif
glfwOpenWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
}
//------------------------------------------------------------------------------
int main(int argc, char ** argv) {
std::string str;
for (int i = 1; i < argc; ++i) {
if (!strcmp(argv[i], "-d")) {
g_level = atoi(argv[++i]);
}
}
Far::SetErrorCallback(callbackError);
glfwSetErrorCallback(callbackErrorGLFW);
if (not glfwInit()) {
printf("Failed to initialize GLFW\n");
return 1;
}
static const char windowTitle[] = "OpenSubdiv face partitioning example";
#define CORE_PROFILE
#ifdef CORE_PROFILE
setGLCoreProfile();
#endif
if (not (g_window=glfwCreateWindow(g_width, g_height, windowTitle, NULL, NULL))) {
printf("Failed to open window.\n");
glfwTerminate();
return 1;
}
glfwMakeContextCurrent(g_window);
// accommocate high DPI displays (e.g. mac retina displays)
glfwGetFramebufferSize(g_window, &g_width, &g_height);
glfwSetFramebufferSizeCallback(g_window, reshape);
glfwSetKeyCallback(g_window, keyboard);
glfwSetCursorPosCallback(g_window, motion);
glfwSetMouseButtonCallback(g_window, mouse);
glfwSetWindowCloseCallback(g_window, windowClose);
#if defined(OSD_USES_GLEW)
#ifdef CORE_PROFILE
// this is the only way to initialize glew correctly under core profile context.
glewExperimental = true;
#endif
if (GLenum r = glewInit() != GLEW_OK) {
printf("Failed to initialize glew. Error = %s\n", glewGetErrorString(r));
exit(1);
}
#ifdef CORE_PROFILE
// clear GL errors which was generated during glewInit()
glGetError();
#endif
#endif
// activate feature adaptive tessellation if OSD supports it
g_adaptive = Osd::GLDrawContext::SupportsAdaptiveTessellation();
initGL();
glfwSwapInterval(0);
initHUD();
rebuildOsdMesh();
while (g_running) {
idle();
display();
glfwPollEvents();
glfwSwapBuffers(g_window);
glFinish();
}
uninitGL();
glfwTerminate();
}
//------------------------------------------------------------------------------