OpenSubdiv/opensubdiv/osd/glXFBEvaluator.cpp
David G. Yu 236afb2c06 Osd Varying and FaceVarying Patch Evaluation
Implemented EvalPatchesVarying and EvalPatchesFaceVarying
methods for Osd::*Evaluator classes, i.e. cpu, omp, tbb,
GLXFB, GLSLCompute, OpenCL, and CUDA.

Also, the GPU Kernel implementations have been updated to use
the common patchBasis implementation instead of re-implementing
methods to compute patch basis weights locally.
2016-09-29 09:56:15 -07:00

631 lines
22 KiB
C++

//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "../osd/glXFBEvaluator.h"
#include "../osd/glslPatchShaderSource.h"
#include <sstream>
#include <string>
#include <vector>
#include <cstdio>
#include "../far/error.h"
#include "../far/stencilTable.h"
#if _MSC_VER
#define snprintf _snprintf
#endif
namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
namespace Osd {
static const char *shaderSource =
#include "../osd/glslXFBKernel.gen.h"
;
template <class T> GLuint
createGLTextureBuffer(std::vector<T> const & src, GLenum type) {
GLint size = static_cast<int>(src.size()*sizeof(T));
void const * ptr = &src.at(0);
GLuint buffer;
glGenBuffers(1, &buffer);
GLuint devicePtr;
glGenTextures(1, &devicePtr);
#if defined(GL_EXT_direct_state_access)
if (glNamedBufferDataEXT && glTextureBufferEXT) {
glNamedBufferDataEXT(buffer, size, ptr, GL_STATIC_DRAW);
glTextureBufferEXT(devicePtr, GL_TEXTURE_BUFFER, type, buffer);
} else {
#else
{
#endif
GLint prev = 0;
glGetIntegerv(GL_ARRAY_BUFFER_BINDING, &prev);
glBindBuffer(GL_ARRAY_BUFFER, buffer);
glBufferData(GL_ARRAY_BUFFER, size, ptr, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, prev);
glGetIntegerv(GL_TEXTURE_BINDING_BUFFER, &prev);
glBindTexture(GL_TEXTURE_BUFFER, devicePtr);
glTexBuffer(GL_TEXTURE_BUFFER, type, buffer);
glBindTexture(GL_TEXTURE_BUFFER, prev);
}
glDeleteBuffers(1, &buffer);
return devicePtr;
}
GLStencilTableTBO::GLStencilTableTBO(
Far::StencilTable const *stencilTable) {
_numStencils = stencilTable->GetNumStencils();
if (_numStencils > 0) {
_sizes = createGLTextureBuffer(stencilTable->GetSizes(), GL_R32UI);
_offsets = createGLTextureBuffer(
stencilTable->GetOffsets(), GL_R32I);
_indices = createGLTextureBuffer(
stencilTable->GetControlIndices(), GL_R32I);
_weights = createGLTextureBuffer(stencilTable->GetWeights(), GL_R32F);
_duWeights = _dvWeights = 0;
} else {
_sizes = _offsets = _indices = _weights = 0;
_duWeights = _dvWeights = 0;
}
}
GLStencilTableTBO::GLStencilTableTBO(
Far::LimitStencilTable const *limitStencilTable) {
_numStencils = limitStencilTable->GetNumStencils();
if (_numStencils > 0) {
_sizes = createGLTextureBuffer(
limitStencilTable->GetSizes(), GL_R32UI);
_offsets = createGLTextureBuffer(
limitStencilTable->GetOffsets(), GL_R32I);
_indices = createGLTextureBuffer(
limitStencilTable->GetControlIndices(), GL_R32I);
_weights = createGLTextureBuffer(
limitStencilTable->GetWeights(), GL_R32F);
_duWeights = createGLTextureBuffer(
limitStencilTable->GetDuWeights(), GL_R32F);
_dvWeights = createGLTextureBuffer(
limitStencilTable->GetDvWeights(), GL_R32F);
} else {
_sizes = _offsets = _indices = _weights = 0;
_duWeights = _dvWeights = 0;
}
}
GLStencilTableTBO::~GLStencilTableTBO() {
if (_sizes) glDeleteTextures(1, &_sizes);
if (_offsets) glDeleteTextures(1, &_offsets);
if (_indices) glDeleteTextures(1, &_indices);
if (_weights) glDeleteTextures(1, &_weights);
if (_duWeights) glDeleteTextures(1, &_duWeights);
if (_dvWeights) glDeleteTextures(1, &_dvWeights);
}
// ---------------------------------------------------------------------------
GLXFBEvaluator::GLXFBEvaluator() : _srcBufferTexture(0) {
}
GLXFBEvaluator::~GLXFBEvaluator() {
if (_srcBufferTexture) {
glDeleteTextures(1, &_srcBufferTexture);
}
}
static GLuint
compileKernel(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
const char *kernelDefine) {
GLuint program = glCreateProgram();
GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER);
std::string patchBasisShaderSource =
GLSLPatchShaderSource::GetPatchBasisShaderSource();
const char *patchBasisShaderSourceDefine = "#define OSD_PATCH_BASIS_GLSL\n";
std::ostringstream defines;
defines << "#define LENGTH " << srcDesc.length << "\n"
<< "#define SRC_STRIDE " << srcDesc.stride << "\n"
<< "#define VERTEX_SHADER\n"
<< kernelDefine << "\n"
<< patchBasisShaderSourceDefine << "\n";
std::string defineStr = defines.str();
const char *shaderSources[4] = {"#version 410\n", NULL, NULL, NULL};
shaderSources[1] = defineStr.c_str();
shaderSources[2] = patchBasisShaderSource.c_str();
shaderSources[3] = shaderSource;
glShaderSource(vertexShader, 4, shaderSources, NULL);
glCompileShader(vertexShader);
glAttachShader(program, vertexShader);
std::vector<std::string> outputs;
char attrName[32];
{
// vertex data (may include custom vertex data) and varying data
// are stored into the same buffer, interleaved.
//
// (gl_SkipComponents1)
// outVertexData[0]
// outVertexData[1]
// outVertexData[2]
// (gl_SkipComponents1)
//
// note that "primvarOffset" in shader is still needed to read
// interleaved components even if gl_SkipComponents is used.
//
int primvarOffset = (dstDesc.offset % dstDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < dstDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outVertexBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + dstDesc.length; i < dstDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
if (duDesc.length) {
//
// For derivatives, we use another buffer bindings so gl_NextBuffer
// is inserted here to switch the destination of transform feedback.
//
// Note that the destination buffers may or may not be shared between
// vertex and each derivatives. gl_NextBuffer seems still works well
// in either case.
//
outputs.push_back("gl_NextBuffer");
int primvarOffset = (duDesc.offset % duDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < duDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDuBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + duDesc.length; i < duDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
if (dvDesc.length) {
outputs.push_back("gl_NextBuffer");
int primvarOffset = (dvDesc.offset % dvDesc.stride);
for (int i = 0; i < primvarOffset; ++i) {
outputs.push_back("gl_SkipComponents1");
}
for (int i = 0; i < dvDesc.length; ++i) {
snprintf(attrName, sizeof(attrName), "outDvBuffer[%d]", i);
outputs.push_back(attrName);
}
for (int i = primvarOffset + dvDesc.length; i < dvDesc.stride; ++i) {
outputs.push_back("gl_SkipComponents1");
}
}
// convert to char* array
std::vector<const char *> pOutputs;
for (size_t i = 0; i < outputs.size(); ++i) {
pOutputs.push_back(&outputs[i][0]);
}
glTransformFeedbackVaryings(program, (GLsizei)outputs.size(),
&pOutputs[0], GL_INTERLEAVED_ATTRIBS);
GLint linked = 0;
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &linked);
if (linked == GL_FALSE) {
char buffer[1024];
glGetShaderInfoLog(vertexShader, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glGetProgramInfoLog(program, 1024, NULL, buffer);
Far::Error(Far::FAR_RUNTIME_ERROR, buffer);
glDeleteProgram(program);
program = 0;
}
glDeleteShader(vertexShader);
return program;
}
bool
GLXFBEvaluator::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc) {
// create a stencil kernel
_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc);
// create a patch kernel
_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc);
// create a texture for input buffer
if (!_srcBufferTexture) {
glGenTextures(1, &_srcBufferTexture);
}
return true;
}
/* static */
void
GLXFBEvaluator::Synchronize(void * /*kernel*/) {
// XXX: this is currently just for the test purpose.
// need to be reimplemented by fence and sync.
glFinish();
}
static void
bindTexture(GLint sampler, GLuint texture, int unit) {
if (sampler == -1) {
return;
}
glUniform1i(sampler, unit);
glActiveTexture(GL_TEXTURE0 + unit);
glBindTexture(GL_TEXTURE_BUFFER, texture);
glActiveTexture(GL_TEXTURE0);
}
bool
GLXFBEvaluator::EvalStencils(
GLuint srcBuffer, BufferDescriptor const &srcDesc,
GLuint dstBuffer, BufferDescriptor const &dstDesc,
GLuint duBuffer, BufferDescriptor const &duDesc,
GLuint dvBuffer, BufferDescriptor const &dvDesc,
GLuint sizesTexture,
GLuint offsetsTexture,
GLuint indicesTexture,
GLuint weightsTexture,
GLuint duWeightsTexture,
GLuint dvWeightsTexture,
int start, int end) const {
if (!_stencilKernel.program) return false;
int count = end - start;
if (count <= 0) {
return true;
}
// bind vertex array
// always create new one, to be safe with multiple contexts (slow though)
GLuint vao = 0;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glEnable(GL_RASTERIZER_DISCARD);
glUseProgram(_stencilKernel.program);
// Set input VBO as a texture buffer.
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
bindTexture(_stencilKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
// bind stencil table textures.
bindTexture(_stencilKernel.uniformSizesTexture, sizesTexture, 1);
bindTexture(_stencilKernel.uniformOffsetsTexture, offsetsTexture, 2);
bindTexture(_stencilKernel.uniformIndicesTexture, indicesTexture, 3);
bindTexture(_stencilKernel.uniformWeightsTexture, weightsTexture, 4);
if (_stencilKernel.uniformDuWeightsTexture >= 0 && duWeightsTexture)
bindTexture(_stencilKernel.uniformDuWeightsTexture, duWeightsTexture, 5);
if (_stencilKernel.uniformDvWeightsTexture >= 0 && dvWeightsTexture)
bindTexture(_stencilKernel.uniformDvWeightsTexture, dvWeightsTexture, 6);
// set batch range
glUniform1i(_stencilKernel.uniformStart, start);
glUniform1i(_stencilKernel.uniformEnd, end);
glUniform1i(_stencilKernel.uniformSrcOffset, srcDesc.offset);
// The destination buffer is bound at vertex boundary.
//
// Example: When we have a batched and interleaved vertex buffer
//
// Obj X | Obj Y |
// -----------+-------------------------------------------+-------
// | vtx 0 | vtx 1 | |
// -----------+---------------+---------------+-----------+-------
// | x y z r g b a | x y z r g b a | .... |
// -----------+---------------+---------------+-----------+-------
// ^
// srcDesc.offset for Obj Y color
//
// ^-------------------------------------------^
// XFB destination buffer range
// S S S * * * *
// k k k
// i i i
// p p p
//
// We use gl_SkipComponents to skip the first 3 XYZ so the
// buffer itself needs to be bound for entire section of ObjY.
//
// Note that for the source buffer (texture) we bind the whole
// buffer (all VBO range) and use srcOffset=srcDesc.offset for
// indexing.
//
int dstBufferBindOffset = dstDesc.stride ?
(dstDesc.offset - (dstDesc.offset % dstDesc.stride)) : 0;
int duBufferBindOffset = duDesc.stride ?
(duDesc.offset - (duDesc.offset % duDesc.stride)) : 0;
int dvBufferBindOffset = dvDesc.stride ?
(dvDesc.offset - (dvDesc.offset % dvDesc.stride)) : 0;
// bind destination buffer
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
0, dstBuffer,
dstBufferBindOffset * sizeof(float),
count * dstDesc.stride * sizeof(float));
if (duDesc.length > 0) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
count * duDesc.stride * sizeof(float));
}
if (dvDesc.length > 0) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
2, dvBuffer,
dvBufferBindOffset * sizeof(float),
count * dvDesc.stride * sizeof(float));
}
glBeginTransformFeedback(GL_POINTS);
glDrawArrays(GL_POINTS, 0, count);
glEndTransformFeedback();
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
for (int i = 0; i < 5; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
glDisable(GL_RASTERIZER_DISCARD);
glUseProgram(0);
glActiveTexture(GL_TEXTURE0);
// revert vao
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return true;
}
bool
GLXFBEvaluator::EvalPatches(
GLuint srcBuffer, BufferDescriptor const &srcDesc,
GLuint dstBuffer, BufferDescriptor const &dstDesc,
GLuint duBuffer, BufferDescriptor const &duDesc,
GLuint dvBuffer, BufferDescriptor const &dvDesc,
int numPatchCoords,
GLuint patchCoordsBuffer,
const PatchArrayVector &patchArrays,
GLuint patchIndexTexture,
GLuint patchParamTexture) const {
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
if (!_patchKernel.program) return false;
// bind vertex array
// always create new one, to be safe with multiple contexts (slow though)
GLuint vao = 0;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glEnable(GL_RASTERIZER_DISCARD);
glUseProgram(_patchKernel.program);
// Set input VBO as a texture buffer.
glBindTexture(GL_TEXTURE_BUFFER, _srcBufferTexture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R32F, srcBuffer);
glBindTexture(GL_TEXTURE_BUFFER, 0);
bindTexture(_patchKernel.uniformSrcBufferTexture, _srcBufferTexture, 0);
// bind patch index and patch param textures.
bindTexture(_patchKernel.uniformPatchParamTexture, patchParamTexture, 1);
bindTexture(_patchKernel.uniformPatchIndexTexture, patchIndexTexture, 2);
// set other uniforms
glUniform4iv(_patchKernel.uniformPatchArray, (int)patchArrays.size(),
(const GLint*)&patchArrays[0]);
glUniform1i(_patchKernel.uniformSrcOffset, srcDesc.offset);
// input patchcoords
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
int stride = sizeof(int) * 5; // patchcoord = int*5 struct
glBindBuffer(GL_ARRAY_BUFFER, patchCoordsBuffer);
glVertexAttribIPointer(0, 3, GL_UNSIGNED_INT, stride, (void*)0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, stride, (void*)(sizeof(int)*3));
int dstBufferBindOffset =
dstDesc.offset - (dstDesc.offset % dstDesc.stride);
int duBufferBindOffset = duDesc.stride
? (duDesc.offset - (duDesc.offset % duDesc.stride))
: 0;
int dvBufferBindOffset = dvDesc.stride
? (dvDesc.offset - (dvDesc.offset % dvDesc.stride))
: 0;
// bind destination buffer
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
0, dstBuffer,
dstBufferBindOffset * sizeof(float),
numPatchCoords * dstDesc.stride * sizeof(float));
if (derivatives) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
numPatchCoords * duDesc.stride * sizeof(float));
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
2, dvBuffer,
dvBufferBindOffset * sizeof(float),
numPatchCoords * dvDesc.stride * sizeof(float));
}
glBeginTransformFeedback(GL_POINTS);
glDrawArrays(GL_POINTS, 0, numPatchCoords);
glEndTransformFeedback();
glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
// unbind textures
for (int i = 0; i < 3; ++i) {
glActiveTexture(GL_TEXTURE0 + i);
glBindTexture(GL_TEXTURE_BUFFER, 0);
}
glDisable(GL_RASTERIZER_DISCARD);
glUseProgram(0);
glActiveTexture(GL_TEXTURE0);
glDisableVertexAttribArray(0);
glDisableVertexAttribArray(1);
// revert vao
glBindVertexArray(0);
glDeleteVertexArrays(1, &vao);
return true;
}
// ---------------------------------------------------------------------------
GLXFBEvaluator::_StencilKernel::_StencilKernel() : program(0) {
}
GLXFBEvaluator::_StencilKernel::~_StencilKernel() {
if (program) {
glDeleteProgram(program);
}
}
bool
GLXFBEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc) {
// create stencil kernel
if (program) {
glDeleteProgram(program);
}
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
const char *kernelDef = derivatives
? "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n"
"#define OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES\n"
: "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_STENCILS\n";
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef);
if (program == 0) return false;
// cache uniform locations (TODO: use uniform block)
uniformSrcBufferTexture = glGetUniformLocation(program, "vertexBuffer");
uniformSrcOffset = glGetUniformLocation(program, "srcOffset");
uniformSizesTexture = glGetUniformLocation(program, "sizes");
uniformOffsetsTexture = glGetUniformLocation(program, "offsets");
uniformIndicesTexture = glGetUniformLocation(program, "indices");
uniformWeightsTexture = glGetUniformLocation(program, "weights");
uniformDuWeightsTexture = glGetUniformLocation(program, "duWeights");
uniformDvWeightsTexture = glGetUniformLocation(program, "dvWeights");
uniformStart = glGetUniformLocation(program, "batchStart");
uniformEnd = glGetUniformLocation(program, "batchEnd");
return true;
}
// ---------------------------------------------------------------------------
GLXFBEvaluator::_PatchKernel::_PatchKernel() : program(0) {
}
GLXFBEvaluator::_PatchKernel::~_PatchKernel() {
if (program) {
glDeleteProgram(program);
}
}
bool
GLXFBEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc) {
// create stencil kernel
if (program) {
glDeleteProgram(program);
}
bool derivatives = (duDesc.length > 0 || dvDesc.length > 0);
const char *kernelDef = derivatives
? "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n"
"#define OPENSUBDIV_GLSL_XFB_USE_DERIVATIVES\n"
: "#define OPENSUBDIV_GLSL_XFB_KERNEL_EVAL_PATCHES\n";
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc, kernelDef);
if (program == 0) return false;
// cache uniform locations
uniformSrcBufferTexture = glGetUniformLocation(program, "vertexBuffer");
uniformSrcOffset = glGetUniformLocation(program, "srcOffset");
uniformPatchArray = glGetUniformLocation(program, "patchArray");
uniformPatchParamTexture = glGetUniformLocation(program, "patchParamBuffer");
uniformPatchIndexTexture = glGetUniformLocation(program, "patchIndexBuffer");
return true;
}
} // end namespace Osd
} // end namespace OPENSUBDIV_VERSION
} // end namespace OpenSubdiv