Improved XFB Evaluation of 1st and 2nd derivatives

Most GL implementations support a maximum of 4 transform
feedback buffer bindings. With the addition of 1st and 2nd
derivative evaluation up to 6 bindings might be required,
i.e. dst, du, dv, duu, duv, dvv.

This change extends the GLXFB Evaluator interface to allow
a client to specialize the evaluator when it is known that
(at least) the 1st derivative and 2nd derivative outputs
are interleaved together into shared buffers.

When this option is used, the maximum number of transform
feedback buffer bindings can be reduced to 3 instead of 6.
This commit is contained in:
David G Yu 2017-01-30 11:42:08 -08:00
parent a410a52f4e
commit 63fdb39912
3 changed files with 104 additions and 36 deletions

View File

@ -151,9 +151,9 @@ GLStencilTableTBO::~GLStencilTableTBO() {
// ---------------------------------------------------------------------------
GLXFBEvaluator::GLXFBEvaluator(bool sharedDerivativeBuffers)
GLXFBEvaluator::GLXFBEvaluator(bool interleavedDerivativeBuffers)
: _srcBufferTexture(0),
_sharedDerivativeBuffers(sharedDerivativeBuffers) {
_interleavedDerivativeBuffers(interleavedDerivativeBuffers) {
}
GLXFBEvaluator::~GLXFBEvaluator() {
@ -171,7 +171,7 @@ compileKernel(BufferDescriptor const &srcDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
const char *kernelDefine,
bool sharedDerivativeBuffers) {
bool interleavedDerivativeBuffers) {
GLuint program = glCreateProgram();
@ -192,16 +192,16 @@ compileKernel(BufferDescriptor const &srcDesc,
bool deriv2 = (duuDesc.length > 0 || duvDesc.length > 0 || dvvDesc.length > 0);
if (deriv1) {
defines << "#define OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES\n";
if (sharedDerivativeBuffers) {
if (interleavedDerivativeBuffers) {
defines <<
"#define OPENSUBDIV_GLSL_XFB_SHARED_1ST_DERIVATIVE_BUFFERS\n";
"#define OPENSUBDIV_GLSL_XFB_INTERLEAVED_1ST_DERIVATIVE_BUFFERS\n";
}
}
if (deriv2) {
defines << "#define OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES\n";
if (sharedDerivativeBuffers) {
if (interleavedDerivativeBuffers) {
defines <<
"#define OPENSUBDIV_GLSL_XFB_SHARED_2ND_DERIVATIVE_BUFFERS\n";
"#define OPENSUBDIV_GLSL_XFB_INTERLEAVED_2ND_DERIVATIVE_BUFFERS\n";
}
}
@ -247,15 +247,15 @@ compileKernel(BufferDescriptor const &srcDesc,
// For derivatives, we use other buffer bindings, so gl_NextBuffer
// is inserted here to switch the destination of transform feedback.
//
// Note that the destination buffers may or may not be shared between
// Note that the destination buffers may or may not be interleaved between
// vertex and each derivative. gl_NextBuffer still seems to work well
// in either case.
//
// If we know that the buffers for derivatives are shared, then we
// If we know that the buffers for derivatives are interleaved, then we
// can use fewer buffer bindings. This can be important, since most GL
// implementations will support only up to 4 transform feedback bindings.
//
if (deriv1 && sharedDerivativeBuffers) {
if (deriv1 && interleavedDerivativeBuffers) {
outputs.push_back("gl_NextBuffer");
int primvar1Offset = (duDesc.offset % duDesc.stride);
@ -308,7 +308,7 @@ compileKernel(BufferDescriptor const &srcDesc,
}
}
}
if (deriv2 && sharedDerivativeBuffers) {
if (deriv2 && interleavedDerivativeBuffers) {
outputs.push_back("gl_NextBuffer");
int primvar1Offset = (duuDesc.offset % duuDesc.stride);
@ -428,12 +428,12 @@ GLXFBEvaluator::Compile(BufferDescriptor const &srcDesc,
// create a stencil kernel
_stencilKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
_sharedDerivativeBuffers);
_interleavedDerivativeBuffers);
// create a patch kernel
_patchKernel.Compile(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
_sharedDerivativeBuffers);
_interleavedDerivativeBuffers);
// create a texture for input buffer
if (!_srcBufferTexture) {
@ -600,7 +600,7 @@ GLXFBEvaluator::EvalStencils(
dstBufferBindOffset * sizeof(float),
count * dstDesc.stride * sizeof(float));
if ((duDesc.length > 0) && _sharedDerivativeBuffers) {
if ((duDesc.length > 0) && _interleavedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
@ -621,7 +621,7 @@ GLXFBEvaluator::EvalStencils(
}
}
if ((duuDesc.length > 0) && _sharedDerivativeBuffers) {
if ((duuDesc.length > 0) && _interleavedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
2, duuBuffer,
duuBufferBindOffset * sizeof(float),
@ -774,7 +774,7 @@ GLXFBEvaluator::EvalPatches(
dstBufferBindOffset * sizeof(float),
numPatchCoords * dstDesc.stride * sizeof(float));
if (deriv1 && _sharedDerivativeBuffers) {
if (deriv1 && _interleavedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
1, duBuffer,
duBufferBindOffset * sizeof(float),
@ -790,7 +790,7 @@ GLXFBEvaluator::EvalPatches(
dvBufferBindOffset * sizeof(float),
numPatchCoords * dvDesc.stride * sizeof(float));
}
if (deriv2 && _sharedDerivativeBuffers) {
if (deriv2 && _interleavedDerivativeBuffers) {
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER,
2, duuBuffer,
duuBufferBindOffset * sizeof(float),
@ -856,7 +856,7 @@ GLXFBEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
bool sharedDerivativeBuffers) {
bool interleavedDerivativeBuffers) {
// create stencil kernel
if (program) {
glDeleteProgram(program);
@ -867,7 +867,7 @@ GLXFBEvaluator::_StencilKernel::Compile(BufferDescriptor const &srcDesc,
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
kernelDefines, sharedDerivativeBuffers);
kernelDefines, interleavedDerivativeBuffers);
if (program == 0) return false;
// cache uniform locations (TODO: use uniform block)
@ -906,7 +906,7 @@ GLXFBEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
bool sharedDerivativeBuffers) {
bool interleavedDerivativeBuffers) {
// create patch kernel
if (program) {
glDeleteProgram(program);
@ -917,7 +917,7 @@ GLXFBEvaluator::_PatchKernel::Compile(BufferDescriptor const &srcDesc,
program = compileKernel(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc,
kernelDefines, sharedDerivativeBuffers);
kernelDefines, interleavedDerivativeBuffers);
if (program == 0) return false;
// cache uniform locations

View File

@ -98,16 +98,73 @@ private:
class GLXFBEvaluator {
public:
typedef bool Instantiatable;
/// \brief Generic creator template.
///
/// Builds an evaluator from the source, destination and 1st derivative
/// buffer descriptors. When \a deviceContext tests true, the interleaved
/// derivative buffer setting is obtained by calling
/// deviceContext->AreInterleavedDerivativeBuffers(); otherwise the setting
/// is false. The setting is then passed as the final argument to another
/// Create() overload, whose result is returned.
template <typename DEVICE_CONTEXT>
static GLXFBEvaluator *Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
DEVICE_CONTEXT deviceContext) {
// A device context that tests false provides no interleaving hint,
// so fall back to false.
bool interleavedDerivativeBuffers = deviceContext
? deviceContext->AreInterleavedDerivativeBuffers()
: false;
return Create(srcDesc, dstDesc, duDesc, dvDesc,
interleavedDerivativeBuffers);
}
/// \brief Non-template overload to allow creation without a device context.
///
/// Accepts an untyped \a deviceContext pointer, which is ignored, and
/// calls Create() with only the four buffer descriptors, so any trailing
/// parameter of the selected overload takes its default value.
static GLXFBEvaluator *Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
void * deviceContext) {
(void)deviceContext; // not used
return Create(srcDesc, dstDesc, duDesc, dvDesc);
}
static GLXFBEvaluator * Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
void * deviceContext = NULL) {
bool interleavedDerivativeBuffers = false) {
GLXFBEvaluator *instance = new GLXFBEvaluator(interleavedDerivativeBuffers);
if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc))
return instance;
delete instance;
return NULL;
}
/// Generic creator template.
template <typename DEVICE_CONTEXT>
static GLXFBEvaluator *Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
DEVICE_CONTEXT deviceContext) {
bool interleavedDerivativeBuffers = deviceContext
? deviceContext->AreInterleavedDerivativeBuffers()
: false;
return Create(srcDesc, dstDesc, duDesc, dvDesc,
BufferDescriptor(),
BufferDescriptor(),
BufferDescriptor(),
deviceContext);
duuDesc, duvDesc, dvvDesc,
interleavedDerivativeBuffers);
}
/// \brief Non-template overload to allow creation without a device context
/// when 2nd derivative descriptors are supplied.
///
/// Accepts an untyped \a deviceContext pointer, which is ignored, and
/// calls Create() with only the seven buffer descriptors, so any trailing
/// parameter of the selected overload takes its default value.
static GLXFBEvaluator *Create(BufferDescriptor const &srcDesc,
BufferDescriptor const &dstDesc,
BufferDescriptor const &duDesc,
BufferDescriptor const &dvDesc,
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
void * deviceContext) {
(void)deviceContext; // not used
return Create(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc);
}
static GLXFBEvaluator * Create(BufferDescriptor const &srcDesc,
@ -117,9 +174,8 @@ public:
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
void * deviceContext = NULL) {
(void)deviceContext; // not used
GLXFBEvaluator *instance = new GLXFBEvaluator();
bool interleavedDerivativeBuffers = false) {
GLXFBEvaluator *instance = new GLXFBEvaluator(interleavedDerivativeBuffers);
if (instance->Compile(srcDesc, dstDesc, duDesc, dvDesc,
duuDesc, duvDesc, dvvDesc))
return instance;
@ -127,8 +183,20 @@ public:
return NULL;
}
/// Constructor.
GLXFBEvaluator(bool sharedDerivativeBuffers = false);
/// \brief Constructor.
///
/// The transform feedback evaluator can make more sparing use of
/// transform feedback buffer bindings when it is known that evaluator
/// output buffers are shared and the corresponding buffer descriptors
/// are interleaved. When \a interleavedDerivativeBuffers is true
/// then evaluation requires that either 1st derivative outputs are
/// interleaved and 2nd derivative outputs are interleaved separately
/// or that both 1st derivative and 2nd derivative outputs are
/// interleaved together. This reduces the maximum number of required
/// transform feedback buffer bindings to 3 instead of 6 which is
/// significant, since most transform feedback implementations support
/// a maximum of 4 bindings.
GLXFBEvaluator(bool interleavedDerivativeBuffers = false);
/// Destructor. Note that the GL context must be made current.
~GLXFBEvaluator();
@ -2061,7 +2129,7 @@ public:
private:
GLuint _srcBufferTexture;
bool _sharedDerivativeBuffers;
bool _interleavedDerivativeBuffers;
struct _StencilKernel {
_StencilKernel();
@ -2073,7 +2141,7 @@ private:
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
bool sharedDerivativeBuffers);
bool interleavedDerivativeBuffers);
GLuint program;
GLint uniformSrcBufferTexture;
GLint uniformSrcOffset; // src buffer offset (in elements)
@ -2101,7 +2169,7 @@ private:
BufferDescriptor const &duuDesc,
BufferDescriptor const &duvDesc,
BufferDescriptor const &dvvDesc,
bool sharedDerivativeBuffers);
bool interleavedDerivativeBuffers);
GLuint program;
GLint uniformSrcBufferTexture;
GLint uniformSrcOffset; // src buffer offset (in elements)

View File

@ -64,7 +64,7 @@ void writeVertex(Vertex v) {
//------------------------------------------------------------------------------
#if defined(OPENSUBDIV_GLSL_XFB_USE_1ST_DERIVATIVES) && \
defined(OPENSUBDIV_GLSL_XFB_SHARED_1ST_DERIVATIVE_BUFFERS)
defined(OPENSUBDIV_GLSL_XFB_INTERLEAVED_1ST_DERIVATIVE_BUFFERS)
out float outDeriv1Buffer[2*LENGTH];
void writeDu(Vertex v) {
@ -96,7 +96,7 @@ void writeDv(Vertex v) {
#endif
#if defined(OPENSUBDIV_GLSL_XFB_USE_2ND_DERIVATIVES) && \
defined(OPENSUBDIV_GLSL_XFB_SHARED_2ND_DERIVATIVE_BUFFERS)
defined(OPENSUBDIV_GLSL_XFB_INTERLEAVED_2ND_DERIVATIVE_BUFFERS)
out float outDeriv2Buffer[3*LENGTH];
void writeDuu(Vertex v) {