Implemented CATMARK_QUAD_FACE_VERTEX and CATMARK_TRI_QUAD_FACE_VERTEX kernels for CUDA, GLSL Transform Feedback, and GLSL Compute platforms.

This commit is contained in:
Nathan Litke 2014-05-28 16:46:38 -07:00
parent 064115bbe8
commit b769f4f60d
13 changed files with 481 additions and 0 deletions

View File

@ -35,6 +35,16 @@ void OsdCudaComputeFace(float *vertex, float *varying,
int varyingLength, int varyingStride,
int *F_IT, int *F_ITa, int offset, int tableOffset, int start, int end);
void OsdCudaComputeQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end);
void OsdCudaComputeTriQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end);
void OsdCudaComputeEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
@ -171,6 +181,46 @@ OsdCudaComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * F_IT = context->GetTable(FarSubdivisionTables::F_IT);
assert(F_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeQuadFace(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(F_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {
assert(context);
const OsdCudaTable * F_IT = context->GetTable(FarSubdivisionTables::F_IT);
assert(F_IT);
float *vertex = _currentBindState.GetOffsettedVertexBuffer();
float *varying = _currentBindState.GetOffsettedVaryingBuffer();
OsdCudaComputeTriQuadFace(
vertex, varying,
_currentBindState.vertexDesc.length, _currentBindState.vertexDesc.stride,
_currentBindState.varyingDesc.length, _currentBindState.varyingDesc.stride,
static_cast<int*>(F_IT->GetCudaMemory()),
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdCudaComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdCudaComputeContext const *context) const {

View File

@ -120,6 +120,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -140,6 +140,163 @@ computeFace(float *fVertex, float *fVarying,
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeQuadFace(float *fVertex, float *fVaryings, int *F0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x) {
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dst.addWithWeight(&vertex[fidx0], 0.25f);
dst.addWithWeight(&vertex[fidx1], 0.25f);
dst.addWithWeight(&vertex[fidx2], 0.25f);
dst.addWithWeight(&vertex[fidx3], 0.25f);
dstVarying.addWithWeight(&varyings[fidx0], 0.25f);
dstVarying.addWithWeight(&varyings[fidx1], 0.25f);
dstVarying.addWithWeight(&varyings[fidx2], 0.25f);
dstVarying.addWithWeight(&varyings[fidx3], 0.25f);
vertex[offset + i] = dst;
varyings[offset + i] = dstVarying;
}else{
dst.addWithWeight(&vertex[fidx0], 0.25f);
dst.addWithWeight(&vertex[fidx1], 0.25f);
dst.addWithWeight(&vertex[fidx2], 0.25f);
dst.addWithWeight(&vertex[fidx3], 0.25f);
vertex[offset + i] = dst;
}
}
}
__global__ void
computeQuadFace(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start +threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x){
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
// XXX: can we use local stack like alloca?
float *dstVertex = fVertex + (i+offset)*vertexStride;
clear(dstVertex, vertexLength);
float *dstVarying = fVarying + (i+offset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVertex, fVertex + fidx0*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + fidx1*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + fidx2*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVertex, fVertex + fidx3*vertexStride, 0.25f, vertexLength);
addWithWeight(dstVarying, fVarying + fidx0*varyingStride, 0.25f, varyingLength);
addWithWeight(dstVarying, fVarying + fidx1*varyingStride, 0.25f, varyingLength);
addWithWeight(dstVarying, fVarying + fidx2*varyingStride, 0.25f, varyingLength);
addWithWeight(dstVarying, fVarying + fidx3*varyingStride, 0.25f, varyingLength);
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeTriQuadFace(float *fVertex, float *fVaryings, int *F0_IT, int offset, int tableOffset, int start, int end)
{
DeviceVertex<NUM_VERTEX_ELEMENTS> *vertex = (DeviceVertex<NUM_VERTEX_ELEMENTS>*)fVertex;
DeviceVertex<NUM_VARYING_ELEMENTS> *varyings = (DeviceVertex<NUM_VARYING_ELEMENTS>*)fVaryings;
for (int i = start + threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x) {
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
DeviceVertex<NUM_VERTEX_ELEMENTS> dst;
dst.clear();
if(NUM_VARYING_ELEMENTS > 0){
DeviceVertex<NUM_VARYING_ELEMENTS> dstVarying;
dstVarying.clear();
dst.addWithWeight(&vertex[fidx0], weight);
dst.addWithWeight(&vertex[fidx1], weight);
dst.addWithWeight(&vertex[fidx2], weight);
dstVarying.addWithWeight(&varyings[fidx0], weight);
dstVarying.addWithWeight(&varyings[fidx1], weight);
dstVarying.addWithWeight(&varyings[fidx2], weight);
if (!triangle) {
dst.addWithWeight(&vertex[fidx3], weight);
dstVarying.addWithWeight(&varyings[fidx3], 0.25f);
}
vertex[offset + i] = dst;
varyings[offset + i] = dstVarying;
}else{
dst.addWithWeight(&vertex[fidx0], weight);
dst.addWithWeight(&vertex[fidx1], weight);
dst.addWithWeight(&vertex[fidx2], weight);
if (!triangle)
dst.addWithWeight(&vertex[fidx3], weight);
vertex[offset + i] = dst;
}
}
}
__global__ void
computeTriQuadFace(float *fVertex, float *fVarying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F0_IT, int offset, int tableOffset, int start, int end)
{
for (int i = start +threadIdx.x + blockIdx.x*blockDim.x;
i < end;
i += blockDim.x * gridDim.x){
int fidx0 = F0_IT[tableOffset + 4 * i + 0];
int fidx1 = F0_IT[tableOffset + 4 * i + 1];
int fidx2 = F0_IT[tableOffset + 4 * i + 2];
int fidx3 = F0_IT[tableOffset + 4 * i + 3];
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
// XXX: can we use local stack like alloca?
float *dstVertex = fVertex + (i+offset)*vertexStride;
clear(dstVertex, vertexLength);
float *dstVarying = fVarying + (i+offset)*varyingStride;
clear(dstVarying, varyingLength);
addWithWeight(dstVertex, fVertex + fidx0*vertexStride, weight, vertexLength);
addWithWeight(dstVertex, fVertex + fidx1*vertexStride, weight, vertexLength);
addWithWeight(dstVertex, fVertex + fidx2*vertexStride, weight, vertexLength);
addWithWeight(dstVarying, fVarying + fidx0*varyingStride, weight, varyingLength);
addWithWeight(dstVarying, fVarying + fidx1*varyingStride, weight, varyingLength);
addWithWeight(dstVarying, fVarying + fidx2*varyingStride, weight, varyingLength);
if (!triangle) {
addWithWeight(dstVertex, fVertex + fidx3*vertexStride, weight, vertexLength);
addWithWeight(dstVarying, fVarying + fidx3*varyingStride, weight, varyingLength);
}
}
}
template <int NUM_VERTEX_ELEMENTS, int NUM_VARYING_ELEMENTS> __global__ void
computeEdge(float *fVertex, float *fVaryings, int *E0_IT, float *E0_S, int offset, int tableOffset, int start, int end)
{
@ -639,6 +796,40 @@ void OsdCudaComputeFace(float *vertex, float *varying,
F_IT, F_ITa, offset, tableOffset, start, end);
}
void OsdCudaComputeQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end)
{
//computeQuadFace<3, 0><<<512,32>>>(vertex, varying, F_IT, offset, start, end);
OPT_KERNEL(0, 0, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
// fallback kernel (slow)
computeQuadFace<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
F_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeTriQuadFace(float *vertex, float *varying,
int vertexLength, int vertexStride,
int varyingLength, int varyingStride,
int *F_IT, int offset, int tableOffset, int start, int end)
{
//computeTriQuadFace<3, 0><<<512,32>>>(vertex, varying, F_IT, offset, start, end);
OPT_KERNEL(0, 0, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(0, 3, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 0, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
OPT_KERNEL(3, 3, computeTriQuadFace, 512, 32, (vertex, varying, F_IT, offset, tableOffset, start, end));
// fallback kernel (slow)
computeTriQuadFace<<<512, 32>>>(vertex, varying,
vertexLength, vertexStride, varyingLength, varyingStride,
F_IT, offset, tableOffset, start, end);
}
void OsdCudaComputeEdge(float *vertex, float *varying,
int vertexLength, int vertexStride,

View File

@ -139,6 +139,28 @@ OsdGLSLComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(),
batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLComputeController::ApplyCatmarkEdgeVerticesKernel(
FarKernelBatch const &batch, OsdGLSLComputeContext const *context) const {

View File

@ -129,6 +129,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -178,6 +178,64 @@ void catmarkComputeFace()
writeVertex(vid, dst);
}
// Quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeQuadFace()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
int fidx0 = _F_IT[tableOffset + i * 4 + 0];
int fidx1 = _F_IT[tableOffset + i * 4 + 1];
int fidx2 = _F_IT[tableOffset + i * 4 + 2];
int fidx3 = _F_IT[tableOffset + i * 4 + 3];
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), 0.25);
addWithWeight(dst, readVertex(fidx1), 0.25);
addWithWeight(dst, readVertex(fidx2), 0.25);
addWithWeight(dst, readVertex(fidx3), 0.25);
addVaryingWithWeight(dst, readVertex(fidx0), 0.25);
addVaryingWithWeight(dst, readVertex(fidx1), 0.25);
addVaryingWithWeight(dst, readVertex(fidx2), 0.25);
addVaryingWithWeight(dst, readVertex(fidx3), 0.25);
writeVertex(vid, dst);
}
// Tri-quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeTriQuadFace()
{
int i = int(gl_GlobalInvocationID.x) + indexStart;
if (i >= indexEnd) return;
int vid = i + vertexOffset;
int fidx0 = _F_IT[tableOffset + i * 4 + 0];
int fidx1 = _F_IT[tableOffset + i * 4 + 1];
int fidx2 = _F_IT[tableOffset + i * 4 + 2];
int fidx3 = _F_IT[tableOffset + i * 4 + 3];
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), weight);
addWithWeight(dst, readVertex(fidx1), weight);
addWithWeight(dst, readVertex(fidx2), weight);
if (!triangle)
addWithWeight(dst, readVertex(fidx3), weight);
addVaryingWithWeight(dst, readVertex(fidx0), weight);
addVaryingWithWeight(dst, readVertex(fidx1), weight);
addVaryingWithWeight(dst, readVertex(fidx2), weight);
if (!triangle)
addVaryingWithWeight(dst, readVertex(fidx3), weight);
writeVertex(vid, dst);
}
// Edge-vertices compute Kernepl
subroutine(computeKernelType)
void catmarkComputeEdge()

View File

@ -117,6 +117,12 @@ OsdGLSLComputeKernelBundle::Compile(
_subComputeFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeFace");
_subComputeQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program,
GL_COMPUTE_SHADER,
"catmarkComputeEdge");
@ -229,6 +235,22 @@ OsdGLSLComputeKernelBundle::ApplyCatmarkFaceVerticesKernel(
// glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeQuadFace);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 1, &_subComputeTriQuadFace);
dispatchCompute(vertexOffset, tableOffset, start, end);
}
void
OsdGLSLComputeKernelBundle::ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end) {

View File

@ -57,6 +57,12 @@ public:
void ApplyCatmarkFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkTriQuadFaceVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
void ApplyCatmarkEdgeVerticesKernel(
int vertexOffset, int tableOffset, int start, int end);
@ -130,6 +136,10 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)

View File

@ -207,6 +207,29 @@ OsdGLSLTransformFeedbackComputeController::ApplyCatmarkFaceVerticesKernel(
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkQuadFaceVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void
OsdGLSLTransformFeedbackComputeController::ApplyCatmarkTriQuadFaceVerticesKernel(
FarKernelBatch const &batch, OsdGLSLTransformFeedbackComputeContext const *context) const {
assert(context);
_currentBindState.kernelBundle->ApplyCatmarkTriQuadFaceVerticesKernel(
_currentBindState.vertexBuffer, _currentBindState.varyingBuffer,
_currentBindState.vertexDesc.offset, _currentBindState.varyingDesc.offset,
batch.GetVertexOffset(), batch.GetTableOffset(), batch.GetStart(), batch.GetEnd());
}
void

View File

@ -127,6 +127,10 @@ protected:
void ApplyCatmarkFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkTriQuadFaceVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkEdgeVerticesKernel(FarKernelBatch const &batch, ComputeContext const *context) const;
void ApplyCatmarkVertexVerticesKernelB(FarKernelBatch const &batch, ComputeContext const *context) const;

View File

@ -186,6 +186,59 @@ void catmarkComputeFace()
writeVertex(dst);
}
// Quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeQuadFace()
{
int i = gl_VertexID + indexStart;
int fidx0 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
int fidx1 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
int fidx2 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
int fidx3 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), 0.25);
addWithWeight(dst, readVertex(fidx1), 0.25);
addWithWeight(dst, readVertex(fidx2), 0.25);
addWithWeight(dst, readVertex(fidx3), 0.25);
addVaryingWithWeight(dst, readVertex(fidx0), 0.25);
addVaryingWithWeight(dst, readVertex(fidx1), 0.25);
addVaryingWithWeight(dst, readVertex(fidx2), 0.25);
addVaryingWithWeight(dst, readVertex(fidx3), 0.25);
writeVertex(dst);
}
// Tri-quad face-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeTriQuadFace()
{
int i = gl_VertexID + indexStart;
int fidx0 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
int fidx1 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
int fidx2 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
int fidx3 = texelFetch(_F0_IT, tableOffset + 4 * i + 0);
bool triangle = (fidx2 == fidx3);
float weight = triangle ? 1.0f / 3.0f : 1.0f / 4.0f;
Vertex dst;
clear(dst);
addWithWeight(dst, readVertex(fidx0), weight);
addWithWeight(dst, readVertex(fidx1), weight);
addWithWeight(dst, readVertex(fidx2), weight);
if (!triangle)
addWithWeight(dst, readVertex(fidx3), weight);
addVaryingWithWeight(dst, readVertex(fidx0), weight);
addVaryingWithWeight(dst, readVertex(fidx1), weight);
addVaryingWithWeight(dst, readVertex(fidx2), weight);
if (!triangle)
addVaryingWithWeight(dst, readVertex(fidx3), weight);
writeVertex(dst);
}
// Edge-vertices compute Kernel
subroutine(computeKernelType)
void catmarkComputeEdge()

View File

@ -225,6 +225,8 @@ OsdGLSLTransformFeedbackKernelBundle::Compile(
_uniformVaryingBuffer = glGetUniformLocation(_program, "varyingData");
_subComputeFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeFace");
_subComputeQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeQuadFace");
_subComputeTriQuadFace = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeTriQuadFace");
_subComputeEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "catmarkComputeEdge");
_subComputeBilinearEdge = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeEdge");
_subComputeVertex = glGetSubroutineIndex(_program, GL_VERTEX_SHADER, "bilinearComputeVertex");
@ -356,6 +358,30 @@ OsdGLSLTransformFeedbackKernelBundle::ApplyBilinearVertexVerticesKernel(
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeQuadFace);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkTriQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end) {
glUniformSubroutinesuiv(GL_VERTEX_SHADER, 1, &_subComputeTriQuadFace);
transformGpuBufferData(vertexBuffer, varyingBuffer,
vertexOffset, varyingOffset,
offset, tableOffset, start, end);
}
void
OsdGLSLTransformFeedbackKernelBundle::ApplyCatmarkFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,

View File

@ -68,6 +68,16 @@ public:
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkTriQuadFaceVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
int offset, int tableOffset, int start, int end);
void ApplyCatmarkEdgeVerticesKernel(
GLuint vertexBuffer, GLuint varyingBuffer,
int vertexOffset, int varyingOffset,
@ -181,6 +191,10 @@ protected:
GLuint _subComputeFace; // general face-vertex kernel (all schemes)
GLuint _subComputeQuadFace; // quad face-vertex kernel (catmark)
GLuint _subComputeTriQuadFace; // tri-quad face-vertex kernel (catmark)
GLuint _subComputeEdge; // edge-vertex kernel (catmark + loop schemes)
GLuint _subComputeBilinearEdge; // edge-vertex kernel (bilinear scheme)