gcd: naive implementation using dispatch_apply for all loops. Not faster; the dispatch overhead is too high.

This commit is contained in:
Aras Pranckevicius 2013-02-02 23:22:04 +02:00
parent 54c7329ba4
commit b556e9d6cd
5 changed files with 94 additions and 60 deletions

View File

@ -99,9 +99,6 @@ public:
/// Waits until all running subdivision kernels finish. /// Waits until all running subdivision kernels finish.
void Synchronize(); void Synchronize();
private:
int _numThreads;
}; };
} // end namespace OPENSUBDIV_VERSION } // end namespace OPENSUBDIV_VERSION

View File

@ -66,6 +66,7 @@ namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION { namespace OPENSUBDIV_VERSION {
OsdGcdKernelDispatcher::OsdGcdKernelDispatcher() { OsdGcdKernelDispatcher::OsdGcdKernelDispatcher() {
_gcd_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
} }
OsdGcdKernelDispatcher::~OsdGcdKernelDispatcher() { OsdGcdKernelDispatcher::~OsdGcdKernelDispatcher() {
@ -100,7 +101,8 @@ OsdGcdKernelDispatcher::ApplyBilinearFaceVerticesKernel(
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::F_IT, level-1), (const int*)context->GetTablePtr(Table::F_IT, level-1),
(const int*)context->GetTablePtr(Table::F_ITa, level-1), (const int*)context->GetTablePtr(Table::F_ITa, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -117,7 +119,8 @@ OsdGcdKernelDispatcher::ApplyBilinearEdgeVerticesKernel(
context->GetCurrentVertexBuffer(), context->GetCurrentVertexBuffer(),
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::E_IT, level-1), (const int*)context->GetTablePtr(Table::E_IT, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -134,7 +137,8 @@ OsdGcdKernelDispatcher::ApplyBilinearVertexVerticesKernel(
context->GetCurrentVertexBuffer(), context->GetCurrentVertexBuffer(),
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::V_ITa, level-1), (const int*)context->GetTablePtr(Table::V_ITa, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -152,7 +156,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::F_IT, level-1), (const int*)context->GetTablePtr(Table::F_IT, level-1),
(const int*)context->GetTablePtr(Table::F_ITa, level-1), (const int*)context->GetTablePtr(Table::F_ITa, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -170,7 +175,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::E_IT, level-1), (const int*)context->GetTablePtr(Table::E_IT, level-1),
(const float*)context->GetTablePtr(Table::E_W, level-1), (const float*)context->GetTablePtr(Table::E_W, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -189,7 +195,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
(const int*)context->GetTablePtr(Table::V_ITa, level-1), (const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const int*)context->GetTablePtr(Table::V_IT, level-1), (const int*)context->GetTablePtr(Table::V_IT, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1), (const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -207,7 +214,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::V_ITa, level-1), (const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1), (const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end, pass); offset, start, end, pass,
_gcd_queue);
} }
void void
@ -225,7 +233,8 @@ OsdGcdKernelDispatcher::ApplyLoopEdgeVerticesKernel(
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::E_IT, level-1), (const int*)context->GetTablePtr(Table::E_IT, level-1),
(const float*)context->GetTablePtr(Table::E_W, level-1), (const float*)context->GetTablePtr(Table::E_W, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -244,7 +253,8 @@ OsdGcdKernelDispatcher::ApplyLoopVertexVerticesKernelB(
(const int*)context->GetTablePtr(Table::V_ITa, level-1), (const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const int*)context->GetTablePtr(Table::V_IT, level-1), (const int*)context->GetTablePtr(Table::V_IT, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1), (const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end); offset, start, end,
_gcd_queue);
} }
void void
@ -262,7 +272,8 @@ OsdGcdKernelDispatcher::ApplyLoopVertexVerticesKernelA(
context->GetCurrentVaryingBuffer(), context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::V_ITa, level-1), (const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1), (const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end, pass); offset, start, end, pass,
_gcd_queue);
} }
void void
@ -293,7 +304,8 @@ OsdGcdKernelDispatcher::ApplyVertexEdits(
edit->GetPrimvarWidth(), edit->GetPrimvarWidth(),
primvarIndices.GetNumElements(level-1), primvarIndices.GetNumElements(level-1),
(const int*)primvarIndices[level-1], (const int*)primvarIndices[level-1],
(const float*)editValues[level-1]); (const float*)editValues[level-1],
_gcd_queue);
} else if (edit->GetOperation() == FarVertexEdit::Set) { } else if (edit->GetOperation() == FarVertexEdit::Set) {
OsdGcdEditVertexSet(context->GetVertexDescriptor(), OsdGcdEditVertexSet(context->GetVertexDescriptor(),
context->GetCurrentVertexBuffer(), context->GetCurrentVertexBuffer(),
@ -301,7 +313,8 @@ OsdGcdKernelDispatcher::ApplyVertexEdits(
edit->GetPrimvarWidth(), edit->GetPrimvarWidth(),
primvarIndices.GetNumElements(level-1), primvarIndices.GetNumElements(level-1),
(const int*)primvarIndices[level-1], (const int*)primvarIndices[level-1],
(const float*)editValues[level-1]); (const float*)editValues[level-1],
_gcd_queue);
} }
} }
} }

View File

@ -57,6 +57,8 @@
#ifndef OSD_GCD_DISPATCHER_H #ifndef OSD_GCD_DISPATCHER_H
#define OSD_GCD_DISPATCHER_H #define OSD_GCD_DISPATCHER_H
#include <dispatch/dispatch.h>
#include "../version.h" #include "../version.h"
#include "../osd/vertex.h" #include "../osd/vertex.h"
@ -125,6 +127,9 @@ protected:
FarMesh<OsdVertex> *mesh, int offset, int level, FarMesh<OsdVertex> *mesh, int offset, int level,
void * clientdata) const; void * clientdata) const;
private:
dispatch_queue_t _gcd_queue;
}; };
} // end namespace OPENSUBDIV_VERSION } // end namespace OPENSUBDIV_VERSION

View File

@ -65,10 +65,11 @@ namespace OPENSUBDIV_VERSION {
void OsdGcdComputeFace( void OsdGcdComputeFace(
const OsdVertexDescriptor *vdesc, float * vertex, float * varying, const OsdVertexDescriptor *vdesc, float * vertex, float * varying,
const int *F_IT, const int *F_ITa, int offset, int start, int end) { const int *F_IT, const int *F_ITa, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
for (int i = start; i < end; i++) { int i = start+blockIdx;
int h = F_ITa[2*i]; int h = F_ITa[2*i];
int n = F_ITa[2*i+1]; int n = F_ITa[2*i+1];
@ -84,15 +85,16 @@ void OsdGcdComputeFace(
vdesc->AddWithWeight(vertex, dstIndex, index, weight); vdesc->AddWithWeight(vertex, dstIndex, index, weight);
vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight); vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight);
} }
} });
} }
void OsdGcdComputeEdge( void OsdGcdComputeEdge(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying, const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *E_IT, const float *E_W, int offset, int start, int end) { const int *E_IT, const float *E_W, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
for (int i = start; i < end; i++) { int i = start+blockIdx;
int eidx0 = E_IT[4*i+0]; int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1]; int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2]; int eidx2 = E_IT[4*i+2];
@ -115,16 +117,17 @@ void OsdGcdComputeEdge(
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f); vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f); vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
} });
} }
void OsdGcdComputeVertexA( void OsdGcdComputeVertexA(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying, const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, const float *V_W, const int *V_ITa, const float *V_W,
int offset, int start, int end, int pass) { int offset, int start, int end, int pass,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
for (int i = start; i < end; i++) { int i = start+blockIdx;
int n = V_ITa[5*i+1]; int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2]; int p = V_ITa[5*i+2];
int eidx0 = V_ITa[5*i+3]; int eidx0 = V_ITa[5*i+3];
@ -152,16 +155,17 @@ void OsdGcdComputeVertexA(
if (not pass) if (not pass)
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
} });
} }
void OsdGcdComputeVertexB( void OsdGcdComputeVertexB(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying, const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, const int *V_IT, const float *V_W, const int *V_ITa, const int *V_IT, const float *V_W,
int offset, int start, int end) { int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
for (int i = start; i < end; i++) { int i = start+blockIdx;
int h = V_ITa[5*i]; int h = V_ITa[5*i];
int n = V_ITa[5*i+1]; int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2]; int p = V_ITa[5*i+2];
@ -180,16 +184,17 @@ void OsdGcdComputeVertexB(
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp); vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
} }
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
} });
} }
void OsdGcdComputeLoopVertexB( void OsdGcdComputeLoopVertexB(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying, const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, const int *V_IT, const float *V_W, const int *V_ITa, const int *V_IT, const float *V_W,
int offset, int start, int end) { int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
for (int i = start; i < end; i++) { int i = start+blockIdx;
int h = V_ITa[5*i]; int h = V_ITa[5*i];
int n = V_ITa[5*i+1]; int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2]; int p = V_ITa[5*i+2];
@ -209,15 +214,16 @@ void OsdGcdComputeLoopVertexB(
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta); vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
} });
} }
void OsdGcdComputeBilinearEdge( void OsdGcdComputeBilinearEdge(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying, const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *E_IT, int offset, int start, int end) { const int *E_IT, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
for (int i = start; i < end; i++) { int i = start+blockIdx;
int eidx0 = E_IT[2*i+0]; int eidx0 = E_IT[2*i+0];
int eidx1 = E_IT[2*i+1]; int eidx1 = E_IT[2*i+1];
@ -229,15 +235,16 @@ void OsdGcdComputeBilinearEdge(
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f); vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f); vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
} });
} }
void OsdGcdComputeBilinearVertex( void OsdGcdComputeBilinearVertex(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying, const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, int offset, int start, int end) { const int *V_ITa, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
for (int i = start; i < end; i++) { int i = start+blockIdx;
int p = V_ITa[i]; int p = V_ITa[i];
int dstIndex = offset + i; int dstIndex = offset + i;
@ -245,31 +252,33 @@ void OsdGcdComputeBilinearVertex(
vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f); vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f); vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
} });
} }
void OsdGcdEditVertexAdd( void OsdGcdEditVertexAdd(
const OsdVertexDescriptor *vdesc, float *vertex, const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int vertexCount, int primVarOffset, int primVarWidth, int vertexCount,
const int *editIndices, const float *editValues) { const int *editIndices, const float *editValues,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
for (int i = 0; i < vertexCount; i++) { int i = blockIdx;
vdesc->ApplyVertexEditAdd(vertex, primVarOffset, primVarWidth, vdesc->ApplyVertexEditAdd(vertex, primVarOffset, primVarWidth,
editIndices[i], &editValues[i*primVarWidth]); editIndices[i], &editValues[i*primVarWidth]);
} });
} }
void OsdGcdEditVertexSet( void OsdGcdEditVertexSet(
const OsdVertexDescriptor *vdesc, float *vertex, const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int vertexCount, int primVarOffset, int primVarWidth, int vertexCount,
const int *editIndices, const float *editValues) { const int *editIndices, const float *editValues,
dispatch_queue_t gcdq) {
#pragma omp parallel for dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
for (int i = 0; i < vertexCount; i++) { int i = blockIdx;
vdesc->ApplyVertexEditSet(vertex, primVarOffset, primVarWidth, vdesc->ApplyVertexEditSet(vertex, primVarOffset, primVarWidth,
editIndices[i], &editValues[i*primVarWidth]); editIndices[i], &editValues[i*primVarWidth]);
} });
} }

View File

@ -57,6 +57,7 @@
#ifndef OSD_GCD_KERNEL_H #ifndef OSD_GCD_KERNEL_H
#define OSD_GCD_KERNEL_H #define OSD_GCD_KERNEL_H
#include <dispatch/dispatch.h>
#include "../version.h" #include "../version.h"
namespace OpenSubdiv { namespace OpenSubdiv {
@ -67,46 +68,55 @@ struct OsdVertexDescriptor;
void OsdGcdComputeFace(const OsdVertexDescriptor *vdesc, void OsdGcdComputeFace(const OsdVertexDescriptor *vdesc,
float * vertex, float * varying, float * vertex, float * varying,
const int *F_IT, const int *F_ITa, const int *F_IT, const int *F_ITa,
int offset, int start, int end); int offset, int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeEdge(const OsdVertexDescriptor *vdesc, void OsdGcdComputeEdge(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying, float *vertex, float * varying,
const int *E_IT, const float *E_ITa, const int *E_IT, const float *E_ITa,
int offset, int start, int end); int offset, int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeVertexA(const OsdVertexDescriptor *vdesc, void OsdGcdComputeVertexA(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying, float *vertex, float * varying,
const int *V_ITa, const float *V_IT, const int *V_ITa, const float *V_IT,
int offset, int start, int end, int pass); int offset, int start, int end, int pass,
dispatch_queue_t gcdq);
void OsdGcdComputeVertexB(const OsdVertexDescriptor *vdesc, void OsdGcdComputeVertexB(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying, float *vertex, float * varying,
const int *V_ITa, const int *V_IT, const float *V_W, const int *V_ITa, const int *V_IT, const float *V_W,
int offset, int start, int end); int offset, int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeLoopVertexB(const OsdVertexDescriptor *vdesc, void OsdGcdComputeLoopVertexB(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying, float *vertex, float * varying,
const int *V_ITa, const int *V_IT, const int *V_ITa, const int *V_IT,
const float *V_W, const float *V_W,
int offset, int start, int end); int offset, int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeBilinearEdge(const OsdVertexDescriptor *vdesc, void OsdGcdComputeBilinearEdge(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying, float *vertex, float * varying,
const int *E_IT, const int *E_IT,
int offset, int start, int end); int offset, int start, int end,
dispatch_queue_t gcdq);
void OsdGcdComputeBilinearVertex(const OsdVertexDescriptor *vdesc, void OsdGcdComputeBilinearVertex(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying, float *vertex, float * varying,
const int *V_ITa, const int *V_ITa,
int offset, int start, int end); int offset, int start, int end,
dispatch_queue_t gcdq);
void OsdGcdEditVertexAdd(const OsdVertexDescriptor *vdesc, float *vertex, void OsdGcdEditVertexAdd(const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int count, int primVarOffset, int primVarWidth, int count,
const int *editIndices, const float *editValues); const int *editIndices, const float *editValues,
dispatch_queue_t gcdq);
void OsdGcdEditVertexSet(const OsdVertexDescriptor *vdesc, float *vertex, void OsdGcdEditVertexSet(const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int count, int primVarOffset, int primVarWidth, int count,
const int *editIndices, const float *editValues); const int *editIndices, const float *editValues,
dispatch_queue_t gcdq);
} // end namespace OPENSUBDIV_VERSION } // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION; using namespace OPENSUBDIV_VERSION;