gcd: naive implementation using dispatch_apply for all loops. Not faster; the dispatch overhead is too high.

This commit is contained in:
Aras Pranckevicius 2013-02-02 23:22:04 +02:00
parent 54c7329ba4
commit b556e9d6cd
5 changed files with 94 additions and 60 deletions

View File

@ -99,9 +99,6 @@ public:
/// Waits until all running subdivision kernels finish.
void Synchronize();
private:
int _numThreads;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -66,6 +66,7 @@ namespace OpenSubdiv {
namespace OPENSUBDIV_VERSION {
// Caches the default-priority global GCD queue in _gcd_queue; the kernel
// launchers of this class forward that handle to the OsdGcd* kernels.
OsdGcdKernelDispatcher::OsdGcdKernelDispatcher()
    : _gcd_queue(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0)) {
}
OsdGcdKernelDispatcher::~OsdGcdKernelDispatcher() {
@ -100,7 +101,8 @@ OsdGcdKernelDispatcher::ApplyBilinearFaceVerticesKernel(
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::F_IT, level-1),
(const int*)context->GetTablePtr(Table::F_ITa, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -117,7 +119,8 @@ OsdGcdKernelDispatcher::ApplyBilinearEdgeVerticesKernel(
context->GetCurrentVertexBuffer(),
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::E_IT, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -134,7 +137,8 @@ OsdGcdKernelDispatcher::ApplyBilinearVertexVerticesKernel(
context->GetCurrentVertexBuffer(),
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::V_ITa, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -152,7 +156,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkFaceVerticesKernel(
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::F_IT, level-1),
(const int*)context->GetTablePtr(Table::F_ITa, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -170,7 +175,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkEdgeVerticesKernel(
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::E_IT, level-1),
(const float*)context->GetTablePtr(Table::E_W, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -189,7 +195,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkVertexVerticesKernelB(
(const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const int*)context->GetTablePtr(Table::V_IT, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -207,7 +214,8 @@ OsdGcdKernelDispatcher::ApplyCatmarkVertexVerticesKernelA(
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end, pass);
offset, start, end, pass,
_gcd_queue);
}
void
@ -225,7 +233,8 @@ OsdGcdKernelDispatcher::ApplyLoopEdgeVerticesKernel(
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::E_IT, level-1),
(const float*)context->GetTablePtr(Table::E_W, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -244,7 +253,8 @@ OsdGcdKernelDispatcher::ApplyLoopVertexVerticesKernelB(
(const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const int*)context->GetTablePtr(Table::V_IT, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end);
offset, start, end,
_gcd_queue);
}
void
@ -262,7 +272,8 @@ OsdGcdKernelDispatcher::ApplyLoopVertexVerticesKernelA(
context->GetCurrentVaryingBuffer(),
(const int*)context->GetTablePtr(Table::V_ITa, level-1),
(const float*)context->GetTablePtr(Table::V_W, level-1),
offset, start, end, pass);
offset, start, end, pass,
_gcd_queue);
}
void
@ -293,7 +304,8 @@ OsdGcdKernelDispatcher::ApplyVertexEdits(
edit->GetPrimvarWidth(),
primvarIndices.GetNumElements(level-1),
(const int*)primvarIndices[level-1],
(const float*)editValues[level-1]);
(const float*)editValues[level-1],
_gcd_queue);
} else if (edit->GetOperation() == FarVertexEdit::Set) {
OsdGcdEditVertexSet(context->GetVertexDescriptor(),
context->GetCurrentVertexBuffer(),
@ -301,7 +313,8 @@ OsdGcdKernelDispatcher::ApplyVertexEdits(
edit->GetPrimvarWidth(),
primvarIndices.GetNumElements(level-1),
(const int*)primvarIndices[level-1],
(const float*)editValues[level-1]);
(const float*)editValues[level-1],
_gcd_queue);
}
}
}

View File

@ -57,6 +57,8 @@
#ifndef OSD_GCD_DISPATCHER_H
#define OSD_GCD_DISPATCHER_H
#include <dispatch/dispatch.h>
#include "../version.h"
#include "../osd/vertex.h"
@ -125,6 +127,9 @@ protected:
FarMesh<OsdVertex> *mesh, int offset, int level,
void * clientdata) const;
private:
dispatch_queue_t _gcd_queue;
};
} // end namespace OPENSUBDIV_VERSION

View File

@ -65,10 +65,11 @@ namespace OPENSUBDIV_VERSION {
void OsdGcdComputeFace(
const OsdVertexDescriptor *vdesc, float * vertex, float * varying,
const int *F_IT, const int *F_ITa, int offset, int start, int end) {
const int *F_IT, const int *F_ITa, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = start; i < end; i++) {
dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
int i = start+blockIdx;
int h = F_ITa[2*i];
int n = F_ITa[2*i+1];
@ -84,15 +85,16 @@ void OsdGcdComputeFace(
vdesc->AddWithWeight(vertex, dstIndex, index, weight);
vdesc->AddVaryingWithWeight(varying, dstIndex, index, weight);
}
}
});
}
void OsdGcdComputeEdge(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *E_IT, const float *E_W, int offset, int start, int end) {
const int *E_IT, const float *E_W, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = start; i < end; i++) {
dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
int i = start+blockIdx;
int eidx0 = E_IT[4*i+0];
int eidx1 = E_IT[4*i+1];
int eidx2 = E_IT[4*i+2];
@ -115,16 +117,17 @@ void OsdGcdComputeEdge(
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
}
});
}
void OsdGcdComputeVertexA(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, const float *V_W,
int offset, int start, int end, int pass) {
int offset, int start, int end, int pass,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = start; i < end; i++) {
dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
int i = start+blockIdx;
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
int eidx0 = V_ITa[5*i+3];
@ -152,16 +155,17 @@ void OsdGcdComputeVertexA(
if (not pass)
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
});
}
void OsdGcdComputeVertexB(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, const int *V_IT, const float *V_W,
int offset, int start, int end) {
int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = start; i < end; i++) {
dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
int i = start+blockIdx;
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
@ -180,16 +184,17 @@ void OsdGcdComputeVertexB(
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j*2+1], weight * wp);
}
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
});
}
void OsdGcdComputeLoopVertexB(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, const int *V_IT, const float *V_W,
int offset, int start, int end) {
int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = start; i < end; i++) {
dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
int i = start+blockIdx;
int h = V_ITa[5*i];
int n = V_ITa[5*i+1];
int p = V_ITa[5*i+2];
@ -209,15 +214,16 @@ void OsdGcdComputeLoopVertexB(
vdesc->AddWithWeight(vertex, dstIndex, V_IT[h+j], weight * beta);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
});
}
void OsdGcdComputeBilinearEdge(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *E_IT, int offset, int start, int end) {
const int *E_IT, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = start; i < end; i++) {
dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
int i = start+blockIdx;
int eidx0 = E_IT[2*i+0];
int eidx1 = E_IT[2*i+1];
@ -229,15 +235,16 @@ void OsdGcdComputeBilinearEdge(
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx0, 0.5f);
vdesc->AddVaryingWithWeight(varying, dstIndex, eidx1, 0.5f);
}
});
}
void OsdGcdComputeBilinearVertex(
const OsdVertexDescriptor *vdesc, float *vertex, float *varying,
const int *V_ITa, int offset, int start, int end) {
const int *V_ITa, int offset, int start, int end,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = start; i < end; i++) {
dispatch_apply(end-start, gcdq, ^(size_t blockIdx){
int i = start+blockIdx;
int p = V_ITa[i];
int dstIndex = offset + i;
@ -245,31 +252,33 @@ void OsdGcdComputeBilinearVertex(
vdesc->AddWithWeight(vertex, dstIndex, p, 1.0f);
vdesc->AddVaryingWithWeight(varying, dstIndex, p, 1.0f);
}
});
}
// Applies an "add" vertex edit for every index i in [0, vertexCount):
// vdesc->ApplyVertexEditAdd receives the destination buffer `vertex`, the
// primvar offset/width, the target vertex editIndices[i], and a pointer to
// the primVarWidth-wide slice of editValues that starts at i*primVarWidth.
//
// NOTE(review): this span is a rendered diff without +/- markers, so it shows
// two variants interleaved: what appears to be the earlier OpenMP `for` loop
// (old parameter tail, `#pragma omp parallel for`, `for (...)`, closing `}`)
// and the dispatch_apply form that takes the extra `gcdq` queue argument.
void OsdGcdEditVertexAdd(
const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int vertexCount,
const int *editIndices, const float *editValues) {
const int *editIndices, const float *editValues,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = 0; i < vertexCount; i++) {
// NOTE(review): vertexCount is an int but the block index is a size_t; a
// negative count would run zero iterations in the `for` loop yet presumably
// converts to a huge iteration count here -- confirm callers never pass < 0.
dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
// Narrowing size_t -> int; keeps the index type used by the loop variant.
int i = blockIdx;
vdesc->ApplyVertexEditAdd(vertex, primVarOffset, primVarWidth,
editIndices[i], &editValues[i*primVarWidth]);
}
});
}
// Applies a "set" vertex edit for every index i in [0, vertexCount):
// vdesc->ApplyVertexEditSet receives the destination buffer `vertex`, the
// primvar offset/width, the target vertex editIndices[i], and a pointer to
// the primVarWidth-wide slice of editValues that starts at i*primVarWidth.
//
// NOTE(review): this span is a rendered diff without +/- markers, so it shows
// two variants interleaved: what appears to be the earlier OpenMP `for` loop
// (old parameter tail, `#pragma omp parallel for`, `for (...)`, closing `}`)
// and the dispatch_apply form that takes the extra `gcdq` queue argument.
void OsdGcdEditVertexSet(
const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int vertexCount,
const int *editIndices, const float *editValues) {
const int *editIndices, const float *editValues,
dispatch_queue_t gcdq) {
#pragma omp parallel for
for (int i = 0; i < vertexCount; i++) {
// NOTE(review): vertexCount is an int but the block index is a size_t; a
// negative count would run zero iterations in the `for` loop yet presumably
// converts to a huge iteration count here -- confirm callers never pass < 0.
dispatch_apply(vertexCount, gcdq, ^(size_t blockIdx){
// Narrowing size_t -> int; keeps the index type used by the loop variant.
int i = blockIdx;
vdesc->ApplyVertexEditSet(vertex, primVarOffset, primVarWidth,
editIndices[i], &editValues[i*primVarWidth]);
}
});
}

View File

@ -57,6 +57,7 @@
#ifndef OSD_GCD_KERNEL_H
#define OSD_GCD_KERNEL_H
#include <dispatch/dispatch.h>
#include "../version.h"
namespace OpenSubdiv {
@ -67,46 +68,55 @@ struct OsdVertexDescriptor;
// Prototypes of the GCD subdivision kernels.
//
// NOTE(review): this span is a rendered diff without +/- markers. Each
// prototype therefore appears with two parameter tails: one ending in `);`
// right after the range/count arguments, and one that continues with the
// `dispatch_queue_t gcdq` argument that the kernels pass to dispatch_apply.

// Face-vertex kernel: processes indices start..end-1, reading a pair of
// entries per index from F_ITa (at 2*i and 2*i+1).
void OsdGcdComputeFace(const OsdVertexDescriptor *vdesc,
float * vertex, float * varying,
const int *F_IT, const int *F_ITa,
int offset, int start, int end);
int offset, int start, int end,
dispatch_queue_t gcdq);
// Edge-vertex kernel: processes indices start..end-1, reading edge indices
// from E_IT at a stride of 4 per index.
// NOTE(review): the float table is named E_ITa here but E_W in the
// definition -- harmless to the compiler, but confirm which name is intended.
void OsdGcdComputeEdge(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying,
const int *E_IT, const float *E_ITa,
int offset, int start, int end);
int offset, int start, int end,
dispatch_queue_t gcdq);
// Vertex-vertex kernel "A": processes indices start..end-1, reading V_ITa at
// a stride of 5 per index; the varying contribution is added only when
// `pass` is zero.
// NOTE(review): the float table is named V_IT here but V_W in the
// definition -- confirm which name is intended.
void OsdGcdComputeVertexA(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying,
const int *V_ITa, const float *V_IT,
int offset, int start, int end, int pass);
int offset, int start, int end, int pass,
dispatch_queue_t gcdq);
// Vertex-vertex kernel "B": processes indices start..end-1, reading V_ITa at
// a stride of 5 per index and neighbour indices from V_IT in pairs.
void OsdGcdComputeVertexB(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying,
const int *V_ITa, const int *V_IT, const float *V_W,
int offset, int start, int end);
int offset, int start, int end,
dispatch_queue_t gcdq);
// Loop-scheme vertex kernel "B": same table layout for V_ITa as the kernel
// above, with neighbour indices read one at a time from V_IT.
void OsdGcdComputeLoopVertexB(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying,
const int *V_ITa, const int *V_IT,
const float *V_W,
int offset, int start, int end);
int offset, int start, int end,
dispatch_queue_t gcdq);
// Bilinear edge kernel: processes indices start..end-1, reading two endpoint
// indices per entry from E_IT; each endpoint contributes 0.5 to the varying
// data.
void OsdGcdComputeBilinearEdge(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying,
const int *E_IT,
int offset, int start, int end);
int offset, int start, int end,
dispatch_queue_t gcdq);
// Bilinear vertex kernel: for each index i in start..end-1, adds source
// vertex V_ITa[i] with weight 1.0 into destination index offset + i.
void OsdGcdComputeBilinearVertex(const OsdVertexDescriptor *vdesc,
float *vertex, float * varying,
const int *V_ITa,
int offset, int start, int end);
int offset, int start, int end,
dispatch_queue_t gcdq);
// Vertex-edit kernel (add): calls vdesc->ApplyVertexEditAdd once per edit
// entry. `count` is named vertexCount in the definition.
void OsdGcdEditVertexAdd(const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int count,
const int *editIndices, const float *editValues);
const int *editIndices, const float *editValues,
dispatch_queue_t gcdq);
// Vertex-edit kernel (set): calls vdesc->ApplyVertexEditSet once per edit
// entry. `count` is named vertexCount in the definition.
void OsdGcdEditVertexSet(const OsdVertexDescriptor *vdesc, float *vertex,
int primVarOffset, int primVarWidth, int count,
const int *editIndices, const float *editValues);
const int *editIndices, const float *editValues,
dispatch_queue_t gcdq);
} // end namespace OPENSUBDIV_VERSION
using namespace OPENSUBDIV_VERSION;