Got a license from Intel for VTune. The first performance analysis showed an unexpected bottleneck:

apparently the UnionFind / island management had unexpected overhead. Added path compression to the UnionFind::find operation, and now iterate over the actual islands, rather than over all objects.
This commit is contained in:
ejcoumans 2006-08-29 23:37:32 +00:00
parent 4cdcee8871
commit e1b85d1969
3 changed files with 91 additions and 64 deletions

View File

@ -116,88 +116,96 @@ void SimulationIslandManager::BuildAndProcessIslands(Dispatcher* dispatcher,Coll
int numBodies = collisionObjects.size();
for (int islandId=0;islandId<numBodies;islandId++)
//first calculate the number of islands, and iterate over the islands id's
const UnionFind& uf = this->GetUnionFind();
for (int islandId=0;islandId<uf.getNumElements();islandId++)
{
std::vector<PersistentManifold*> islandmanifold;
//int numSleeping = 0;
bool allSleeping = true;
int i;
for (i=0;i<numBodies;i++)
if (uf.isRoot(islandId))
{
CollisionObject* colObj0 = collisionObjects[i];
if (colObj0->m_islandTag1 == islandId)
{
std::vector<PersistentManifold*> islandmanifold;
if (colObj0->GetActivationState()== ACTIVE_TAG)
{
allSleeping = false;
}
if (colObj0->GetActivationState()== DISABLE_DEACTIVATION)
{
allSleeping = false;
}
}
}
//int numSleeping = 0;
for (i=0;i<dispatcher->GetNumManifolds();i++)
{
PersistentManifold* manifold = dispatcher->GetManifoldByIndexInternal(i);
//filtering for response
CollisionObject* colObj0 = static_cast<CollisionObject*>(manifold->GetBody0());
CollisionObject* colObj1 = static_cast<CollisionObject*>(manifold->GetBody1());
{
if (((colObj0) && (colObj0)->m_islandTag1 == (islandId)) ||
((colObj1) && (colObj1)->m_islandTag1 == (islandId)))
{
if (dispatcher->NeedsResponse(*colObj0,*colObj1))
islandmanifold.push_back(manifold);
}
}
}
if (allSleeping)
{
int i;
for (i=0;i<numBodies;i++)
{
CollisionObject* colObj0 = collisionObjects[i];
if (colObj0->m_islandTag1 == islandId)
{
colObj0->SetActivationState( ISLAND_SLEEPING );
}
}
} else
{
bool allSleeping = true;
int i;
for (i=0;i<numBodies;i++)
{
CollisionObject* colObj0 = collisionObjects[i];
if (colObj0->m_islandTag1 == islandId)
{
if ( colObj0->GetActivationState() == ISLAND_SLEEPING)
if (colObj0->GetActivationState()== ACTIVE_TAG)
{
colObj0->SetActivationState( WANTS_DEACTIVATION);
allSleeping = false;
}
if (colObj0->GetActivationState()== DISABLE_DEACTIVATION)
{
allSleeping = false;
}
}
}
/// Process the actual simulation, only if not sleeping/deactivated
if (islandmanifold.size())
for (i=0;i<dispatcher->GetNumManifolds();i++)
{
callback->ProcessIsland(&islandmanifold[0],islandmanifold.size());
}
PersistentManifold* manifold = dispatcher->GetManifoldByIndexInternal(i);
//filtering for response
CollisionObject* colObj0 = static_cast<CollisionObject*>(manifold->GetBody0());
CollisionObject* colObj1 = static_cast<CollisionObject*>(manifold->GetBody1());
assert(colObj0);
assert(colObj1);
{
if (((colObj0)->m_islandTag1 == (islandId)) ||
((colObj1)->m_islandTag1 == (islandId)))
{
if (dispatcher->NeedsResponse(*colObj0,*colObj1))
islandmanifold.push_back(manifold);
}
}
}
if (allSleeping)
{
int i;
for (i=0;i<numBodies;i++)
{
CollisionObject* colObj0 = collisionObjects[i];
if (colObj0->m_islandTag1 == islandId)
{
colObj0->SetActivationState( ISLAND_SLEEPING );
}
}
} else
{
int i;
for (i=0;i<numBodies;i++)
{
CollisionObject* colObj0 = collisionObjects[i];
if (colObj0->m_islandTag1 == islandId)
{
if ( colObj0->GetActivationState() == ISLAND_SLEEPING)
{
colObj0->SetActivationState( WANTS_DEACTIVATION);
}
}
}
/// Process the actual simulation, only if not sleeping/deactivated
if (islandmanifold.size())
{
callback->ProcessIsland(&islandmanifold[0],islandmanifold.size());
}
}
}
}
}

View File

@ -24,6 +24,12 @@ int UnionFind::find(int x)
while (x != m_id[x])
{
//not really a reason not to use path compression, and it flattens the trees/improves find performance dramatically
#define USE_PATH_COMPRESSION 1
#ifdef USE_PATH_COMPRESSION
//
m_id[x] = m_id[m_id[x]];
#endif //
x = m_id[x];
assert(x < m_N);
assert(x >= 0);
@ -89,6 +95,8 @@ void UnionFind ::unite(int p, int q)
int i = find(p), j = find(q);
if (i == j)
return;
//weighted quick union, this keeps the 'trees' balanced, and keeps performance of unite O( log(n) )
if (m_sz[i] < m_sz[j])
{
m_id[i] = j; m_sz[j] += m_sz[i];

View File

@ -17,6 +17,8 @@ subject to the following restrictions:
#define UNION_FIND_H
///UnionFind calculates connected subsets
// Implements weighted Quick Union with path compression
// optimization: could use short ints instead of ints (halving memory, would limit the number of rigid bodies to 64k, sounds reasonable)
class UnionFind
{
private:
@ -32,6 +34,15 @@ class UnionFind
void reset(int N);
/// Returns the current value of m_N, the upper bound that find() asserts
/// element indices against (presumably the element count passed to reset() - confirm).
inline int getNumElements() const
{
	const int elementCount = m_N;
	return elementCount;
}
/// Tells whether element x is its own parent entry, i.e. m_id[x] == x.
/// This is the same condition on which the find() loop terminates.
/// Note: x is not range-checked here; the caller must pass a valid index.
inline bool isRoot(int x) const
{
	const int parent = m_id[x];
	return parent == x;
}
int find(int p, int q);
void unite(int p, int q);