towards GPU joint (non-contact) constraint solving. The ConstraintDemo shows that CPU performance is very good, so not much performance win is to be expected.

This commit is contained in:
erwin coumans 2013-07-02 21:13:42 -07:00
parent 72042f3094
commit 28f6e0fbd3
5 changed files with 635 additions and 8 deletions

View File

@ -123,11 +123,11 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const
if (ci.m_useInstancedCollisionShapes)
colIndex = m_data->m_np->registerConvexHullShape(utilPtr);
for (int i=0;i<20;i++)
for (int i=0;i<ci.arraySizeZ;i++)
{
for (int k=0;k<20;k++)
for (int k=0;k<ci.arraySizeX;k++)
{
int prevBody = -1;
@ -147,7 +147,7 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const
//mass=0.f;
}
//b3Vector3 position((j&1)+i*2.2,1+j*2.,(j&1)+k*2.2);
b3Vector3 position(i*2.2,1+j*2.,k*2.2);
b3Vector3 position((-ci.arraySizeX/2*ci.gapX)+i*ci.gapX,1+j*2.,(-ci.arraySizeZ/2*ci.gapZ)+k*ci.gapZ);
b3Quaternion orn(0,0,0,1);

View File

@ -663,7 +663,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (maxNumBatches>globalMaxBatch )
{
globalMaxBatch = maxNumBatches;
printf("maxNumBatches = %d\n",maxNumBatches);
b3Printf("maxNumBatches = %d\n",maxNumBatches);
}
clFinish(m_data->m_queue);

View File

@ -0,0 +1,606 @@
#include "b3GpuPgsJacobiSolver.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h"
#include <new>
#include "Bullet3Common/b3AlignedObjectArray.h"
#include <string.h> //for memset
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (bool usePgs)
:b3PgsJacobiSolver (usePgs)
{
}
b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver ()
{
}
struct b3BatchConstraint
{
int m_bodyAPtrAndSignBit;
int m_bodyBPtrAndSignBit;
int m_offset;
short int m_numConstraintRows;
short int m_batchId;
short int& getBatchIdx()
{
return m_batchId;
}
};
static b3AlignedObjectArray<b3BatchConstraint> batchConstraints;
static b3AlignedObjectArray<int> batches;
b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyCL* bodies, b3InertiaCL* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds,b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal)
{
B3_PROFILE("GPU solveGroupCacheFriendlySetup");
batchConstraints.resize(numConstraints);
m_staticIdx = -1;
m_maxOverrideNumSolverIterations = 0;
m_tmpSolverBodyPool.resize(0);
m_bodyCount.resize(0);
m_bodyCount.resize(numBodies,0);
m_bodyCountCheck.resize(0);
m_bodyCountCheck.resize(numBodies,0);
m_deltaLinearVelocities.resize(0);
m_deltaLinearVelocities.resize(numBodies,b3Vector3(0,0,0));
m_deltaAngularVelocities.resize(0);
m_deltaAngularVelocities.resize(numBodies,b3Vector3(0,0,0));
int totalBodies = 0;
for (int i=0;i<numConstraints;i++)
{
int bodyIndexA = constraints[i]->getRigidBodyA();
int bodyIndexB = constraints[i]->getRigidBodyB();
if (m_usePgs)
{
m_bodyCount[bodyIndexA]=-1;
m_bodyCount[bodyIndexB]=-1;
} else
{
//didn't implement joints with Jacobi version yet
b3Assert(0);
}
}
for (int i=0;i<numManifolds;i++)
{
int bodyIndexA = manifoldPtr[i].getBodyA();
int bodyIndexB = manifoldPtr[i].getBodyB();
if (m_usePgs)
{
m_bodyCount[bodyIndexA]=-1;
m_bodyCount[bodyIndexB]=-1;
} else
{
if (bodies[bodyIndexA].getInvMass())
{
//m_bodyCount[bodyIndexA]+=manifoldPtr[i].getNPoints();
m_bodyCount[bodyIndexA]++;
}
else
m_bodyCount[bodyIndexA]=-1;
if (bodies[bodyIndexB].getInvMass())
// m_bodyCount[bodyIndexB]+=manifoldPtr[i].getNPoints();
m_bodyCount[bodyIndexB]++;
else
m_bodyCount[bodyIndexB]=-1;
}
}
if (1)
{
int j;
for (j=0;j<numConstraints;j++)
{
b3TypedConstraint* constraint = constraints[j];
constraint->internalSetAppliedImpulse(0.0f);
}
}
//b3RigidBody* rb0=0,*rb1=0;
//if (1)
{
{
int totalNumRows = 0;
int i;
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
//calculate the total number of contraint rows
for (i=0;i<numConstraints;i++)
{
b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
b3JointFeedback* fb = constraints[i]->getJointFeedback();
if (fb)
{
fb->m_appliedForceBodyA.setZero();
fb->m_appliedTorqueBodyA.setZero();
fb->m_appliedForceBodyB.setZero();
fb->m_appliedTorqueBodyB.setZero();
}
if (constraints[i]->isEnabled())
{
}
if (constraints[i]->isEnabled())
{
constraints[i]->getInfo1(&info1,bodies);
} else
{
info1.m_numConstraintRows = 0;
info1.nub = 0;
}
batchConstraints[i].m_numConstraintRows = info1.m_numConstraintRows;
batchConstraints[i].m_offset = totalNumRows;
totalNumRows += info1.m_numConstraintRows;
}
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
#ifndef DISABLE_JOINTS
///setup the b3SolverConstraints
int currentRow = 0;
for (i=0;i<numConstraints;i++)
{
const b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
if (info1.m_numConstraintRows)
{
b3Assert(currentRow<totalNumRows);
b3SolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
b3TypedConstraint* constraint = constraints[i];
b3RigidBodyCL& rbA = bodies[ constraint->getRigidBodyA()];
//b3RigidBody& rbA = constraint->getRigidBodyA();
// b3RigidBody& rbB = constraint->getRigidBodyB();
b3RigidBodyCL& rbB = bodies[ constraint->getRigidBodyB()];
int solverBodyIdA = getOrInitSolverBody(constraint->getRigidBodyA(),bodies,inertias);
int solverBodyIdB = getOrInitSolverBody(constraint->getRigidBodyB(),bodies,inertias);
b3SolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
b3SolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
if (rbA.getInvMass())
{
batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;
} else
{
if (!solverBodyIdA)
m_staticIdx = 0;
batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;
}
if (rbB.getInvMass())
{
batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;
} else
{
if (!solverBodyIdB)
m_staticIdx = 0;
batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;
}
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
int j;
for ( j=0;j<info1.m_numConstraintRows;j++)
{
memset(&currentConstraintRow[j],0,sizeof(b3SolverConstraint));
currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;
currentConstraintRow[j].m_upperLimit = B3_INFINITY;
currentConstraintRow[j].m_appliedImpulse = 0.f;
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
}
bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
b3TypedConstraint::b3ConstraintInfo2 info2;
info2.fps = 1.f/infoGlobal.m_timeStep;
info2.erp = infoGlobal.m_erp;
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
info2.m_J2linearAxis = 0;
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
info2.rowskip = sizeof(b3SolverConstraint)/sizeof(b3Scalar);//check this
///the size of b3SolverConstraint needs be a multiple of b3Scalar
b3Assert(info2.rowskip*sizeof(b3Scalar)== sizeof(b3SolverConstraint));
info2.m_constraintError = &currentConstraintRow->m_rhs;
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
info2.m_damping = infoGlobal.m_damping;
info2.cfm = &currentConstraintRow->m_cfm;
info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
info2.m_numIterations = infoGlobal.m_numIterations;
constraints[i]->getInfo2(&info2,bodies);
///finalize the constraint setup
for ( j=0;j<info1.m_numConstraintRows;j++)
{
b3SolverConstraint& solverConstraint = currentConstraintRow[j];
if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
{
solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
}
if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
{
solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
}
solverConstraint.m_originalContactPoint = constraint;
b3Matrix3x3& invInertiaWorldA= inertias[constraint->getRigidBodyA()].m_invInertiaWorld;
{
//b3Vector3 angularFactorA(1,1,1);
const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
solverConstraint.m_angularComponentA = invInertiaWorldA*ftorqueAxis1;//*angularFactorA;
}
b3Matrix3x3& invInertiaWorldB= inertias[constraint->getRigidBodyB()].m_invInertiaWorld;
{
const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
solverConstraint.m_angularComponentB = invInertiaWorldB*ftorqueAxis2;//*constraint->getRigidBodyB().getAngularFactor();
}
{
//it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal
//because it gets multiplied iMJlB
b3Vector3 iMJlA = solverConstraint.m_contactNormal*rbA.getInvMass();
b3Vector3 iMJaA = invInertiaWorldA*solverConstraint.m_relpos1CrossNormal;
b3Vector3 iMJlB = solverConstraint.m_contactNormal*rbB.getInvMass();//sign of normal?
b3Vector3 iMJaB = invInertiaWorldB*solverConstraint.m_relpos2CrossNormal;
b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
sum += iMJlB.dot(solverConstraint.m_contactNormal);
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
b3Scalar fsum = b3Fabs(sum);
b3Assert(fsum > B3_EPSILON);
solverConstraint.m_jacDiagABInv = fsum>B3_EPSILON?b3Scalar(1.)/sum : 0.f;
}
///fix rhs
///todo: add force/torque accelerators
{
b3Scalar rel_vel;
b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel);
b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel);
rel_vel = vel1Dotn+vel2Dotn;
b3Scalar restitution = 0.f;
b3Scalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
b3Scalar velocityError = restitution - rel_vel * info2.m_damping;
b3Scalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
b3Scalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
solverConstraint.m_appliedImpulse = 0.f;
}
}
}
currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
}
#endif //DISABLE_JOINTS
}
{
int i;
for (i=0;i<numManifolds;i++)
{
b3Contact4& manifold = manifoldPtr[i];
convertContact(bodies,inertias,&manifold,infoGlobal);
}
}
}
// b3ContactSolverInfo info = infoGlobal;
int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
int numConstraintPool = m_tmpSolverContactConstraintPool.size();
int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
m_orderNonContactConstraintPool.resizeNoInitialize(numNonContactPool);
if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS))
m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool*2);
else
m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool);
m_orderFrictionConstraintPool.resizeNoInitialize(numFrictionPool);
{
int i;
for (i=0;i<numNonContactPool;i++)
{
m_orderNonContactConstraintPool[i] = i;
}
for (i=0;i<numConstraintPool;i++)
{
m_orderTmpConstraintPool[i] = i;
}
for (i=0;i<numFrictionPool;i++)
{
m_orderFrictionConstraintPool[i] = i;
}
}
return 0.f;
}
b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3TypedConstraint** cpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal)
{
bool useCpu = false;
bool createBatches = batches.size()==0;
if (useCpu)
{
return b3PgsJacobiSolver::solveGroupCacheFriendlyIterations(cpuConstraints,numConstraints,infoGlobal);
} else
{
B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");
if (createBatches)
{
batches.resize(0);
{
B3_PROFILE("batch joints");
b3Assert(batchConstraints.size()==numConstraints);
int simdWidth =numConstraints;
int numBodies = m_tmpSolverBodyPool.size();
sortConstraintByBatch3( &batchConstraints[0], numConstraints, simdWidth , m_staticIdx, numBodies);
}
}
int maxIterations = infoGlobal.m_numIterations;
bool useBatching = false;
if (useBatching )
{
for ( int iteration = 0 ; iteration< maxIterations ; iteration++)
{
int bc = 0;
int batchConstraintOffset=0;
int numBatches = batches.size();
for (int bb=0;bb<numBatches;bb++)
{
int localConstraintIndex=0;
int numConstraintsInBatch = batches[bb];
for (int b=0;b<numConstraintsInBatch;b++)
{
const b3BatchConstraint& c = batchConstraints[bc+b];
b3Assert(c.m_batchId==bb);
//can be done in parallel...
for (int jj=0;jj<c.m_numConstraintRows;jj++)
{
b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[batchConstraintOffset+localConstraintIndex];
resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
localConstraintIndex++;
}
}
bc+=numConstraintsInBatch;
batchConstraintOffset+=numConstraintsInBatch;
}
}
} else
{
for ( int iteration = 0 ; iteration< maxIterations ; iteration++)
{
int numJoints = m_tmpSolverNonContactConstraintPool.size();
for (int j=0;j<numJoints;j++)
{
b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j];
resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
}
if (!m_usePgs)
{
averageVelocities();
}
}
}
}
return 0.f;
}
static b3AlignedObjectArray<int> bodyUsed;
static b3AlignedObjectArray<int> curUsed;
inline int b3GpuPgsJacobiSolver::sortConstraintByBatch3( b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
int sz = sizeof(b3BatchConstraint);
B3_PROFILE("sortConstraintByBatch3");
static int maxSwaps = 0;
int numSwaps = 0;
curUsed.resize(2*simdWidth);
static int maxNumConstraints = 0;
if (maxNumConstraints<numConstraints)
{
maxNumConstraints = numConstraints;
//printf("maxNumConstraints = %d\n",maxNumConstraints );
}
int numUsedArray = numBodies/32+1;
bodyUsed.resize(numUsedArray);
for (int q=0;q<numUsedArray;q++)
bodyUsed[q]=0;
int curBodyUsed = 0;
int numIter = 0;
#if defined(_DEBUG)
for(int i=0; i<numConstraints; i++)
cs[i].getBatchIdx() = -1;
#endif
int numValidConstraints = 0;
int unprocessedConstraintIndex = 0;
int batchIdx = 0;
{
B3_PROFILE("cpu batch innerloop");
while( numValidConstraints < numConstraints)
{
numIter++;
int nCurrentBatch = 0;
// clear flag
for(int i=0; i<curBodyUsed; i++)
bodyUsed[curUsed[i]/32] = 0;
curBodyUsed = 0;
for(int i=numValidConstraints; i<numConstraints; i++)
{
int idx = i;
b3Assert( idx < numConstraints );
// check if it can go
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
int bodyA = abs(bodyAS);
int bodyB = abs(bodyBS);
bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
int aUnavailable = 0;
int bUnavailable = 0;
if (!aIsStatic)
{
aUnavailable = bodyUsed[ bodyA/32 ] & (1<<(bodyA&31));
}
if (!aUnavailable)
if (!bIsStatic)
{
bUnavailable = bodyUsed[ bodyB/32 ] & (1<<(bodyB&31));
}
if( aUnavailable==0 && bUnavailable==0 ) // ok
{
if (!aIsStatic)
{
bodyUsed[ bodyA/32 ] |= (1<<(bodyA&31));
curUsed[curBodyUsed++]=bodyA;
}
if (!bIsStatic)
{
bodyUsed[ bodyB/32 ] |= (1<<(bodyB&31));
curUsed[curBodyUsed++]=bodyB;
}
cs[idx].getBatchIdx() = batchIdx;
if (i!=numValidConstraints)
{
b3Swap(cs[i],cs[numValidConstraints]);
numSwaps++;
}
numValidConstraints++;
{
nCurrentBatch++;
if( nCurrentBatch == simdWidth )
{
nCurrentBatch = 0;
for(int i=0; i<curBodyUsed; i++)
bodyUsed[curUsed[i]/32] = 0;
curBodyUsed = 0;
}
}
}
}
batches.push_back(nCurrentBatch);
batchIdx ++;
}
}
#if defined(_DEBUG)
// debugPrintf( "nBatches: %d\n", batchIdx );
for(int i=0; i<numConstraints; i++)
{
b3Assert( cs[i].getBatchIdx() != -1 );
}
#endif
if (maxSwaps<numSwaps)
{
maxSwaps = numSwaps;
//printf("maxSwaps = %d\n", maxSwaps);
}
return batchIdx;
}

View File

@ -0,0 +1,19 @@
#ifndef B3_GPU_PGS_JACOBI_SOLVER_H
#define B3_GPU_PGS_JACOBI_SOLVER_H
#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
class b3GpuPgsJacobiSolver : public b3PgsJacobiSolver
{
int m_staticIdx;
public:
b3GpuPgsJacobiSolver (bool usePgs);
virtual~b3GpuPgsJacobiSolver ();
virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
virtual b3Scalar solveGroupCacheFriendlySetup(b3RigidBodyCL* bodies, b3InertiaCL* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds,b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
int sortConstraintByBatch3( struct b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies);
};
#endif //B3_GPU_PGS_JACOBI_SOLVER_H

View File

@ -28,6 +28,8 @@ bool dumpContactStats = false;
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
#include "Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h"
#include "b3GpuBatchingPgsSolver.h"
#include "b3Solver.h"
@ -44,7 +46,7 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
m_data->m_device = device;
m_data->m_queue = q;
m_data->m_solver = new b3PgsJacobiSolver(true);
m_data->m_solver = new b3GpuPgsJacobiSolver(true);//new b3PgsJacobiSolver(true);
m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies);
m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx,q,config.m_maxBroadphasePairs);
@ -226,11 +228,11 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
gpuBodies.copyToHost(hostBodies);
b3AlignedObjectArray<b3InertiaCL> hostInertias;
gpuInertias.copyToHost(hostInertias);
b3AlignedObjectArray<b3Contact4> hostContacts;
gpuContacts.copyToHost(hostContacts);
// b3AlignedObjectArray<b3Contact4> hostContacts;
//gpuContacts.copyToHost(hostContacts);
{
b3TypedConstraint** joints = numJoints? &m_data->m_joints[0] : 0;
b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
// b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
//m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints);
m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(),&hostBodies[0],&hostInertias[0],0,0,numJoints, joints);