mirror of
https://github.com/bulletphysics/bullet3
synced 2025-01-18 21:10:05 +00:00
towards GPU joint (non-contact) constraint solving. The ConstraintDemo shows that CPU performance is very good, so not much performance win is to be expected.
This commit is contained in:
parent
72042f3094
commit
28f6e0fbd3
@ -123,11 +123,11 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const
|
||||
if (ci.m_useInstancedCollisionShapes)
|
||||
colIndex = m_data->m_np->registerConvexHullShape(utilPtr);
|
||||
|
||||
for (int i=0;i<20;i++)
|
||||
for (int i=0;i<ci.arraySizeZ;i++)
|
||||
{
|
||||
|
||||
|
||||
for (int k=0;k<20;k++)
|
||||
for (int k=0;k<ci.arraySizeX;k++)
|
||||
{
|
||||
|
||||
int prevBody = -1;
|
||||
@ -147,7 +147,7 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const
|
||||
//mass=0.f;
|
||||
}
|
||||
//b3Vector3 position((j&1)+i*2.2,1+j*2.,(j&1)+k*2.2);
|
||||
b3Vector3 position(i*2.2,1+j*2.,k*2.2);
|
||||
b3Vector3 position((-ci.arraySizeX/2*ci.gapX)+i*ci.gapX,1+j*2.,(-ci.arraySizeZ/2*ci.gapZ)+k*ci.gapZ);
|
||||
|
||||
b3Quaternion orn(0,0,0,1);
|
||||
|
||||
|
@ -663,7 +663,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
|
||||
if (maxNumBatches>globalMaxBatch )
|
||||
{
|
||||
globalMaxBatch = maxNumBatches;
|
||||
printf("maxNumBatches = %d\n",maxNumBatches);
|
||||
b3Printf("maxNumBatches = %d\n",maxNumBatches);
|
||||
}
|
||||
|
||||
clFinish(m_data->m_queue);
|
||||
|
606
src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp
Normal file
606
src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp
Normal file
@ -0,0 +1,606 @@
|
||||
#include "b3GpuPgsJacobiSolver.h"
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
|
||||
|
||||
#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h"
|
||||
#include <new>
|
||||
#include "Bullet3Common/b3AlignedObjectArray.h"
|
||||
#include <string.h> //for memset
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
|
||||
|
||||
b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (bool usePgs)
|
||||
:b3PgsJacobiSolver (usePgs)
|
||||
{
|
||||
}
|
||||
|
||||
b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver ()
|
||||
{
|
||||
}
|
||||
|
||||
struct b3BatchConstraint
|
||||
{
|
||||
int m_bodyAPtrAndSignBit;
|
||||
int m_bodyBPtrAndSignBit;
|
||||
int m_offset;
|
||||
short int m_numConstraintRows;
|
||||
short int m_batchId;
|
||||
|
||||
short int& getBatchIdx()
|
||||
{
|
||||
return m_batchId;
|
||||
}
|
||||
};
|
||||
|
||||
static b3AlignedObjectArray<b3BatchConstraint> batchConstraints;
|
||||
static b3AlignedObjectArray<int> batches;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyCL* bodies, b3InertiaCL* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds,b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal)
|
||||
{
|
||||
B3_PROFILE("GPU solveGroupCacheFriendlySetup");
|
||||
batchConstraints.resize(numConstraints);
|
||||
m_staticIdx = -1;
|
||||
m_maxOverrideNumSolverIterations = 0;
|
||||
|
||||
|
||||
|
||||
m_tmpSolverBodyPool.resize(0);
|
||||
|
||||
|
||||
m_bodyCount.resize(0);
|
||||
m_bodyCount.resize(numBodies,0);
|
||||
m_bodyCountCheck.resize(0);
|
||||
m_bodyCountCheck.resize(numBodies,0);
|
||||
|
||||
m_deltaLinearVelocities.resize(0);
|
||||
m_deltaLinearVelocities.resize(numBodies,b3Vector3(0,0,0));
|
||||
m_deltaAngularVelocities.resize(0);
|
||||
m_deltaAngularVelocities.resize(numBodies,b3Vector3(0,0,0));
|
||||
|
||||
int totalBodies = 0;
|
||||
|
||||
for (int i=0;i<numConstraints;i++)
|
||||
{
|
||||
int bodyIndexA = constraints[i]->getRigidBodyA();
|
||||
int bodyIndexB = constraints[i]->getRigidBodyB();
|
||||
if (m_usePgs)
|
||||
{
|
||||
m_bodyCount[bodyIndexA]=-1;
|
||||
m_bodyCount[bodyIndexB]=-1;
|
||||
} else
|
||||
{
|
||||
//didn't implement joints with Jacobi version yet
|
||||
b3Assert(0);
|
||||
}
|
||||
|
||||
}
|
||||
for (int i=0;i<numManifolds;i++)
|
||||
{
|
||||
int bodyIndexA = manifoldPtr[i].getBodyA();
|
||||
int bodyIndexB = manifoldPtr[i].getBodyB();
|
||||
if (m_usePgs)
|
||||
{
|
||||
m_bodyCount[bodyIndexA]=-1;
|
||||
m_bodyCount[bodyIndexB]=-1;
|
||||
} else
|
||||
{
|
||||
if (bodies[bodyIndexA].getInvMass())
|
||||
{
|
||||
//m_bodyCount[bodyIndexA]+=manifoldPtr[i].getNPoints();
|
||||
m_bodyCount[bodyIndexA]++;
|
||||
}
|
||||
else
|
||||
m_bodyCount[bodyIndexA]=-1;
|
||||
|
||||
if (bodies[bodyIndexB].getInvMass())
|
||||
// m_bodyCount[bodyIndexB]+=manifoldPtr[i].getNPoints();
|
||||
m_bodyCount[bodyIndexB]++;
|
||||
else
|
||||
m_bodyCount[bodyIndexB]=-1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (1)
|
||||
{
|
||||
int j;
|
||||
for (j=0;j<numConstraints;j++)
|
||||
{
|
||||
b3TypedConstraint* constraint = constraints[j];
|
||||
|
||||
constraint->internalSetAppliedImpulse(0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
//b3RigidBody* rb0=0,*rb1=0;
|
||||
//if (1)
|
||||
{
|
||||
{
|
||||
|
||||
int totalNumRows = 0;
|
||||
int i;
|
||||
|
||||
m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
|
||||
//calculate the total number of contraint rows
|
||||
for (i=0;i<numConstraints;i++)
|
||||
{
|
||||
b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
|
||||
|
||||
b3JointFeedback* fb = constraints[i]->getJointFeedback();
|
||||
if (fb)
|
||||
{
|
||||
fb->m_appliedForceBodyA.setZero();
|
||||
fb->m_appliedTorqueBodyA.setZero();
|
||||
fb->m_appliedForceBodyB.setZero();
|
||||
fb->m_appliedTorqueBodyB.setZero();
|
||||
}
|
||||
|
||||
if (constraints[i]->isEnabled())
|
||||
{
|
||||
}
|
||||
if (constraints[i]->isEnabled())
|
||||
{
|
||||
constraints[i]->getInfo1(&info1,bodies);
|
||||
} else
|
||||
{
|
||||
info1.m_numConstraintRows = 0;
|
||||
info1.nub = 0;
|
||||
}
|
||||
|
||||
batchConstraints[i].m_numConstraintRows = info1.m_numConstraintRows;
|
||||
batchConstraints[i].m_offset = totalNumRows;
|
||||
totalNumRows += info1.m_numConstraintRows;
|
||||
}
|
||||
m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
|
||||
|
||||
|
||||
#ifndef DISABLE_JOINTS
|
||||
///setup the b3SolverConstraints
|
||||
int currentRow = 0;
|
||||
|
||||
for (i=0;i<numConstraints;i++)
|
||||
{
|
||||
const b3TypedConstraint::b3ConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
|
||||
|
||||
if (info1.m_numConstraintRows)
|
||||
{
|
||||
b3Assert(currentRow<totalNumRows);
|
||||
|
||||
b3SolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
|
||||
b3TypedConstraint* constraint = constraints[i];
|
||||
|
||||
b3RigidBodyCL& rbA = bodies[ constraint->getRigidBodyA()];
|
||||
//b3RigidBody& rbA = constraint->getRigidBodyA();
|
||||
// b3RigidBody& rbB = constraint->getRigidBodyB();
|
||||
b3RigidBodyCL& rbB = bodies[ constraint->getRigidBodyB()];
|
||||
|
||||
|
||||
|
||||
int solverBodyIdA = getOrInitSolverBody(constraint->getRigidBodyA(),bodies,inertias);
|
||||
int solverBodyIdB = getOrInitSolverBody(constraint->getRigidBodyB(),bodies,inertias);
|
||||
|
||||
b3SolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
|
||||
b3SolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
|
||||
|
||||
if (rbA.getInvMass())
|
||||
{
|
||||
batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;
|
||||
} else
|
||||
{
|
||||
if (!solverBodyIdA)
|
||||
m_staticIdx = 0;
|
||||
batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;
|
||||
}
|
||||
|
||||
if (rbB.getInvMass())
|
||||
{
|
||||
batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;
|
||||
} else
|
||||
{
|
||||
if (!solverBodyIdB)
|
||||
m_staticIdx = 0;
|
||||
batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;
|
||||
}
|
||||
|
||||
|
||||
int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
|
||||
if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
|
||||
m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
|
||||
|
||||
|
||||
int j;
|
||||
for ( j=0;j<info1.m_numConstraintRows;j++)
|
||||
{
|
||||
memset(¤tConstraintRow[j],0,sizeof(b3SolverConstraint));
|
||||
currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;
|
||||
currentConstraintRow[j].m_upperLimit = B3_INFINITY;
|
||||
currentConstraintRow[j].m_appliedImpulse = 0.f;
|
||||
currentConstraintRow[j].m_appliedPushImpulse = 0.f;
|
||||
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
|
||||
currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
|
||||
currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
|
||||
}
|
||||
|
||||
bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
|
||||
bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
|
||||
|
||||
|
||||
b3TypedConstraint::b3ConstraintInfo2 info2;
|
||||
info2.fps = 1.f/infoGlobal.m_timeStep;
|
||||
info2.erp = infoGlobal.m_erp;
|
||||
info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
|
||||
info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
|
||||
info2.m_J2linearAxis = 0;
|
||||
info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
|
||||
info2.rowskip = sizeof(b3SolverConstraint)/sizeof(b3Scalar);//check this
|
||||
///the size of b3SolverConstraint needs be a multiple of b3Scalar
|
||||
b3Assert(info2.rowskip*sizeof(b3Scalar)== sizeof(b3SolverConstraint));
|
||||
info2.m_constraintError = ¤tConstraintRow->m_rhs;
|
||||
currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
|
||||
info2.m_damping = infoGlobal.m_damping;
|
||||
info2.cfm = ¤tConstraintRow->m_cfm;
|
||||
info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit;
|
||||
info2.m_upperLimit = ¤tConstraintRow->m_upperLimit;
|
||||
info2.m_numIterations = infoGlobal.m_numIterations;
|
||||
constraints[i]->getInfo2(&info2,bodies);
|
||||
|
||||
///finalize the constraint setup
|
||||
for ( j=0;j<info1.m_numConstraintRows;j++)
|
||||
{
|
||||
b3SolverConstraint& solverConstraint = currentConstraintRow[j];
|
||||
|
||||
if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
|
||||
{
|
||||
solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
|
||||
}
|
||||
|
||||
if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
|
||||
{
|
||||
solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
|
||||
}
|
||||
|
||||
solverConstraint.m_originalContactPoint = constraint;
|
||||
|
||||
b3Matrix3x3& invInertiaWorldA= inertias[constraint->getRigidBodyA()].m_invInertiaWorld;
|
||||
{
|
||||
|
||||
//b3Vector3 angularFactorA(1,1,1);
|
||||
const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
|
||||
solverConstraint.m_angularComponentA = invInertiaWorldA*ftorqueAxis1;//*angularFactorA;
|
||||
}
|
||||
|
||||
b3Matrix3x3& invInertiaWorldB= inertias[constraint->getRigidBodyB()].m_invInertiaWorld;
|
||||
{
|
||||
|
||||
const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
|
||||
solverConstraint.m_angularComponentB = invInertiaWorldB*ftorqueAxis2;//*constraint->getRigidBodyB().getAngularFactor();
|
||||
}
|
||||
|
||||
{
|
||||
//it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal
|
||||
//because it gets multiplied iMJlB
|
||||
b3Vector3 iMJlA = solverConstraint.m_contactNormal*rbA.getInvMass();
|
||||
b3Vector3 iMJaA = invInertiaWorldA*solverConstraint.m_relpos1CrossNormal;
|
||||
b3Vector3 iMJlB = solverConstraint.m_contactNormal*rbB.getInvMass();//sign of normal?
|
||||
b3Vector3 iMJaB = invInertiaWorldB*solverConstraint.m_relpos2CrossNormal;
|
||||
|
||||
b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
|
||||
sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
|
||||
sum += iMJlB.dot(solverConstraint.m_contactNormal);
|
||||
sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
|
||||
b3Scalar fsum = b3Fabs(sum);
|
||||
b3Assert(fsum > B3_EPSILON);
|
||||
solverConstraint.m_jacDiagABInv = fsum>B3_EPSILON?b3Scalar(1.)/sum : 0.f;
|
||||
}
|
||||
|
||||
|
||||
///fix rhs
|
||||
///todo: add force/torque accelerators
|
||||
{
|
||||
b3Scalar rel_vel;
|
||||
b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel);
|
||||
b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel);
|
||||
|
||||
rel_vel = vel1Dotn+vel2Dotn;
|
||||
|
||||
b3Scalar restitution = 0.f;
|
||||
b3Scalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
|
||||
b3Scalar velocityError = restitution - rel_vel * info2.m_damping;
|
||||
b3Scalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
|
||||
b3Scalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
|
||||
solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
|
||||
solverConstraint.m_appliedImpulse = 0.f;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
|
||||
}
|
||||
#endif //DISABLE_JOINTS
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0;i<numManifolds;i++)
|
||||
{
|
||||
b3Contact4& manifold = manifoldPtr[i];
|
||||
convertContact(bodies,inertias,&manifold,infoGlobal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// b3ContactSolverInfo info = infoGlobal;
|
||||
|
||||
|
||||
int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
|
||||
int numConstraintPool = m_tmpSolverContactConstraintPool.size();
|
||||
int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
|
||||
|
||||
///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
|
||||
m_orderNonContactConstraintPool.resizeNoInitialize(numNonContactPool);
|
||||
if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS))
|
||||
m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool*2);
|
||||
else
|
||||
m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool);
|
||||
|
||||
m_orderFrictionConstraintPool.resizeNoInitialize(numFrictionPool);
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<numNonContactPool;i++)
|
||||
{
|
||||
m_orderNonContactConstraintPool[i] = i;
|
||||
}
|
||||
for (i=0;i<numConstraintPool;i++)
|
||||
{
|
||||
m_orderTmpConstraintPool[i] = i;
|
||||
}
|
||||
for (i=0;i<numFrictionPool;i++)
|
||||
{
|
||||
m_orderFrictionConstraintPool[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
return 0.f;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlyIterations(b3TypedConstraint** cpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal)
|
||||
{
|
||||
bool useCpu = false;
|
||||
bool createBatches = batches.size()==0;
|
||||
if (useCpu)
|
||||
{
|
||||
return b3PgsJacobiSolver::solveGroupCacheFriendlyIterations(cpuConstraints,numConstraints,infoGlobal);
|
||||
} else
|
||||
{
|
||||
B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");
|
||||
if (createBatches)
|
||||
{
|
||||
|
||||
batches.resize(0);
|
||||
|
||||
{
|
||||
B3_PROFILE("batch joints");
|
||||
b3Assert(batchConstraints.size()==numConstraints);
|
||||
int simdWidth =numConstraints;
|
||||
int numBodies = m_tmpSolverBodyPool.size();
|
||||
sortConstraintByBatch3( &batchConstraints[0], numConstraints, simdWidth , m_staticIdx, numBodies);
|
||||
}
|
||||
}
|
||||
int maxIterations = infoGlobal.m_numIterations;
|
||||
bool useBatching = false;
|
||||
if (useBatching )
|
||||
{
|
||||
for ( int iteration = 0 ; iteration< maxIterations ; iteration++)
|
||||
{
|
||||
|
||||
int bc = 0;
|
||||
int batchConstraintOffset=0;
|
||||
int numBatches = batches.size();
|
||||
for (int bb=0;bb<numBatches;bb++)
|
||||
{
|
||||
int localConstraintIndex=0;
|
||||
|
||||
int numConstraintsInBatch = batches[bb];
|
||||
for (int b=0;b<numConstraintsInBatch;b++)
|
||||
{
|
||||
const b3BatchConstraint& c = batchConstraints[bc+b];
|
||||
b3Assert(c.m_batchId==bb);
|
||||
|
||||
|
||||
//can be done in parallel...
|
||||
for (int jj=0;jj<c.m_numConstraintRows;jj++)
|
||||
{
|
||||
b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[batchConstraintOffset+localConstraintIndex];
|
||||
resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
|
||||
localConstraintIndex++;
|
||||
}
|
||||
}
|
||||
bc+=numConstraintsInBatch;
|
||||
batchConstraintOffset+=numConstraintsInBatch;
|
||||
}
|
||||
}
|
||||
|
||||
} else
|
||||
{
|
||||
for ( int iteration = 0 ; iteration< maxIterations ; iteration++)
|
||||
{
|
||||
int numJoints = m_tmpSolverNonContactConstraintPool.size();
|
||||
for (int j=0;j<numJoints;j++)
|
||||
{
|
||||
b3SolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j];
|
||||
resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
|
||||
}
|
||||
|
||||
if (!m_usePgs)
|
||||
{
|
||||
averageVelocities();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static b3AlignedObjectArray<int> bodyUsed;
|
||||
static b3AlignedObjectArray<int> curUsed;
|
||||
|
||||
|
||||
|
||||
inline int b3GpuPgsJacobiSolver::sortConstraintByBatch3( b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
|
||||
{
|
||||
int sz = sizeof(b3BatchConstraint);
|
||||
|
||||
B3_PROFILE("sortConstraintByBatch3");
|
||||
|
||||
static int maxSwaps = 0;
|
||||
int numSwaps = 0;
|
||||
|
||||
curUsed.resize(2*simdWidth);
|
||||
|
||||
static int maxNumConstraints = 0;
|
||||
if (maxNumConstraints<numConstraints)
|
||||
{
|
||||
maxNumConstraints = numConstraints;
|
||||
//printf("maxNumConstraints = %d\n",maxNumConstraints );
|
||||
}
|
||||
|
||||
int numUsedArray = numBodies/32+1;
|
||||
bodyUsed.resize(numUsedArray);
|
||||
|
||||
for (int q=0;q<numUsedArray;q++)
|
||||
bodyUsed[q]=0;
|
||||
|
||||
|
||||
int curBodyUsed = 0;
|
||||
|
||||
int numIter = 0;
|
||||
|
||||
|
||||
#if defined(_DEBUG)
|
||||
for(int i=0; i<numConstraints; i++)
|
||||
cs[i].getBatchIdx() = -1;
|
||||
#endif
|
||||
|
||||
int numValidConstraints = 0;
|
||||
int unprocessedConstraintIndex = 0;
|
||||
|
||||
int batchIdx = 0;
|
||||
|
||||
|
||||
{
|
||||
B3_PROFILE("cpu batch innerloop");
|
||||
|
||||
while( numValidConstraints < numConstraints)
|
||||
{
|
||||
numIter++;
|
||||
int nCurrentBatch = 0;
|
||||
// clear flag
|
||||
for(int i=0; i<curBodyUsed; i++)
|
||||
bodyUsed[curUsed[i]/32] = 0;
|
||||
|
||||
curBodyUsed = 0;
|
||||
|
||||
for(int i=numValidConstraints; i<numConstraints; i++)
|
||||
{
|
||||
int idx = i;
|
||||
b3Assert( idx < numConstraints );
|
||||
// check if it can go
|
||||
int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
|
||||
int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
|
||||
int bodyA = abs(bodyAS);
|
||||
int bodyB = abs(bodyBS);
|
||||
bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
|
||||
bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
|
||||
int aUnavailable = 0;
|
||||
int bUnavailable = 0;
|
||||
if (!aIsStatic)
|
||||
{
|
||||
aUnavailable = bodyUsed[ bodyA/32 ] & (1<<(bodyA&31));
|
||||
}
|
||||
if (!aUnavailable)
|
||||
if (!bIsStatic)
|
||||
{
|
||||
bUnavailable = bodyUsed[ bodyB/32 ] & (1<<(bodyB&31));
|
||||
}
|
||||
|
||||
if( aUnavailable==0 && bUnavailable==0 ) // ok
|
||||
{
|
||||
if (!aIsStatic)
|
||||
{
|
||||
bodyUsed[ bodyA/32 ] |= (1<<(bodyA&31));
|
||||
curUsed[curBodyUsed++]=bodyA;
|
||||
}
|
||||
if (!bIsStatic)
|
||||
{
|
||||
bodyUsed[ bodyB/32 ] |= (1<<(bodyB&31));
|
||||
curUsed[curBodyUsed++]=bodyB;
|
||||
}
|
||||
|
||||
cs[idx].getBatchIdx() = batchIdx;
|
||||
|
||||
if (i!=numValidConstraints)
|
||||
{
|
||||
b3Swap(cs[i],cs[numValidConstraints]);
|
||||
numSwaps++;
|
||||
}
|
||||
|
||||
numValidConstraints++;
|
||||
{
|
||||
nCurrentBatch++;
|
||||
if( nCurrentBatch == simdWidth )
|
||||
{
|
||||
nCurrentBatch = 0;
|
||||
for(int i=0; i<curBodyUsed; i++)
|
||||
bodyUsed[curUsed[i]/32] = 0;
|
||||
curBodyUsed = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
batches.push_back(nCurrentBatch);
|
||||
batchIdx ++;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_DEBUG)
|
||||
// debugPrintf( "nBatches: %d\n", batchIdx );
|
||||
for(int i=0; i<numConstraints; i++)
|
||||
{
|
||||
b3Assert( cs[i].getBatchIdx() != -1 );
|
||||
}
|
||||
#endif
|
||||
|
||||
if (maxSwaps<numSwaps)
|
||||
{
|
||||
maxSwaps = numSwaps;
|
||||
//printf("maxSwaps = %d\n", maxSwaps);
|
||||
}
|
||||
|
||||
return batchIdx;
|
||||
}
|
19
src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h
Normal file
19
src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h
Normal file
@ -0,0 +1,19 @@
|
||||
#ifndef B3_GPU_PGS_JACOBI_SOLVER_H
|
||||
#define B3_GPU_PGS_JACOBI_SOLVER_H
|
||||
|
||||
#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
|
||||
class b3GpuPgsJacobiSolver : public b3PgsJacobiSolver
|
||||
{
|
||||
int m_staticIdx;
|
||||
public:
|
||||
b3GpuPgsJacobiSolver (bool usePgs);
|
||||
virtual~b3GpuPgsJacobiSolver ();
|
||||
|
||||
virtual b3Scalar solveGroupCacheFriendlyIterations(b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
|
||||
virtual b3Scalar solveGroupCacheFriendlySetup(b3RigidBodyCL* bodies, b3InertiaCL* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds,b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal);
|
||||
|
||||
|
||||
int sortConstraintByBatch3( struct b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies);
|
||||
};
|
||||
|
||||
#endif //B3_GPU_PGS_JACOBI_SOLVER_H
|
@ -28,6 +28,8 @@ bool dumpContactStats = false;
|
||||
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h"
|
||||
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
|
||||
#include "Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.h"
|
||||
|
||||
#include "b3GpuBatchingPgsSolver.h"
|
||||
#include "b3Solver.h"
|
||||
|
||||
@ -44,7 +46,7 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
|
||||
m_data->m_device = device;
|
||||
m_data->m_queue = q;
|
||||
|
||||
m_data->m_solver = new b3PgsJacobiSolver(true);
|
||||
m_data->m_solver = new b3GpuPgsJacobiSolver(true);//new b3PgsJacobiSolver(true);
|
||||
|
||||
m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies);
|
||||
m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx,q,config.m_maxBroadphasePairs);
|
||||
@ -226,11 +228,11 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
|
||||
gpuBodies.copyToHost(hostBodies);
|
||||
b3AlignedObjectArray<b3InertiaCL> hostInertias;
|
||||
gpuInertias.copyToHost(hostInertias);
|
||||
b3AlignedObjectArray<b3Contact4> hostContacts;
|
||||
gpuContacts.copyToHost(hostContacts);
|
||||
// b3AlignedObjectArray<b3Contact4> hostContacts;
|
||||
//gpuContacts.copyToHost(hostContacts);
|
||||
{
|
||||
b3TypedConstraint** joints = numJoints? &m_data->m_joints[0] : 0;
|
||||
b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
|
||||
// b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
|
||||
//m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints);
|
||||
m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(),&hostBodies[0],&hostInertias[0],0,0,numJoints, joints);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user