diff --git a/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp b/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp index bca0788b6..d1c069937 100644 --- a/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp +++ b/Demos3/GpuDemos/constraints/ConstraintsDemo.cpp @@ -123,11 +123,11 @@ int GpuConstraintsDemo::createDynamicsObjects2(const ConstructionInfo& ci, const if (ci.m_useInstancedCollisionShapes) colIndex = m_data->m_np->registerConvexHullShape(utilPtr); - for (int i=0;i<20;i++) + for (int i=0;iglobalMaxBatch ) { globalMaxBatch = maxNumBatches; - printf("maxNumBatches = %d\n",maxNumBatches); + b3Printf("maxNumBatches = %d\n",maxNumBatches); } clFinish(m_data->m_queue); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp new file mode 100644 index 000000000..a2f236754 --- /dev/null +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp @@ -0,0 +1,606 @@ +#include "b3GpuPgsJacobiSolver.h" + +#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" + +#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" +#include +#include "Bullet3Common/b3AlignedObjectArray.h" +#include //for memset +#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" + +b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (bool usePgs) + :b3PgsJacobiSolver (usePgs) +{ +} + +b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver () +{ +} + +struct b3BatchConstraint +{ + int m_bodyAPtrAndSignBit; + int m_bodyBPtrAndSignBit; + int m_offset; + short int m_numConstraintRows; + short int m_batchId; + + short int& getBatchIdx() + { + return m_batchId; + } +}; + +static b3AlignedObjectArray batchConstraints; +static b3AlignedObjectArray batches; + + + + + + +b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3RigidBodyCL* bodies, b3InertiaCL* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds,b3TypedConstraint** constraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) +{ + B3_PROFILE("GPU solveGroupCacheFriendlySetup"); + batchConstraints.resize(numConstraints); + m_staticIdx = -1; + m_maxOverrideNumSolverIterations = 0; + + + + m_tmpSolverBodyPool.resize(0); + + + m_bodyCount.resize(0); + m_bodyCount.resize(numBodies,0); + m_bodyCountCheck.resize(0); + m_bodyCountCheck.resize(numBodies,0); + + m_deltaLinearVelocities.resize(0); + m_deltaLinearVelocities.resize(numBodies,b3Vector3(0,0,0)); + m_deltaAngularVelocities.resize(0); + m_deltaAngularVelocities.resize(numBodies,b3Vector3(0,0,0)); + + int totalBodies = 0; + + for (int i=0;igetRigidBodyA(); + int bodyIndexB = constraints[i]->getRigidBodyB(); + if (m_usePgs) + { + m_bodyCount[bodyIndexA]=-1; + m_bodyCount[bodyIndexB]=-1; + } else + { + //didn't implement joints with Jacobi version yet + b3Assert(0); + } + + } + for (int i=0;iinternalSetAppliedImpulse(0.0f); + } + } + + //b3RigidBody* rb0=0,*rb1=0; + //if (1) + { + { + + int totalNumRows = 0; + int i; + + m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints); + //calculate the total number of contraint rows + for (i=0;igetJointFeedback(); + if (fb) + { + fb->m_appliedForceBodyA.setZero(); + fb->m_appliedTorqueBodyA.setZero(); + fb->m_appliedForceBodyB.setZero(); + fb->m_appliedTorqueBodyB.setZero(); + } + + if (constraints[i]->isEnabled()) + { + } + if (constraints[i]->isEnabled()) + { + constraints[i]->getInfo1(&info1,bodies); + } else + { + info1.m_numConstraintRows = 0; + info1.nub = 0; + } + + batchConstraints[i].m_numConstraintRows = info1.m_numConstraintRows; + batchConstraints[i].m_offset = totalNumRows; + totalNumRows += info1.m_numConstraintRows; + } + m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); + + +#ifndef DISABLE_JOINTS + ///setup the b3SolverConstraints + int currentRow = 0; + + for (i=0;igetRigidBodyA()]; + //b3RigidBody& rbA = constraint->getRigidBodyA(); + // b3RigidBody& rbB = constraint->getRigidBodyB(); + b3RigidBodyCL& rbB = bodies[ constraint->getRigidBodyB()]; + + + + int solverBodyIdA = getOrInitSolverBody(constraint->getRigidBodyA(),bodies,inertias); + int solverBodyIdB = getOrInitSolverBody(constraint->getRigidBodyB(),bodies,inertias); + + b3SolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA]; + b3SolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB]; + + if (rbA.getInvMass()) + { + batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; + } else + { + if (!solverBodyIdA) + m_staticIdx = 0; + batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; + } + + if (rbB.getInvMass()) + { + batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; + } else + { + if (!solverBodyIdB) + m_staticIdx = 0; + batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; + } + + + int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; + if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations) + m_maxOverrideNumSolverIterations = overrideNumSolverIterations; + + + int j; + for ( j=0;jinternalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f); + bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f); + bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f); + bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f); + bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f); + bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f); + bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f); + bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f); + + + b3TypedConstraint::b3ConstraintInfo2 info2; + info2.fps = 1.f/infoGlobal.m_timeStep; + info2.erp = infoGlobal.m_erp; + info2.m_J1linearAxis = currentConstraintRow->m_contactNormal; + info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal; + info2.m_J2linearAxis = 0; + info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal; + info2.rowskip = sizeof(b3SolverConstraint)/sizeof(b3Scalar);//check this + ///the size of b3SolverConstraint needs be a multiple of b3Scalar + b3Assert(info2.rowskip*sizeof(b3Scalar)== sizeof(b3SolverConstraint)); + info2.m_constraintError = ¤tConstraintRow->m_rhs; + currentConstraintRow->m_cfm = infoGlobal.m_globalCfm; + info2.m_damping = infoGlobal.m_damping; + info2.cfm = ¤tConstraintRow->m_cfm; + info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; + info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; + info2.m_numIterations = infoGlobal.m_numIterations; + constraints[i]->getInfo2(&info2,bodies); + + ///finalize the constraint setup + for ( j=0;j=constraints[i]->getBreakingImpulseThreshold()) + { + solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold(); + } + + if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold()) + { + solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold(); + } + + solverConstraint.m_originalContactPoint = constraint; + + b3Matrix3x3& invInertiaWorldA= inertias[constraint->getRigidBodyA()].m_invInertiaWorld; + { + + //b3Vector3 angularFactorA(1,1,1); + const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal; + solverConstraint.m_angularComponentA = invInertiaWorldA*ftorqueAxis1;//*angularFactorA; + } + + b3Matrix3x3& invInertiaWorldB= inertias[constraint->getRigidBodyB()].m_invInertiaWorld; + { + + const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; + solverConstraint.m_angularComponentB = invInertiaWorldB*ftorqueAxis2;//*constraint->getRigidBodyB().getAngularFactor(); + } + + { + //it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal + //because it gets multiplied iMJlB + b3Vector3 iMJlA = solverConstraint.m_contactNormal*rbA.getInvMass(); + b3Vector3 iMJaA = invInertiaWorldA*solverConstraint.m_relpos1CrossNormal; + b3Vector3 iMJlB = solverConstraint.m_contactNormal*rbB.getInvMass();//sign of normal? + b3Vector3 iMJaB = invInertiaWorldB*solverConstraint.m_relpos2CrossNormal; + + b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal); + sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); + sum += iMJlB.dot(solverConstraint.m_contactNormal); + sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); + b3Scalar fsum = b3Fabs(sum); + b3Assert(fsum > B3_EPSILON); + solverConstraint.m_jacDiagABInv = fsum>B3_EPSILON?b3Scalar(1.)/sum : 0.f; + } + + + ///fix rhs + ///todo: add force/torque accelerators + { + b3Scalar rel_vel; + b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel); + b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel); + + rel_vel = vel1Dotn+vel2Dotn; + + b3Scalar restitution = 0.f; + b3Scalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2 + b3Scalar velocityError = restitution - rel_vel * info2.m_damping; + b3Scalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv; + b3Scalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv; + solverConstraint.m_rhs = penetrationImpulse+velocityImpulse; + solverConstraint.m_appliedImpulse = 0.f; + + } + } + } + currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows; + } +#endif //DISABLE_JOINTS + } + + + { + int i; + + for (i=0;i bodyUsed; +static b3AlignedObjectArray curUsed; + + + +inline int b3GpuPgsJacobiSolver::sortConstraintByBatch3( b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) +{ + int sz = sizeof(b3BatchConstraint); + + B3_PROFILE("sortConstraintByBatch3"); + + static int maxSwaps = 0; + int numSwaps = 0; + + curUsed.resize(2*simdWidth); + + static int maxNumConstraints = 0; + if (maxNumConstraintsm_device = device; m_data->m_queue = q; - m_data->m_solver = new b3PgsJacobiSolver(true); + m_data->m_solver = new b3GpuPgsJacobiSolver(true);//new b3PgsJacobiSolver(true); m_data->m_allAabbsGPU = new b3OpenCLArray(ctx,q,config.m_maxConvexBodies); m_data->m_overlappingPairsGPU = new b3OpenCLArray(ctx,q,config.m_maxBroadphasePairs); @@ -226,11 +228,11 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) gpuBodies.copyToHost(hostBodies); b3AlignedObjectArray hostInertias; gpuInertias.copyToHost(hostInertias); - b3AlignedObjectArray hostContacts; - gpuContacts.copyToHost(hostContacts); + // b3AlignedObjectArray hostContacts; + //gpuContacts.copyToHost(hostContacts); { b3TypedConstraint** joints = numJoints? &m_data->m_joints[0] : 0; - b3Contact4* contacts = numContacts? &hostContacts[0]: 0; +// b3Contact4* contacts = numContacts? &hostContacts[0]: 0; //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints); m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(),&hostBodies[0],&hostInertias[0],0,0,numJoints, joints);