diff --git a/Extras/BulletMultiThreaded/CMakeLists.txt b/Extras/BulletMultiThreaded/CMakeLists.txt index 3ae2a40fe..dc602b5b1 100644 --- a/Extras/BulletMultiThreaded/CMakeLists.txt +++ b/Extras/BulletMultiThreaded/CMakeLists.txt @@ -21,6 +21,8 @@ ADD_LIBRARY(LibBulletMultiThreaded SpuSampleTaskProcess.h SpuSampleTaskProcess.cpp + SpuCollisionObjectWrapper.cpp + SpuCollisionObjectWrapper.h SpuCollisionTaskProcess.h SpuCollisionTaskProcess.cpp SpuGatheringCollisionDispatcher.h @@ -39,15 +41,20 @@ ADD_LIBRARY(LibBulletMultiThreaded SpuNarrowPhaseCollisionTask/SpuVoronoiSimplexSolver.h SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.h - SpuNarrowPhaseCollisionTask/SpuLocalSupport.h + SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp + SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h SpuParallelSolver.cpp SpuParallelSolver.h SpuSolverTask/SpuParallellSolverTask.cpp SpuSolverTask/SpuParallellSolverTask.h -# SpuRaycastTaskProcess.cpp -# SpuRaycastTaskProcess.h -# SpuRaycastTask/SpuRaycastTask.cpp -# SpuRaycastTask/SpuRaycastTask.h + SpuBatchRaycaster.cpp + SpuBatchRaycaster.h + SpuRaycastTaskProcess.cpp + SpuRaycastTaskProcess.h + SpuRaycastTask/SpuRaycastTask.cpp + SpuRaycastTask/SpuRaycastTask.h + SpuRaycastTask/SpuSubSimplexConvexCast.cpp + SpuRaycastTask/SpuSubSimplexConvexCast.h ) diff --git a/Extras/BulletMultiThreaded/SequentialThreadSupport.cpp b/Extras/BulletMultiThreaded/SequentialThreadSupport.cpp index b7158cd61..32447299e 100644 --- a/Extras/BulletMultiThreaded/SequentialThreadSupport.cpp +++ b/Extras/BulletMultiThreaded/SequentialThreadSupport.cpp @@ -55,7 +55,6 @@ void SequentialThreadSupport::sendRequest(uint32_t uiCommand, uint32_t uiArgumen } - ///check for messages from SPUs void SequentialThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) { @@ -65,8 +64,6 @@ void SequentialThreadSupport::waitForResponse(unsigned int *puiArgument0, unsign *puiArgument1 = spuStatus.m_status; } - - void SequentialThreadSupport::startThreads(SequentialThreadConstructionInfo& threadConstructionInfo) { m_activeSpuStatus.resize(1); @@ -78,7 +75,7 @@ void SequentialThreadSupport::startThreads(SequentialThreadConstructionInfo& thr spuStatus.m_status = 0; spuStatus.m_lsMemory = threadConstructionInfo.m_lsMemoryFunc(); spuStatus.m_userThreadFunc = threadConstructionInfo.m_userThreadFunc; - printf("STS: Created local store at %p for function %p\n",spuStatus.m_lsMemory, spuStatus.m_userThreadFunc); + printf("STS: Created local store at %p for task %s\n", spuStatus.m_lsMemory, threadConstructionInfo.m_uniqueName); } void SequentialThreadSupport::startSPU() diff --git a/Extras/BulletMultiThreaded/SpuBatchRaycaster.cpp b/Extras/BulletMultiThreaded/SpuBatchRaycaster.cpp index d03944a26..dd7c76ca0 100644 --- a/Extras/BulletMultiThreaded/SpuBatchRaycaster.cpp +++ b/Extras/BulletMultiThreaded/SpuBatchRaycaster.cpp @@ -39,7 +39,7 @@ void SpuBatchRaycaster::addRay (const btVector3& rayFrom, const btVector3& rayTo) { SpuRaycastTaskWorkUnitOut workUnitOut; - workUnitOut.hitFraction = 0.99; + workUnitOut.hitFraction = 1.0; workUnitOut.hitNormal = btVector3(0.0, 1.0, 0.0); rayBatchOutput.push_back (workUnitOut); diff --git a/Extras/BulletMultiThreaded/SpuCollisionObjectWrapper.h b/Extras/BulletMultiThreaded/SpuCollisionObjectWrapper.h index 3b069a34a..840c0e4b3 100644 --- a/Extras/BulletMultiThreaded/SpuCollisionObjectWrapper.h +++ b/Extras/BulletMultiThreaded/SpuCollisionObjectWrapper.h @@ -16,7 +16,7 @@ subject to the following restrictions: #include "PlatformDefinitions.h" #include "BulletCollision/CollisionDispatch/btCollisionObject.h" -class SpuCollisionObjectWrapper +ATTRIBUTE_ALIGNED16(class) SpuCollisionObjectWrapper { protected: int m_shapeType; diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp index 344e5c9c0..a190ec7f1 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp @@ -1,221 +1,221 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "SpuCollisionShapes.h" - -btPoint3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData)//, int *featureIndex) -{ - switch (shapeType) - { - case SPHERE_SHAPE_PROXYTYPE: - { - return btPoint3(0,0,0); - } - case BOX_SHAPE_PROXYTYPE: - { -// spu_printf("SPU: getSupport BOX_SHAPE_PROXYTYPE\n"); - btConvexInternalShape* convexShape = (btConvexInternalShape*)shape; - const btVector3& halfExtents = convexShape->getImplicitShapeDimensions(); - - return btPoint3( - localDir.getX() < 0.0f ? -halfExtents.x() : halfExtents.x(), - localDir.getY() < 0.0f ? -halfExtents.y() : halfExtents.y(), - localDir.getZ() < 0.0f ? -halfExtents.z() : halfExtents.z()); - } - - case TRIANGLE_SHAPE_PROXYTYPE: - { - - btVector3 dir(localDir.getX(),localDir.getY(),localDir.getZ()); - btVector3* vertices = (btVector3*)shape; - btVector3 dots(dir.dot(vertices[0]), dir.dot(vertices[1]), dir.dot(vertices[2])); - btVector3 sup = vertices[dots.maxAxis()]; - return btPoint3(sup.getX(),sup.getY(),sup.getZ()); - break; - } - - case CYLINDER_SHAPE_PROXYTYPE: - { - btCylinderShape* cylShape = (btCylinderShape*)shape; - - //mapping of halfextents/dimension onto radius/height depends on how cylinder local orientation is (upAxis) - - btVector3 halfExtents = cylShape->getImplicitShapeDimensions(); - btVector3 v(localDir.getX(),localDir.getY(),localDir.getZ()); - - int cylinderUpAxis = cylShape->getUpAxis(); - int XX(1),YY(0),ZZ(2); - - switch (cylinderUpAxis) - { - case 0: - { - XX = 1; - YY = 0; - ZZ = 2; - break; - } - case 1: - { - XX = 0; - YY = 1; - ZZ = 2; - break; - } - case 2: - { - XX = 0; - YY = 2; - ZZ = 1; - break; - } - default: - btAssert(0); - //printf("SPU:localGetSupportingVertexWithoutMargin unknown Cylinder up-axis\n"); - }; - - btScalar radius = halfExtents[XX]; - btScalar halfHeight = halfExtents[cylinderUpAxis]; - - btVector3 tmp; - btScalar d ; - - btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]); - if (s != btScalar(0.0)) - { - d = radius / s; - tmp[XX] = v[XX] * d; - tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight; - tmp[ZZ] = v[ZZ] * d; - return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ()); - } - else - { - tmp[XX] = radius; - tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight; - tmp[ZZ] = btScalar(0.0); - return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ()); - } - } - - case CAPSULE_SHAPE_PROXYTYPE: - { - //spu_printf("SPU: todo: getSupport CAPSULE_SHAPE_PROXYTYPE\n"); - btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ()); - - btConvexInternalShape* cnvxShape = (btConvexInternalShape*)shape; - btVector3 halfExtents = cnvxShape->getImplicitShapeDimensions(); - btScalar halfHeight = halfExtents.getY(); - btScalar radius = halfExtents.getX(); - btVector3 supVec(0,0,0); - - btScalar maxDot(btScalar(-1e30)); - - btVector3 vec = vec0; - btScalar lenSqr = vec.length2(); - if (lenSqr < btScalar(0.0001)) - { - vec.setValue(1,0,0); - } else - { - btScalar rlen = btScalar(1.) / btSqrt(lenSqr ); - vec *= rlen; - } - btVector3 vtx; - btScalar newDot; - { - btVector3 pos(0,halfHeight,0); - vtx = pos +vec*(radius); - newDot = vec.dot(vtx); - if (newDot > maxDot) - { - maxDot = newDot; - supVec = vtx; - } - } - { - btVector3 pos(0,-halfHeight,0); - vtx = pos +vec*(radius); - newDot = vec.dot(vtx); - if (newDot > maxDot) - { - maxDot = newDot; - supVec = vtx; - } - } - return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ()); - break; - }; - - case CONVEX_HULL_SHAPE_PROXYTYPE: - { - //spu_printf("SPU: todo: getSupport CONVEX_HULL_SHAPE_PROXYTYPE\n"); - - - - btPoint3* points = 0; - int numPoints = 0; - points = convexVertexData->gConvexPoints; - numPoints = convexVertexData->gNumConvexPoints; - - // spu_printf("numPoints = %d\n",numPoints); - - btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.)); - btScalar newDot,maxDot = btScalar(-1e30); - - btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ()); - btVector3 vec = vec0; - btScalar lenSqr = vec.length2(); - if (lenSqr < btScalar(0.0001)) - { - vec.setValue(1,0,0); - } else - { - btScalar rlen = btScalar(1.) / btSqrt(lenSqr ); - vec *= rlen; - } - - - for (int i=0;i maxDot) - { - maxDot = newDot; - supVec = vtx; - } - } - return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ()); - - break; - }; - - default: - - //spu_printf("SPU:(type %i) missing support function\n",shapeType); - - -#if __ASSERT - spu_printf("localGetSupportingVertexWithoutMargin() - Unsupported bound type: %d.\n", shapeType); -#endif // __ASSERT - return btPoint3(0.f, 0.f, 0.f); - } -} - +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuCollisionShapes.h" + +btPoint3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData)//, int *featureIndex) +{ + switch (shapeType) + { + case SPHERE_SHAPE_PROXYTYPE: + { + return btPoint3(0,0,0); + } + case BOX_SHAPE_PROXYTYPE: + { +// spu_printf("SPU: getSupport BOX_SHAPE_PROXYTYPE\n"); + btConvexInternalShape* convexShape = (btConvexInternalShape*)shape; + const btVector3& halfExtents = convexShape->getImplicitShapeDimensions(); + + return btPoint3( + localDir.getX() < 0.0f ? -halfExtents.x() : halfExtents.x(), + localDir.getY() < 0.0f ? -halfExtents.y() : halfExtents.y(), + localDir.getZ() < 0.0f ? -halfExtents.z() : halfExtents.z()); + } + + case TRIANGLE_SHAPE_PROXYTYPE: + { + + btVector3 dir(localDir.getX(),localDir.getY(),localDir.getZ()); + btVector3* vertices = (btVector3*)shape; + btVector3 dots(dir.dot(vertices[0]), dir.dot(vertices[1]), dir.dot(vertices[2])); + btVector3 sup = vertices[dots.maxAxis()]; + return btPoint3(sup.getX(),sup.getY(),sup.getZ()); + break; + } + + case CYLINDER_SHAPE_PROXYTYPE: + { + btCylinderShape* cylShape = (btCylinderShape*)shape; + + //mapping of halfextents/dimension onto radius/height depends on how cylinder local orientation is (upAxis) + + btVector3 halfExtents = cylShape->getImplicitShapeDimensions(); + btVector3 v(localDir.getX(),localDir.getY(),localDir.getZ()); + + int cylinderUpAxis = cylShape->getUpAxis(); + int XX(1),YY(0),ZZ(2); + + switch (cylinderUpAxis) + { + case 0: + { + XX = 1; + YY = 0; + ZZ = 2; + break; + } + case 1: + { + XX = 0; + YY = 1; + ZZ = 2; + break; + } + case 2: + { + XX = 0; + YY = 2; + ZZ = 1; + break; + } + default: + btAssert(0); + //printf("SPU:localGetSupportingVertexWithoutMargin unknown Cylinder up-axis\n"); + }; + + btScalar radius = halfExtents[XX]; + btScalar halfHeight = halfExtents[cylinderUpAxis]; + + btVector3 tmp; + btScalar d ; + + btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]); + if (s != btScalar(0.0)) + { + d = radius / s; + tmp[XX] = v[XX] * d; + tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight; + tmp[ZZ] = v[ZZ] * d; + return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ()); + } + else + { + tmp[XX] = radius; + tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight; + tmp[ZZ] = btScalar(0.0); + return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ()); + } + } + + case CAPSULE_SHAPE_PROXYTYPE: + { + //spu_printf("SPU: todo: getSupport CAPSULE_SHAPE_PROXYTYPE\n"); + btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ()); + + btConvexInternalShape* cnvxShape = (btConvexInternalShape*)shape; + btVector3 halfExtents = cnvxShape->getImplicitShapeDimensions(); + btScalar halfHeight = halfExtents.getY(); + btScalar radius = halfExtents.getX(); + btVector3 supVec(0,0,0); + + btScalar maxDot(btScalar(-1e30)); + + btVector3 vec = vec0; + btScalar lenSqr = vec.length2(); + if (lenSqr < btScalar(0.0001)) + { + vec.setValue(1,0,0); + } else + { + btScalar rlen = btScalar(1.) / btSqrt(lenSqr ); + vec *= rlen; + } + btVector3 vtx; + btScalar newDot; + { + btVector3 pos(0,halfHeight,0); + vtx = pos +vec*(radius); + newDot = vec.dot(vtx); + if (newDot > maxDot) + { + maxDot = newDot; + supVec = vtx; + } + } + { + btVector3 pos(0,-halfHeight,0); + vtx = pos +vec*(radius); + newDot = vec.dot(vtx); + if (newDot > maxDot) + { + maxDot = newDot; + supVec = vtx; + } + } + return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ()); + break; + }; + + case CONVEX_HULL_SHAPE_PROXYTYPE: + { + //spu_printf("SPU: todo: getSupport CONVEX_HULL_SHAPE_PROXYTYPE\n"); + + + + btPoint3* points = 0; + int numPoints = 0; + points = convexVertexData->gConvexPoints; + numPoints = convexVertexData->gNumConvexPoints; + + // spu_printf("numPoints = %d\n",numPoints); + + btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.)); + btScalar newDot,maxDot = btScalar(-1e30); + + btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ()); + btVector3 vec = vec0; + btScalar lenSqr = vec.length2(); + if (lenSqr < btScalar(0.0001)) + { + vec.setValue(1,0,0); + } else + { + btScalar rlen = btScalar(1.) / btSqrt(lenSqr ); + vec *= rlen; + } + + + for (int i=0;i maxDot) + { + maxDot = newDot; + supVec = vtx; + } + } + return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ()); + + break; + }; + + default: + + //spu_printf("SPU:(type %i) missing support function\n",shapeType); + + +#if __ASSERT + spu_printf("localGetSupportingVertexWithoutMargin() - Unsupported bound type: %d.\n", shapeType); +#endif // __ASSERT + return btPoint3(0.f, 0.f, 0.f); + } +} + void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape* convexShape, ppu_address_t convexShapePtr, int shapeType, btTransform xform) { //calculate the aabb, given the types... @@ -390,7 +390,6 @@ void dmaConvexVertexData (SpuConvexPolyhedronVertexData* convexVertexData, btCon register int dmaSize = convexVertexData->gNumConvexPoints*sizeof(btPoint3); ppu_address_t pointsPPU = (ppu_address_t) convexShapeSPU->getPoints(); cellDmaGet(&convexVertexData->g_convexPointBuffer[0], pointsPPU , dmaSize, DMA_TAG(2), 0, 0); - } void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionShapePtr, uint32_t dmaTag, int shapeType) @@ -422,6 +421,7 @@ void dmaCompoundSubShapes (CompoundShape_LocalStoreMemory* compoundShapeLocation } } + void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex) { diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp index 31b05123d..7ebc54da9 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp @@ -1,36 +1,36 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "SpuContactResult.h" - - -//#define DEBUG_SPU_COLLISION_DETECTION 1 - - -SpuContactResult::SpuContactResult() -{ - m_manifoldAddress = 0; - m_spuManifold = NULL; - m_RequiresWriteBack = false; -} - - SpuContactResult::~SpuContactResult() -{ - g_manifoldDmaExport.swapBuffers(); -} - +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuContactResult.h" + + +//#define DEBUG_SPU_COLLISION_DETECTION 1 + + +SpuContactResult::SpuContactResult() +{ + m_manifoldAddress = 0; + m_spuManifold = NULL; + m_RequiresWriteBack = false; +} + + SpuContactResult::~SpuContactResult() +{ + g_manifoldDmaExport.swapBuffers(); +} + ///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback; inline btScalar calculateCombinedFriction(btScalar friction0,btScalar friction1) { @@ -50,179 +50,179 @@ inline btScalar calculateCombinedRestitution(btScalar restitution0,btScalar rest { return restitution0*restitution1; } - - - - void SpuContactResult::setContactInfo(btPersistentManifold* spuManifold, uint64_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped) - { - //spu_printf("SpuContactResult::setContactInfo ManifoldAddress: %lu\n", manifoldAddress); - m_rootWorldTransform0 = worldTrans0; - m_rootWorldTransform1 = worldTrans1; - m_manifoldAddress = manifoldAddress; - m_spuManifold = spuManifold; - - m_combinedFriction = calculateCombinedFriction(friction0,friction1); - m_combinedRestitution = calculateCombinedRestitution(restitution0,restitution1); - m_isSwapped = isSwapped; - } - - void SpuContactResult::setShapeIdentifiers(int partId0,int index0, int partId1,int index1) - { - - } - - - - ///return true if it requires a dma transfer back -bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld, - const btVector3& pointInWorld, - float depth, - btPersistentManifold* manifoldPtr, - btTransform& transA, - btTransform& transB, - btScalar combinedFriction, - btScalar combinedRestitution, - bool isSwapped) -{ - - float contactTreshold = manifoldPtr->getContactBreakingThreshold(); - - //spu_printf("SPU: add contactpoint, depth:%f, contactTreshold %f, manifoldPtr %llx\n",depth,contactTreshold,manifoldPtr); - -#ifdef DEBUG_SPU_COLLISION_DETECTION - spu_printf("SPU: contactTreshold %f\n",contactTreshold); -#endif //DEBUG_SPU_COLLISION_DETECTION - if (depth > manifoldPtr->getContactBreakingThreshold()) - return false; - - //provide inverses or just calculate? - btTransform transAInv = transA.inverse();//m_body0->m_cachedInvertedWorldTransform; - btTransform transBInv= transB.inverse();//m_body1->m_cachedInvertedWorldTransform; - - btVector3 pointA; - btVector3 localA; - btVector3 localB; - btVector3 normal; - - if (isSwapped) - { - normal = normalOnBInWorld * -1; - pointA = pointInWorld + normal * depth; - localA = transAInv(pointA ); - localB = transBInv(pointInWorld); - /*localA = transBInv(pointA ); - localB = transAInv(pointInWorld);*/ - } - else - { - normal = normalOnBInWorld; - pointA = pointInWorld + normal * depth; - localA = transAInv(pointA ); - localB = transBInv(pointInWorld); - } - - btManifoldPoint newPt(localA,localB,normal,depth); - - int insertIndex = manifoldPtr->getCacheEntry(newPt); - if (insertIndex >= 0) - { -// manifoldPtr->replaceContactPoint(newPt,insertIndex); -// return true; - -#ifdef DEBUG_SPU_COLLISION_DETECTION - spu_printf("SPU: same contact detected, nothing done\n"); -#endif //DEBUG_SPU_COLLISION_DETECTION - // This is not needed, just use the old info! saves a DMA transfer as well - } else - { - - newPt.m_combinedFriction = combinedFriction; - newPt.m_combinedRestitution = combinedRestitution; - - /* - //potential TODO: SPU callbacks, either immediate (local on the SPU), or deferred - //User can override friction and/or restitution - if (gContactAddedCallback && - //and if either of the two bodies requires custom material - ((m_body0->m_collisionFlags & btCollisionObject::customMaterialCallback) || - (m_body1->m_collisionFlags & btCollisionObject::customMaterialCallback))) - { - //experimental feature info, for per-triangle material etc. - (*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1); - } - */ - manifoldPtr->AddManifoldPoint(newPt); - return true; - - } - return false; - -} - - -void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold) -{ - memcpy(g_manifoldDmaExport.getFront(),lsManifold,sizeof(btPersistentManifold)); - - g_manifoldDmaExport.swapBuffers(); - uint64_t mmAddr = (uint32_t)mmManifold; - g_manifoldDmaExport.backBufferDmaPut(mmAddr, sizeof(btPersistentManifold), DMA_TAG(9)); - // Should there be any kind of wait here? What if somebody tries to use this tag again? What if we call this function again really soon? - //no, the swapBuffers does the wait -} - -void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btPoint3& pointInWorld,float depth) -{ - //spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth); - -#ifdef DEBUG_SPU_COLLISION_DETECTION - // int sman = sizeof(rage::phManifold); -// spu_printf("sizeof_manifold = %i\n",sman); -#endif //DEBUG_SPU_COLLISION_DETECTION - - btPersistentManifold* localManifold = m_spuManifold; - - btVector3 normalB(normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ()); - btVector3 pointWrld(pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ()); - - //process the contact point - const bool retVal = ManifoldResultAddContactPoint(normalB, - pointWrld, - depth, - localManifold, - m_rootWorldTransform0, - m_rootWorldTransform1, - m_combinedFriction, - m_combinedRestitution, - m_isSwapped); - m_RequiresWriteBack = m_RequiresWriteBack || retVal; -} - -void SpuContactResult::flush() -{ - - if (m_spuManifold && m_spuManifold->getNumContacts()) - { - m_spuManifold->refreshContactPoints(m_rootWorldTransform0,m_rootWorldTransform1); - m_RequiresWriteBack = true; - } - - - if (m_RequiresWriteBack) - { -#ifdef DEBUG_SPU_COLLISION_DETECTION - spu_printf("SPU: Start SpuContactResult::flush (Put) DMA\n"); - spu_printf("Num contacts:%d\n", m_spuManifold->getNumContacts()); - spu_printf("Manifold address: %llu\n", m_manifoldAddress); -#endif //DEBUG_SPU_COLLISION_DETECTION - // spu_printf("writeDoubleBufferedManifold\n"); - writeDoubleBufferedManifold(m_spuManifold, (btPersistentManifold*)m_manifoldAddress); -#ifdef DEBUG_SPU_COLLISION_DETECTION - spu_printf("SPU: Finished (Put) DMA\n"); -#endif //DEBUG_SPU_COLLISION_DETECTION - } - m_spuManifold = NULL; - m_RequiresWriteBack = false; -} - - + + + + void SpuContactResult::setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction1, bool isSwapped) + { + //spu_printf("SpuContactResult::setContactInfo ManifoldAddress: %lu\n", manifoldAddress); + m_rootWorldTransform0 = worldTrans0; + m_rootWorldTransform1 = worldTrans1; + m_manifoldAddress = manifoldAddress; + m_spuManifold = spuManifold; + + m_combinedFriction = calculateCombinedFriction(friction0,friction1); + m_combinedRestitution = calculateCombinedRestitution(restitution0,restitution1); + m_isSwapped = isSwapped; + } + + void SpuContactResult::setShapeIdentifiers(int partId0,int index0, int partId1,int index1) + { + + } + + + + ///return true if it requires a dma transfer back +bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld, + const btVector3& pointInWorld, + float depth, + btPersistentManifold* manifoldPtr, + btTransform& transA, + btTransform& transB, + btScalar combinedFriction, + btScalar combinedRestitution, + bool isSwapped) +{ + + float contactTreshold = manifoldPtr->getContactBreakingThreshold(); + + //spu_printf("SPU: add contactpoint, depth:%f, contactTreshold %f, manifoldPtr %llx\n",depth,contactTreshold,manifoldPtr); + +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("SPU: contactTreshold %f\n",contactTreshold); +#endif //DEBUG_SPU_COLLISION_DETECTION + if (depth > manifoldPtr->getContactBreakingThreshold()) + return false; + + //provide inverses or just calculate? + btTransform transAInv = transA.inverse();//m_body0->m_cachedInvertedWorldTransform; + btTransform transBInv= transB.inverse();//m_body1->m_cachedInvertedWorldTransform; + + btVector3 pointA; + btVector3 localA; + btVector3 localB; + btVector3 normal; + + if (isSwapped) + { + normal = normalOnBInWorld * -1; + pointA = pointInWorld + normal * depth; + localA = transAInv(pointA ); + localB = transBInv(pointInWorld); + /*localA = transBInv(pointA ); + localB = transAInv(pointInWorld);*/ + } + else + { + normal = normalOnBInWorld; + pointA = pointInWorld + normal * depth; + localA = transAInv(pointA ); + localB = transBInv(pointInWorld); + } + + btManifoldPoint newPt(localA,localB,normal,depth); + + int insertIndex = manifoldPtr->getCacheEntry(newPt); + if (insertIndex >= 0) + { +// manifoldPtr->replaceContactPoint(newPt,insertIndex); +// return true; + +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("SPU: same contact detected, nothing done\n"); +#endif //DEBUG_SPU_COLLISION_DETECTION + // This is not needed, just use the old info! saves a DMA transfer as well + } else + { + + newPt.m_combinedFriction = combinedFriction; + newPt.m_combinedRestitution = combinedRestitution; + + /* + //potential TODO: SPU callbacks, either immediate (local on the SPU), or deferred + //User can override friction and/or restitution + if (gContactAddedCallback && + //and if either of the two bodies requires custom material + ((m_body0->m_collisionFlags & btCollisionObject::customMaterialCallback) || + (m_body1->m_collisionFlags & btCollisionObject::customMaterialCallback))) + { + //experimental feature info, for per-triangle material etc. + (*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1); + } + */ + manifoldPtr->AddManifoldPoint(newPt); + return true; + + } + return false; + +} + + +void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold) +{ + memcpy(g_manifoldDmaExport.getFront(),lsManifold,sizeof(btPersistentManifold)); + + g_manifoldDmaExport.swapBuffers(); + uint64_t mmAddr = (uint32_t)mmManifold; + g_manifoldDmaExport.backBufferDmaPut(mmAddr, sizeof(btPersistentManifold), DMA_TAG(9)); + // Should there be any kind of wait here? What if somebody tries to use this tag again? What if we call this function again really soon? + //no, the swapBuffers does the wait +} + +void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btPoint3& pointInWorld,float depth) +{ + //spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth); + +#ifdef DEBUG_SPU_COLLISION_DETECTION + // int sman = sizeof(rage::phManifold); +// spu_printf("sizeof_manifold = %i\n",sman); +#endif //DEBUG_SPU_COLLISION_DETECTION + + btPersistentManifold* localManifold = m_spuManifold; + + btVector3 normalB(normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ()); + btVector3 pointWrld(pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ()); + + //process the contact point + const bool retVal = ManifoldResultAddContactPoint(normalB, + pointWrld, + depth, + localManifold, + m_rootWorldTransform0, + m_rootWorldTransform1, + m_combinedFriction, + m_combinedRestitution, + m_isSwapped); + m_RequiresWriteBack = m_RequiresWriteBack || retVal; +} + +void SpuContactResult::flush() +{ + + if (m_spuManifold && m_spuManifold->getNumContacts()) + { + m_spuManifold->refreshContactPoints(m_rootWorldTransform0,m_rootWorldTransform1); + m_RequiresWriteBack = true; + } + + + if (m_RequiresWriteBack) + { +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("SPU: Start SpuContactResult::flush (Put) DMA\n"); + spu_printf("Num contacts:%d\n", m_spuManifold->getNumContacts()); + spu_printf("Manifold address: %llu\n", m_manifoldAddress); +#endif //DEBUG_SPU_COLLISION_DETECTION + // spu_printf("writeDoubleBufferedManifold\n"); + writeDoubleBufferedManifold(m_spuManifold, (btPersistentManifold*)m_manifoldAddress); +#ifdef DEBUG_SPU_COLLISION_DETECTION + spu_printf("SPU: Finished (Put) DMA\n"); +#endif //DEBUG_SPU_COLLISION_DETECTION + } + m_spuManifold = NULL; + m_RequiresWriteBack = false; +} + + diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h index fb69a5516..072212e34 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h @@ -35,10 +35,10 @@ subject to the following restrictions: struct SpuCollisionPairInput { - uint64_t m_collisionShapes[2]; + ppu_address_t m_collisionShapes[2]; void* m_spuCollisionShapes[2]; - uint64_t m_persistentManifoldPtr; + ppu_address_t m_persistentManifoldPtr; btVector3 m_primitiveDimensions0; btVector3 m_primitiveDimensions1; int m_shapeType0; @@ -50,9 +50,6 @@ struct SpuCollisionPairInput btTransform m_worldTransform1; bool m_isSwapped; - - - }; @@ -68,7 +65,7 @@ struct SpuClosestPointInput btTransform m_transformB; float m_maximumDistanceSquared; class btStackAlloc* m_stackAlloc; - struct SpuConvexPolyhedronVertexData* m_convexVertexData; + struct SpuConvexPolyhedronVertexData* m_convexVertexData[2]; }; ///SpuContactResult exports the contact points using double-buffered DMA transfers, only when needed @@ -77,7 +74,7 @@ class SpuContactResult { btTransform m_rootWorldTransform0; btTransform m_rootWorldTransform1; - uint64_t m_manifoldAddress; + ppu_address_t m_manifoldAddress; btPersistentManifold* m_spuManifold; bool m_RequiresWriteBack; @@ -99,7 +96,7 @@ class SpuContactResult virtual void setShapeIdentifiers(int partId0,int index0, int partId1,int index1); - void setContactInfo(btPersistentManifold* spuManifold, uint64_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction01, bool isSwapped); + void setContactInfo(btPersistentManifold* spuManifold, ppu_address_t manifoldAddress,const btTransform& worldTrans0,const btTransform& worldTrans1, btScalar restitution0,btScalar restitution1, btScalar friction0,btScalar friction01, bool isSwapped); void writeDoubleBufferedManifold(btPersistentManifold* lsManifold, btPersistentManifold* mmManifold); diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h index 6152851f2..2a18fa2ba 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h @@ -39,7 +39,8 @@ public: btTransform& transA,const btTransform& transB, btVector3& v, btPoint3& pa, btPoint3& pb, class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc, - struct SpuConvexPolyhedronVertexData* convexVertexData + struct SpuConvexPolyhedronVertexData* convexVertexDataA, + struct SpuConvexPolyhedronVertexData* convexVertexDataB ) const = 0; diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp index 11c8227f4..516b632b6 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp @@ -26,7 +26,7 @@ #include "SpuGjkPairDetector.h" #include "SpuVoronoiSimplexSolver.h" -#include "SpuLocalSupport.h" //definition of SpuConvexPolyhedronVertexData +#include "SpuCollisionShapes.h" //definition of SpuConvexPolyhedronVertexData #ifdef __CELLOS_LV2__ ///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases @@ -92,16 +92,11 @@ int g_CacheHits=0; #include #endif -#define MAX_SHAPE_SIZE 256 - //int gNumConvexPoints0=0; - - ///Make sure no destructors are called on this memory struct CollisionTask_LocalStoreMemory { - ATTRIBUTE_ALIGNED16(char bufferProxy0[16]); ATTRIBUTE_ALIGNED16(char bufferProxy1[16]); @@ -138,41 +133,16 @@ struct CollisionTask_LocalStoreMemory } btPersistentManifold gPersistentManifold; - ATTRIBUTE_ALIGNED16(char gCollisionShape0[MAX_SHAPE_SIZE]); - ATTRIBUTE_ALIGNED16(char gCollisionShape1[MAX_SHAPE_SIZE]); + CollisionShape_LocalStoreMemory gCollisionShapes[2]; ATTRIBUTE_ALIGNED16(int spuIndices[16]); - //ATTRIBUTE_ALIGNED16(btOptimizedBvh gOptimizedBvh); - ATTRIBUTE_ALIGNED16(char gOptimizedBvh[sizeof(btOptimizedBvh)+16]); - btOptimizedBvh* getOptimizedBvh() - { - return (btOptimizedBvh*) gOptimizedBvh; - } - - ATTRIBUTE_ALIGNED16(btTriangleIndexVertexArray gTriangleMeshInterfaceStorage); - btTriangleIndexVertexArray* gTriangleMeshInterfacePtr; - ///only a single mesh part for now, we can add support for multiple parts, but quantized trees don't support this at the moment - ATTRIBUTE_ALIGNED16(btIndexedMesh gIndexMesh); - -#define MAX_SPU_SUBTREE_HEADERS 32 - //1024 - ATTRIBUTE_ALIGNED16(btBvhSubtreeInfo gSubtreeHeaders[MAX_SPU_SUBTREE_HEADERS]); - ATTRIBUTE_ALIGNED16(btQuantizedBvhNode gSubtreeNodes[MAX_SUBTREE_SIZE_IN_BYTES/sizeof(btQuantizedBvhNode)]); - - SpuConvexPolyhedronVertexData convexVertexData; - - // Compound data -#define MAX_SPU_COMPOUND_SUBSHAPES 16 - ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES*2]); - ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES*2][MAX_SHAPE_SIZE]); - + bvhMeshShape_LocalStoreMemory bvhShapeData; + SpuConvexPolyhedronVertexData convexVertexData[2]; + CompoundShape_LocalStoreMemory compoundShapeData[2]; }; - - - #if defined(__CELLOS_LV2__) || defined(USE_LIBSPE2) ATTRIBUTE_ALIGNED16(CollisionTask_LocalStoreMemory gLocalStoreMemory); @@ -189,73 +159,8 @@ void* createCollisionLocalStoreMemory() #endif - void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts); -#define USE_BRANCHFREE_TEST 1 -#ifdef USE_BRANCHFREE_TEST -SIMD_FORCE_INLINE unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(unsigned short int* aabbMin1,unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) -{ - return btSelect((unsigned)((aabbMin1[0] <= aabbMax2[0]) & (aabbMax1[0] >= aabbMin2[0]) - & (aabbMin1[2] <= aabbMax2[2]) & (aabbMax1[2] >= aabbMin2[2]) - & (aabbMin1[1] <= aabbMax2[1]) & (aabbMax1[1] >= aabbMin2[1])), - 1, 0); -} -#else - -unsigned int spuTestQuantizedAabbAgainstQuantizedAabb(const unsigned short int* aabbMin1,const unsigned short int* aabbMax1,const unsigned short int* aabbMin2,const unsigned short int* aabbMax2) -{ - unsigned int overlap = 1; - overlap = (aabbMin1[0] > aabbMax2[0] || aabbMax1[0] < aabbMin2[0]) ? 0 : overlap; - overlap = (aabbMin1[2] > aabbMax2[2] || aabbMax1[2] < aabbMin2[2]) ? 0 : overlap; - overlap = (aabbMin1[1] > aabbMax2[1] || aabbMax1[1] < aabbMin2[1]) ? 0 : overlap; - return overlap; -} -#endif - - - -void spuWalkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode,int startNodeIndex,int endNodeIndex) -{ - - int curIndex = startNodeIndex; - int walkIterations = 0; - int subTreeSize = endNodeIndex - startNodeIndex; - - int escapeIndex; - - unsigned int aabbOverlap, isLeafNode; - - while (curIndex < endNodeIndex) - { - //catch bugs in tree data - assert (walkIterations < subTreeSize); - - walkIterations++; - aabbOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); - isLeafNode = rootNode->isLeafNode(); - - if (isLeafNode && aabbOverlap) - { - //printf("overlap with node %d\n",rootNode->getTriangleIndex()); - nodeCallback->processNode(0,rootNode->getTriangleIndex()); - // spu_printf("SPU: overlap detected with triangleIndex:%d\n",rootNode->getTriangleIndex()); - } - - if (aabbOverlap || isLeafNode) - { - rootNode++; - curIndex++; - } else - { - escapeIndex = rootNode->getEscapeIndex(); - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - -} - SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size) { @@ -271,7 +176,6 @@ SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t s #endif } - SIMD_FORCE_INLINE void small_cache_read_triple( void* ls0, ppu_address_t ea0, void* ls1, ppu_address_t ea1, void* ls2, ppu_address_t ea2, @@ -326,7 +230,7 @@ class spuNodeCallback : public btNodeOverlapCallback ATTRIBUTE_ALIGNED16(btVector3 spuTriangleVertices[3]); ATTRIBUTE_ALIGNED16(btScalar spuUnscaledVertex[4]); - ATTRIBUTE_ALIGNED16(int spuIndices[16]); + //ATTRIBUTE_ALIGNED16(int spuIndices[16]); public: @@ -346,7 +250,7 @@ public: - int* indexBasePtr = (int*)(m_lsMemPtr->gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->gIndexMesh.m_triangleIndexStride); + int* indexBasePtr = (int*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride); small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0], &m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1], @@ -358,13 +262,13 @@ public: // spu_printf("SPU index2=%d ,",spuIndices[2]); // spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr); - const btVector3& meshScaling = m_lsMemPtr->gTriangleMeshInterfacePtr->getScaling(); + const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling(); for (int j=2;btLikely( j>=0 );j--) { int graphicsindex = m_lsMemPtr->spuIndices[j]; // spu_printf("SPU index=%d ,",graphicsindex); - btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->gIndexMesh.m_vertexStride); + btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride); // spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr); @@ -405,38 +309,18 @@ public: }; - - //////////////////////// /// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc) /////////////////// void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) { //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite - - register int dmaSize; register ppu_address_t dmaPpuAddress2; btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)wuInput->m_spuCollisionShapes[1]; //need the mesh interface, for access to triangle vertices - - dmaSize = sizeof(btTriangleIndexVertexArray); - dmaPpuAddress2 = reinterpret_cast(trimeshShape->getMeshInterface()); - // spu_printf("trimeshShape->getMeshInterface() == %llx\n",dmaPpuAddress2); - lsMemPtr->gTriangleMeshInterfacePtr = (btTriangleIndexVertexArray*)cellDmaGetReadOnly(&lsMemPtr->gTriangleMeshInterfaceStorage, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - - - ///now DMA over the BVH - - dmaSize = sizeof(btOptimizedBvh); - dmaPpuAddress2 = reinterpret_cast(trimeshShape->getOptimizedBvh()); - //spu_printf("trimeshShape->getOptimizedBvh() == %llx\n",dmaPpuAddress2); - cellDmaGet(&lsMemPtr->gOptimizedBvh, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(2)); - cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - + dmaBvhShapeData (&lsMemPtr->bvhShapeData, trimeshShape); btVector3 aabbMin(-1,-400,-1); btVector3 aabbMax(1,400,1); @@ -446,82 +330,9 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT btTransform convexInTriangleSpace; convexInTriangleSpace = wuInput->m_worldTransform1.inverse() * wuInput->m_worldTransform0; btConvexInternalShape* convexShape = (btConvexInternalShape*)wuInput->m_spuCollisionShapes[0]; - //calculate the aabb, given the types... - switch (wuInput->m_shapeType0) - { - case CYLINDER_SHAPE_PROXYTYPE: - case BOX_SHAPE_PROXYTYPE: - { - float margin=convexShape->getMarginNV(); - btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); - btTransform& t = convexInTriangleSpace; - btMatrix3x3 abs_b = t.getBasis().absolute(); - btPoint3 center = t.getOrigin(); - btVector3 extent = btVector3(abs_b[0].dot(halfExtents), - abs_b[1].dot(halfExtents), - abs_b[2].dot(halfExtents)); - extent += btVector3(margin,margin,margin); - aabbMin = center - extent; - aabbMax = center + extent; - break; - } + computeAabb (aabbMin, aabbMax, convexShape, wuInput->m_collisionShapes[0], wuInput->m_shapeType0, convexInTriangleSpace); - case CAPSULE_SHAPE_PROXYTYPE: - { - float margin=convexShape->getMarginNV(); - btVector3 halfExtents = convexShape->getImplicitShapeDimensions(); - //add the radius to y-axis to get full height - btScalar radius = halfExtents[0]; - halfExtents[1] += radius; - btTransform& t = convexInTriangleSpace; - btMatrix3x3 abs_b = t.getBasis().absolute(); - btPoint3 center = t.getOrigin(); - btVector3 extent = btVector3(abs_b[0].dot(halfExtents), - abs_b[1].dot(halfExtents), - abs_b[2].dot(halfExtents)); - extent += btVector3(margin,margin,margin); - aabbMin = center - extent; - aabbMax = center + extent; - break; - } - - - case SPHERE_SHAPE_PROXYTYPE: - { - float radius = convexShape->getImplicitShapeDimensions().getX();// * convexShape->getLocalScaling().getX(); - float margin = radius + convexShape->getMarginNV(); - btTransform& t = convexInTriangleSpace; - const btVector3& center = t.getOrigin(); - btVector3 extent(margin,margin,margin); - aabbMin = center - extent; - aabbMax = center + extent; - break; - } - case CONVEX_HULL_SHAPE_PROXYTYPE: - { - dmaSize = sizeof(btConvexHullShape); - dmaPpuAddress2 = wuInput->m_collisionShapes[0]; - ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]); - - cellDmaGet(&convexHullShape0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - cellDmaWaitTagStatusAll(DMA_MASK(1)); - btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0; - btTransform& t = convexInTriangleSpace; - - btScalar margin = convexShape->getMarginNV(); - - localPtr->getNonvirtualAabb(t,aabbMin,aabbMax,margin); - - //spu_printf("SPU convex aabbMin=%f,%f,%f=\n",aabbMin.getX(),aabbMin.getY(),aabbMin.getZ()); - //spu_printf("SPU convex aabbMax=%f,%f,%f=\n",aabbMax.getX(),aabbMax.getY(),aabbMax.getZ()); - - break; - } - - default: - spu_printf("SPU: unsupported shapetype %d in AABB calculation\n"); - }; //CollisionShape* triangleShape = static_cast(triBody->m_collisionShape); //convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax); @@ -531,51 +342,38 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT // aabbMax += extra; // aabbMin -= extra; - - ///quantize query AABB unsigned short int quantizedQueryAabbMin[3]; unsigned short int quantizedQueryAabbMax[3]; - lsMemPtr->getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin); - lsMemPtr->getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax); + lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin); + lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax); - QuantizedNodeArray& nodeArray = lsMemPtr->getOptimizedBvh()->getQuantizedNodeArray(); + QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray(); //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); - BvhSubtreeInfoArray& subTrees = lsMemPtr->getOptimizedBvh()->getSubtreeInfoArray(); + BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray(); spuNodeCallback nodeCallback(wuInput,lsMemPtr,spuContacts); - IndexedMeshArray& indexArray = lsMemPtr->gTriangleMeshInterfacePtr->getIndexedMeshArray(); + IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray(); //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); - // spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); //not likely to happen if (subTrees.size() && indexArray.size() == 1) { ///DMA in the index info - - dmaSize = sizeof(btIndexedMesh); - dmaPpuAddress2 = reinterpret_cast(&indexArray[0]); - cellDmaGet(&lsMemPtr->gIndexMesh, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */); cellDmaWaitTagStatusAll(DMA_MASK(1)); - - //spu_printf("SPU gIndexMesh dma finished\n"); - //display the headers int numBatch = subTrees.size(); for (int i=0;i(&subTrees[i]); - // spu_printf("&subtree[i]=%llx, dmaSize = %d\n",dmaPpuAddress2,dmaSize); - cellDmaGet(&lsMemPtr->gSubtreeHeaders[0], dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + dmaBvhSubTreeHeaders (&lsMemPtr->bvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); @@ -583,7 +381,7 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT for (int j=0;jgSubtreeHeaders[j]; + const btBvhSubtreeInfo& subtree = lsMemPtr->bvhShapeData.gSubtreeHeaders[j]; unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); if (overlap) @@ -591,23 +389,15 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT btAssert(subtree.m_subtreeSize); //dma the actual nodes of this subtree - - dmaSize = subtree.m_subtreeSize* sizeof(btQuantizedBvhNode); - dmaPpuAddress2 = reinterpret_cast(&nodeArray[subtree.m_rootNodeIndex]); - cellDmaGet(&lsMemPtr->gSubtreeNodes[0], dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); + dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2); cellDmaWaitTagStatusAll(DMA_MASK(2)); - - - + /* Walk this subtree */ spuWalkStacklessQuantizedTree(&nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, - &lsMemPtr->gSubtreeNodes[0], + &lsMemPtr->bvhShapeData.gSubtreeNodes[0], 0, subtree.m_subtreeSize); - } - - // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); } @@ -619,73 +409,10 @@ void ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT } //pre-fetch first tree, then loop and double buffer - - - } } -///getShapeTypeSize could easily be optimized, but it is not likely a bottleneck -SIMD_FORCE_INLINE int getShapeTypeSize(int shapeType) -{ - - - switch (shapeType) - { - case CYLINDER_SHAPE_PROXYTYPE: - { - int shapeSize = sizeof(btCylinderShape); - btAssert(shapeSize < MAX_SHAPE_SIZE); - return shapeSize; - } - case BOX_SHAPE_PROXYTYPE: - { - int shapeSize = sizeof(btBoxShape); - btAssert(shapeSize < MAX_SHAPE_SIZE); - return shapeSize; - } - case SPHERE_SHAPE_PROXYTYPE: - { - int shapeSize = sizeof(btSphereShape); - btAssert(shapeSize < MAX_SHAPE_SIZE); - return shapeSize; - } - case TRIANGLE_MESH_SHAPE_PROXYTYPE: - { - int shapeSize = sizeof(btBvhTriangleMeshShape); - btAssert(shapeSize < MAX_SHAPE_SIZE); - return shapeSize; - } - case CAPSULE_SHAPE_PROXYTYPE: - { - int shapeSize = sizeof(btCapsuleShape); - btAssert(shapeSize < MAX_SHAPE_SIZE); - return shapeSize; - } - - case CONVEX_HULL_SHAPE_PROXYTYPE: - { - int shapeSize = sizeof(btConvexHullShape); - btAssert(shapeSize < MAX_SHAPE_SIZE); - return shapeSize; - } - - case COMPOUND_SHAPE_PROXYTYPE: - { - int shapeSize = sizeof(btCompoundShape); - btAssert(shapeSize < MAX_SHAPE_SIZE); - return shapeSize; - } - - default: - btAssert(0); - //unsupported shapetype, please add here - return 0; - } -} - - //////////////////////// @@ -693,8 +420,6 @@ SIMD_FORCE_INLINE int getShapeTypeSize(int shapeType) /////////////////// void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts) { - - register int dmaSize; register ppu_address_t dmaPpuAddress2; @@ -705,12 +430,8 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa //CollisionShape* shape1 = (CollisionShape*)wuInput->m_collisionShapes[1]; btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr; - - bool genericGjk = true; - - if (genericGjk) { //try generic GJK @@ -718,8 +439,6 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa SpuVoronoiSimplexSolver vsSolver; SpuMinkowskiPenetrationDepthSolver penetrationSolver; - - ///DMA in the vertices for convex shapes ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]); ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]); @@ -735,12 +454,8 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa //cellDmaWaitTagStatusAll(DMA_MASK(1)); } - - if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) { - - // spu_printf("SPU: DMA btConvexHullShape\n"); dmaSize = sizeof(btConvexHullShape); dmaPpuAddress2 = wuInput->m_collisionShapes[1]; @@ -748,68 +463,31 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa //cellDmaWaitTagStatusAll(DMA_MASK(1)); } - - if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) { - cellDmaWaitTagStatusAll(DMA_MASK(1)); - btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape0; - - lsMemPtr->convexVertexData.gNumConvexPoints0 = localPtr->getNumPoints(); - if (lsMemPtr->convexVertexData.gNumConvexPoints0>MAX_NUM_SPU_CONVEX_POINTS) - { - btAssert(0); - spu_printf("SPU: Error: MAX_NUM_SPU_CONVEX_POINTS(%d) exceeded: %d\n",MAX_NUM_SPU_CONVEX_POINTS,lsMemPtr->convexVertexData.gNumConvexPoints0); - return; - } - - dmaSize = lsMemPtr->convexVertexData.gNumConvexPoints0*sizeof(btPoint3); - dmaPpuAddress2 = (ppu_address_t) localPtr->getPoints(); - cellDmaGet(&lsMemPtr->convexVertexData.g_convexPointBuffer0, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - - lsMemPtr->convexVertexData.gSpuConvexShapePtr0 = wuInput->m_spuCollisionShapes[0]; - - + dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0); + lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0]; } if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) { - cellDmaWaitTagStatusAll(DMA_MASK(1)); - btConvexHullShape* localPtr = (btConvexHullShape*)&convexHullShape1; - - lsMemPtr->convexVertexData.gNumConvexPoints1 = localPtr->getNumPoints(); - if (lsMemPtr->convexVertexData.gNumConvexPoints1>MAX_NUM_SPU_CONVEX_POINTS) - { - btAssert(0); - spu_printf("SPU: Error: MAX_NUM_SPU_CONVEX_POINTS(%d) exceeded: %d\n",MAX_NUM_SPU_CONVEX_POINTS,lsMemPtr->convexVertexData.gNumConvexPoints1); - return; - } - - - dmaSize = lsMemPtr->convexVertexData.gNumConvexPoints1*sizeof(btPoint3); - dmaPpuAddress2 = (ppu_address_t) localPtr->getPoints(); - cellDmaGet(&lsMemPtr->convexVertexData.g_convexPointBuffer1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - - lsMemPtr->convexVertexData.gSpuConvexShapePtr1 = wuInput->m_spuCollisionShapes[1]; - - + dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1); + lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1]; } if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) { cellDmaWaitTagStatusAll(DMA_MASK(2)); - - lsMemPtr->convexVertexData.gConvexPoints0 = &lsMemPtr->convexVertexData.g_convexPointBuffer0[0]; + lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0]; } if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) ) { - cellDmaWaitTagStatusAll(DMA_MASK(2)); - - lsMemPtr->convexVertexData.gConvexPoints1 = &lsMemPtr->convexVertexData.g_convexPointBuffer1[0]; + cellDmaWaitTagStatusAll(DMA_MASK(2)); + lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0]; } @@ -821,7 +499,8 @@ void ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa float marginB = wuInput->m_collisionMargin1; SpuClosestPointInput cpInput; - cpInput.m_convexVertexData = &lsMemPtr->convexVertexData; + cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0]; + cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1]; cpInput.m_transformA = wuInput->m_worldTransform0; cpInput.m_transformB = wuInput->m_worldTransform1; float sumMargin = (marginA+marginB+lsMemPtr->gPersistentManifold.getContactBreakingThreshold()); @@ -858,27 +537,18 @@ SIMD_FORCE_INLINE void dmaAndSetupCollisionObjects(SpuCollisionPairInput& collis register int dmaSize; register ppu_address_t dmaPpuAddress2; - - dmaSize = sizeof(btCollisionObject); - dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.gProxyPtr0->m_clientObject; - cellDmaGet(&lsMem.gColObj0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - - - dmaSize = sizeof(btCollisionObject); - dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.gProxyPtr1->m_clientObject; - cellDmaGet(&lsMem.gColObj1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - + dmaSize = sizeof(btCollisionObject); + dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr1->m_clientObject :*/ (ppu_address_t)lsMem.gProxyPtr0->m_clientObject; + cellDmaGet(&lsMem.gColObj0, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); + dmaSize = sizeof(btCollisionObject); + dmaPpuAddress2 = /*collisionPairInput.m_isSwapped ? (ppu_address_t)lsMem.gProxyPtr0->m_clientObject :*/ (ppu_address_t)lsMem.gProxyPtr1->m_clientObject; + cellDmaGet(&lsMem.gColObj1, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); collisionPairInput.m_worldTransform0 = lsMem.getColObj0()->getWorldTransform(); collisionPairInput.m_worldTransform1 = lsMem.getColObj1()->getWorldTransform(); - - - -#ifdef DEBUG_SPU_COLLISION_DETECTION -#endif //DEBUG_SPU_COLLISION_DETECTION - } @@ -894,26 +564,11 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas if (btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType0) && btBroadphaseProxy::isConvex(collisionPairInput.m_shapeType1)) { - - //dmaAndSetupCollisionObjects(collisionPairInput, lsMem); - if (dmaShapes) { - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType0); - //uint64_t dmaPpuAddress2 = (uint64_t)lsMem.gColObj0.getCollisionShape(); - dmaPpuAddress2 = collisionShape0Ptr; - cellDmaGet(collisionShape0Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType1); - dmaPpuAddress2 = collisionShape1Ptr; - cellDmaGet(collisionShape1Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(2)); - - cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); } btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc; @@ -935,82 +590,41 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas { //snPause(); + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + // Both are compounds, do N^2 CD for now // TODO: add some AABB-based pruning - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType0); - dmaPpuAddress2 = collisionShape0Ptr; - cellDmaGet(collisionShape0Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType1); - dmaPpuAddress2 = collisionShape1Ptr; - cellDmaGet(collisionShape1Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(2)); - - cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - - + btCompoundShape* spuCompoundShape0 = (btCompoundShape*)collisionShape0Loc; btCompoundShape* spuCompoundShape1 = (btCompoundShape*)collisionShape1Loc; + dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape0, 1); + dmaCompoundShapeInfo (&lsMem.compoundShapeData[1], spuCompoundShape1, 2); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); + + + dmaCompoundSubShapes (&lsMem.compoundShapeData[0], spuCompoundShape0, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + dmaCompoundSubShapes (&lsMem.compoundShapeData[1], spuCompoundShape1, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + int childShapeCount0 = spuCompoundShape0->getNumChildShapes(); int childShapeCount1 = spuCompoundShape1->getNumChildShapes(); - // dma the first list of child shapes - - dmaSize = childShapeCount0 * sizeof(btCompoundShapeChild); - dmaPpuAddress2 = (ppu_address_t)spuCompoundShape0->getChildList(); - cellDmaGet(lsMem.gSubshapes, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - - - // dma the second list of child shapes - - dmaSize = childShapeCount1 * sizeof(btCompoundShapeChild); - dmaPpuAddress2 = (ppu_address_t)spuCompoundShape1->getChildList(); - cellDmaGet(&lsMem.gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES], dmaPpuAddress2, dmaSize, DMA_TAG(2), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(2)); - cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - - - int i; - - // DMA all the subshapes - for ( i = 0; i < childShapeCount0; ++i) - { - btCompoundShapeChild& childShape = lsMem.gSubshapes[i]; - - dmaSize = getShapeTypeSize(childShape.m_childShapeType); - dmaPpuAddress2 = (ppu_address_t)childShape.m_childShape; - cellDmaGet(lsMem.gSubshapeShape[i], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - } - cellDmaWaitTagStatusAll(DMA_MASK(1)); - - for ( i = 0; i < childShapeCount1; ++i) - { - btCompoundShapeChild& childShape = lsMem.gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES+i]; - - dmaSize = getShapeTypeSize(childShape.m_childShapeType); - dmaPpuAddress2 = (ppu_address_t)childShape.m_childShape; - - cellDmaGet(lsMem.gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES+i], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - } - cellDmaWaitTagStatusAll(DMA_MASK(1)); - // Start the N^2 - for ( i = 0; i < childShapeCount0; ++i) + for (int i = 0; i < childShapeCount0; ++i) { - btCompoundShapeChild& childShape0 = lsMem.gSubshapes[i]; + btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].gSubshapes[i]; for (int j = 0; j < childShapeCount1; ++j) { - btCompoundShapeChild& childShape1 = lsMem.gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES+j]; + btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].gSubshapes[j]; + /* Create a new collision pair input struct using the two child shapes */ SpuCollisionPairInput cinput (collisionPairInput); + cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape0.m_transform; cinput.m_shapeType0 = childShape0.m_childShapeType; cinput.m_collisionMargin0 = childShape0.m_childMargin; @@ -1018,10 +632,10 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape1.m_transform; cinput.m_shapeType1 = childShape1.m_childShapeType; cinput.m_collisionMargin1 = childShape1.m_childMargin; - + /* Recursively call handleCollisionPair () with new collision pair input */ handleCollisionPair(cinput, lsMem, spuContacts, - (ppu_address_t)childShape0.m_childShape, lsMem.gSubshapeShape[i], - (ppu_address_t)childShape1.m_childShape, lsMem.gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES+i], false); + (ppu_address_t)childShape0.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], + (ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false); // bug fix: changed index to j. } } } @@ -1029,55 +643,32 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas { //snPause(); - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType0); - dmaPpuAddress2 = collisionShape0Ptr; - cellDmaGet(collisionShape0Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType1); - dmaPpuAddress2 = collisionShape1Ptr; - cellDmaGet(collisionShape1Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); -// cellDmaWaitTagStatusAll(DMA_MASK(2)); - cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); // object 0 compound, object 1 non-compound btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape0Loc; + dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); int childShapeCount = spuCompoundShape->getNumChildShapes(); - // dma the list of child shapes - - dmaSize = childShapeCount * sizeof(btCompoundShapeChild); - - dmaPpuAddress2 = (ppu_address_t)spuCompoundShape->getChildList(); - - cellDmaGet(lsMem.gSubshapes, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - cellDmaWaitTagStatusAll(DMA_MASK(1)); - - for (int i = 0; i < childShapeCount; ++i) { - btCompoundShapeChild& childShape = lsMem.gSubshapes[i]; + btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i]; // Dma the child shape + dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType); + cellDmaWaitTagStatusAll(DMA_MASK(1)); - dmaSize = getShapeTypeSize(childShape.m_childShapeType); - dmaPpuAddress2 = (ppu_address_t)childShape.m_childShape; - - cellDmaGet(lsMem.gSubshapeShape[i], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - cellDmaWaitTagStatusAll(DMA_MASK(1)); - - SpuCollisionPairInput cinput (collisionPairInput); cinput.m_worldTransform0 = collisionPairInput.m_worldTransform0 * childShape.m_transform; cinput.m_shapeType0 = childShape.m_childShapeType; cinput.m_collisionMargin0 = childShape.m_childMargin; - handleCollisionPair(cinput, lsMem, spuContacts, - (ppu_address_t)childShape.m_childShape, lsMem.gSubshapeShape[i], + (ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], collisionShape1Ptr, collisionShape1Loc, false); } } @@ -1085,57 +676,30 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas { //snPause(); - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType0); - dmaPpuAddress2 = collisionShape0Ptr; - cellDmaGet(collisionShape0Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType1); - dmaPpuAddress2 = collisionShape1Ptr; - - cellDmaGet(collisionShape1Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(2)); - cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - - + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); // object 0 non-compound, object 1 compound btCompoundShape* spuCompoundShape = (btCompoundShape*)collisionShape1Loc; - + dmaCompoundShapeInfo (&lsMem.compoundShapeData[0], spuCompoundShape, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + int childShapeCount = spuCompoundShape->getNumChildShapes(); - // dma the list of child shapes - - dmaSize = childShapeCount * sizeof(btCompoundShapeChild); - - dmaPpuAddress2 = (ppu_address_t)spuCompoundShape->getChildList(); - - cellDmaGet(lsMem.gSubshapes, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - cellDmaWaitTagStatusAll(DMA_MASK(1)); - - for (int i = 0; i < childShapeCount; ++i) { - btCompoundShapeChild& childShape = lsMem.gSubshapes[i]; - + btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i]; // Dma the child shape - - dmaSize = getShapeTypeSize(childShape.m_childShapeType); - dmaPpuAddress2 = (ppu_address_t)childShape.m_childShape; - - cellDmaGet(lsMem.gSubshapeShape[i], dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); - cellDmaWaitTagStatusAll(DMA_MASK(1)); - + dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType); + cellDmaWaitTagStatusAll(DMA_MASK(1)); SpuCollisionPairInput cinput (collisionPairInput); cinput.m_worldTransform1 = collisionPairInput.m_worldTransform1 * childShape.m_transform; cinput.m_shapeType1 = childShape.m_childShapeType; cinput.m_collisionMargin1 = childShape.m_childMargin; - handleCollisionPair(cinput, lsMem, spuContacts, collisionShape0Ptr, collisionShape0Loc, - (ppu_address_t)childShape.m_childShape, lsMem.gSubshapeShape[i], false); - + (ppu_address_t)childShape.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], false); } } @@ -1166,29 +730,11 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas } if (handleConvexConcave) { - if (dmaShapes) { - ///dma and initialize the convex object - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType0); - //uint64_t dmaPpuAddress2 = (uint64_t)lsMem.gColObj0.getCollisionShape(); - - dmaPpuAddress2 = collisionShape0Ptr; - - cellDmaGet(collisionShape0Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(1), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(1)); - - ///dma and initialize the concave object - - dmaSize = getShapeTypeSize(collisionPairInput.m_shapeType1); - - dmaPpuAddress2 = collisionShape1Ptr; - - cellDmaGet(collisionShape1Loc, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); - //cellDmaWaitTagStatusAll(DMA_MASK(2)); - cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); - + dmaCollisionShape (collisionShape0Loc, collisionShape0Ptr, 1, collisionPairInput.m_shapeType0); + dmaCollisionShape (collisionShape1Loc, collisionShape1Ptr, 2, collisionPairInput.m_shapeType1); + cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2)); } btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc; @@ -1210,7 +756,6 @@ void handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas } - void processCollisionTask(void* userPtr, void* lsMemPtr) { @@ -1225,7 +770,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) //////////////////// - uint64_t dmaInPtr = taskDesc.inPtr; + ppu_address_t dmaInPtr = taskDesc.inPtr; unsigned int numPages = taskDesc.numPages; unsigned int numOnLastPage = taskDesc.numOnLastPage; @@ -1336,7 +881,7 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) lsMem.gProxyPtr0 = (btBroadphaseProxy*) lsMem.bufferProxy0; stallingUnalignedDmaSmallGet(lsMem.gProxyPtr0, dmaPpuAddress2 , dmaSize); - collisionPairInput.m_persistentManifoldPtr = (uint64_t) lsMem.gSpuContactManifoldAlgo.getContactManifoldPtr(); + collisionPairInput.m_persistentManifoldPtr = (ppu_address_t) lsMem.gSpuContactManifoldAlgo.getContactManifoldPtr(); collisionPairInput.m_isSwapped = false; @@ -1387,10 +932,10 @@ void processCollisionTask(void* userPtr, void* lsMemPtr) dmaAndSetupCollisionObjects(collisionPairInput, lsMem); handleCollisionPair(collisionPairInput, lsMem, spuContacts, - (ppu_address_t)lsMem.getColObj0()->getCollisionShape(), lsMem.gCollisionShape0, - (ppu_address_t)lsMem.getColObj1()->getCollisionShape(), lsMem.gCollisionShape1); + (ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape, + (ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape); - } + } } } diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h index 05262d85e..34e60ef94 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h @@ -23,11 +23,11 @@ subject to the following restrictions: ///Task Description for SPU collision detection struct SpuGatherAndProcessPairsTaskDesc { - uint64_t inPtr;//m_pairArrayPtr; + ppu_address_t inPtr;//m_pairArrayPtr; //mutex variable uint32_t m_someMutexVariableInMainMemory; - uint64_t m_dispatcher; + ppu_address_t m_dispatcher; uint32_t numOnLastPage; diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp index b598e4f23..4d6d6d92e 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGjkPairDetector.cpp @@ -15,7 +15,7 @@ subject to the following restrictions: #include "SpuGjkPairDetector.h" #include "SpuConvexPenetrationDepthSolver.h" -#include "SpuLocalSupport.h" +#include "SpuCollisionShapes.h" @@ -106,8 +106,8 @@ void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuC // btVector3 pInA = m_minkowskiA->localGetSupportingVertexWithoutMargin(seperatingAxisInA); // btVector3 qInB = m_minkowskiB->localGetSupportingVertexWithoutMargin(seperatingAxisInB); - btVector3 pInA = localGetSupportingVertexWithoutMargin(m_shapeTypeA, m_minkowskiA, seperatingAxisInA,input.m_convexVertexData);//, &featureIndexA); - btVector3 qInB = localGetSupportingVertexWithoutMargin(m_shapeTypeB, m_minkowskiB, seperatingAxisInB,input.m_convexVertexData);//, &featureIndexB); + btVector3 pInA = localGetSupportingVertexWithoutMargin(m_shapeTypeA, m_minkowskiA, seperatingAxisInA,input.m_convexVertexData[0]);//, &featureIndexA); + btVector3 qInB = localGetSupportingVertexWithoutMargin(m_shapeTypeB, m_minkowskiB, seperatingAxisInB,input.m_convexVertexData[1]);//, &featureIndexB); btPoint3 pWorld = localTransA(pInA); @@ -250,7 +250,7 @@ void SpuGjkPairDetector::getClosestPoints(const SpuClosestPointInput& input,SpuC marginA, marginB, localTransA,localTransB, m_cachedSeparatingAxis, tmpPointOnA, tmpPointOnB, - 0,input.m_stackAlloc,input.m_convexVertexData + 0,input.m_stackAlloc,input.m_convexVertexData[0], input.m_convexVertexData[1] ); if (isValid2) diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h index 7ad95dd7f..8b89de03f 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuLocalSupport.h @@ -16,233 +16,4 @@ subject to the following restrictions: -#include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" -#include "BulletCollision/CollisionShapes/btConvexInternalShape.h" -#include "BulletCollision/CollisionShapes/btCylinderShape.h" - -#define MAX_NUM_SPU_CONVEX_POINTS 128 - -struct SpuConvexPolyhedronVertexData -{ - void* gSpuConvexShapePtr0; - void* gSpuConvexShapePtr1; - btPoint3* gConvexPoints0; - btPoint3* gConvexPoints1; - int gNumConvexPoints0; - int gNumConvexPoints1; - ATTRIBUTE_ALIGNED16(btPoint3 g_convexPointBuffer0[MAX_NUM_SPU_CONVEX_POINTS]); - ATTRIBUTE_ALIGNED16(btPoint3 g_convexPointBuffer1[MAX_NUM_SPU_CONVEX_POINTS]); - -}; - - -inline btPoint3 localGetSupportingVertexWithoutMargin(int shapeType, void* shape, btVector3& localDir,struct SpuConvexPolyhedronVertexData* convexVertexData)//, int *featureIndex) -{ - switch (shapeType) - { - case SPHERE_SHAPE_PROXYTYPE: - { - return btPoint3(0,0,0); - } - case BOX_SHAPE_PROXYTYPE: - { -// spu_printf("SPU: getSupport BOX_SHAPE_PROXYTYPE\n"); - btConvexInternalShape* convexShape = (btConvexInternalShape*)shape; - const btVector3& halfExtents = convexShape->getImplicitShapeDimensions(); - - return btPoint3( - localDir.getX() < 0.0f ? -halfExtents.x() : halfExtents.x(), - localDir.getY() < 0.0f ? -halfExtents.y() : halfExtents.y(), - localDir.getZ() < 0.0f ? -halfExtents.z() : halfExtents.z()); - } - - case TRIANGLE_SHAPE_PROXYTYPE: - { - - btVector3 dir(localDir.getX(),localDir.getY(),localDir.getZ()); - btVector3* vertices = (btVector3*)shape; - btVector3 dots(dir.dot(vertices[0]), dir.dot(vertices[1]), dir.dot(vertices[2])); - btVector3 sup = vertices[dots.maxAxis()]; - return btPoint3(sup.getX(),sup.getY(),sup.getZ()); - break; - } - - case CYLINDER_SHAPE_PROXYTYPE: - { - btCylinderShape* cylShape = (btCylinderShape*)shape; - - //mapping of halfextents/dimension onto radius/height depends on how cylinder local orientation is (upAxis) - - btVector3 halfExtents = cylShape->getImplicitShapeDimensions(); - btVector3 v(localDir.getX(),localDir.getY(),localDir.getZ()); - - int cylinderUpAxis = cylShape->getUpAxis(); - int XX(1),YY(0),ZZ(2); - - switch (cylinderUpAxis) - { - case 0: - { - XX = 1; - YY = 0; - ZZ = 2; - break; - } - case 1: - { - XX = 0; - YY = 1; - ZZ = 2; - break; - } - case 2: - { - XX = 0; - YY = 2; - ZZ = 1; - break; - } - default: - btAssert(0); - //printf("SPU:localGetSupportingVertexWithoutMargin unknown Cylinder up-axis\n"); - }; - - btScalar radius = halfExtents[XX]; - btScalar halfHeight = halfExtents[cylinderUpAxis]; - - btVector3 tmp; - btScalar d ; - - btScalar s = btSqrt(v[XX] * v[XX] + v[ZZ] * v[ZZ]); - if (s != btScalar(0.0)) - { - d = radius / s; - tmp[XX] = v[XX] * d; - tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight; - tmp[ZZ] = v[ZZ] * d; - return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ()); - } - else - { - tmp[XX] = radius; - tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight; - tmp[ZZ] = btScalar(0.0); - return btPoint3(tmp.getX(),tmp.getY(),tmp.getZ()); - } - } - - case CAPSULE_SHAPE_PROXYTYPE: - { - //spu_printf("SPU: todo: getSupport CAPSULE_SHAPE_PROXYTYPE\n"); - btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ()); - - btConvexInternalShape* cnvxShape = (btConvexInternalShape*)shape; - btVector3 halfExtents = cnvxShape->getImplicitShapeDimensions(); - btScalar halfHeight = halfExtents.getY(); - btScalar radius = halfExtents.getX(); - btVector3 supVec(0,0,0); - - btScalar maxDot(btScalar(-1e30)); - - btVector3 vec = vec0; - btScalar lenSqr = vec.length2(); - if (lenSqr < btScalar(0.0001)) - { - vec.setValue(1,0,0); - } else - { - btScalar rlen = btScalar(1.) / btSqrt(lenSqr ); - vec *= rlen; - } - btVector3 vtx; - btScalar newDot; - { - btVector3 pos(0,halfHeight,0); - vtx = pos +vec*(radius); - newDot = vec.dot(vtx); - if (newDot > maxDot) - { - maxDot = newDot; - supVec = vtx; - } - } - { - btVector3 pos(0,-halfHeight,0); - vtx = pos +vec*(radius); - newDot = vec.dot(vtx); - if (newDot > maxDot) - { - maxDot = newDot; - supVec = vtx; - } - } - return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ()); - break; - }; - - case CONVEX_HULL_SHAPE_PROXYTYPE: - { - //spu_printf("SPU: todo: getSupport CONVEX_HULL_SHAPE_PROXYTYPE\n"); - - - - btPoint3* points = 0; - int numPoints = 0; - if (shape==convexVertexData->gSpuConvexShapePtr0) - { - points = convexVertexData->gConvexPoints0; - numPoints = convexVertexData->gNumConvexPoints0; - } - if (shape == convexVertexData->gSpuConvexShapePtr1) - { - points = convexVertexData->gConvexPoints1; - numPoints = convexVertexData->gNumConvexPoints1; - } - - // spu_printf("numPoints = %d\n",numPoints); - - btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.)); - btScalar newDot,maxDot = btScalar(-1e30); - - btVector3 vec0(localDir.getX(),localDir.getY(),localDir.getZ()); - btVector3 vec = vec0; - btScalar lenSqr = vec.length2(); - if (lenSqr < btScalar(0.0001)) - { - vec.setValue(1,0,0); - } else - { - btScalar rlen = btScalar(1.) / btSqrt(lenSqr ); - vec *= rlen; - } - - - for (int i=0;i maxDot) - { - maxDot = newDot; - supVec = vtx; - } - } - return btPoint3(supVec.getX(),supVec.getY(),supVec.getZ()); - - break; - }; - - default: - - //spu_printf("SPU:(type %i) missing support function\n",shapeType); - - -#if __ASSERT - spu_printf("localGetSupportingVertexWithoutMargin() - Unsupported bound type: %d.\n", shapeType); -#endif // __ASSERT - return btPoint3(0.f, 0.f, 0.f); - } -} - diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp index 553269f21..cc39afb71 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp @@ -20,7 +20,7 @@ subject to the following restrictions: #include "SpuPreferredPenetrationDirections.h" -#include "SpuLocalSupport.h" +#include "SpuCollisionShapes.h" #define NUM_UNITSPHERE_POINTS 42 static btVector3 sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = @@ -74,7 +74,8 @@ bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( SpuVoronoiSimplexSolver& btTransform& transA,const btTransform& transB, btVector3& v, btPoint3& pa, btPoint3& pb, class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc, - struct SpuConvexPolyhedronVertexData* convexVertexData + struct SpuConvexPolyhedronVertexData* convexVertexDataA, + struct SpuConvexPolyhedronVertexData* convexVertexDataB ) const { @@ -241,8 +242,8 @@ bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( SpuVoronoiSimplexSolver& seperatingAxisInA = (-norm)* transA.getBasis(); seperatingAxisInB = norm* transB.getBasis(); - pInA = localGetSupportingVertexWithoutMargin(shapeTypeA, convexA, seperatingAxisInA,convexVertexData);//, NULL); - qInB = localGetSupportingVertexWithoutMargin(shapeTypeB, convexB, seperatingAxisInB,convexVertexData);//, NULL); + pInA = localGetSupportingVertexWithoutMargin(shapeTypeA, convexA, seperatingAxisInA,convexVertexDataA);//, NULL); + qInB = localGetSupportingVertexWithoutMargin(shapeTypeB, convexB, seperatingAxisInB,convexVertexDataB);//, NULL); // pInA = convexA->localGetSupportingVertexWithoutMargin(seperatingAxisInA); // qInB = convexB->localGetSupportingVertexWithoutMargin(seperatingAxisInB); @@ -299,7 +300,8 @@ bool SpuMinkowskiPenetrationDepthSolver::calcPenDepth( SpuVoronoiSimplexSolver& SpuClosestPointInput input; - input.m_convexVertexData = convexVertexData; + input.m_convexVertexData[0] = convexVertexDataA; + input.m_convexVertexData[1] = convexVertexDataB; btVector3 newOrg = transA.getOrigin() + offset; btTransform displacedTrans = transA; diff --git a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h index c862713b1..6193741ae 100644 --- a/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h +++ b/Extras/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h @@ -35,7 +35,8 @@ public: btTransform& transA,const btTransform& transB, btVector3& v, btPoint3& pa, btPoint3& pb, class btIDebugDraw* debugDraw,btStackAlloc* stackAlloc, - struct SpuConvexPolyhedronVertexData* convexVertexData + struct SpuConvexPolyhedronVertexData* convexVertexDataA, + struct SpuConvexPolyhedronVertexData* convexVertexDataB ) const; diff --git a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.cpp b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.cpp index f7c1d7dad..c640ca9c8 100644 --- a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.cpp +++ b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.cpp @@ -1,10 +1,21 @@ -#include + #include "SpuRaycastTask.h" #include "SpuCollisionObjectWrapper.h" #include "SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h" +#include "SpuSubSimplexConvexCast.h" +#include "LinearMath/btAabbUtil2.h" +/* Future optimization strategies: +1. BBOX prune before loading shape data +2. When doing bvh tree traversal do it once for entire batch of rays. +*/ + +/* Future work: +1. support first hit, closest hit, etc rather than just closest hit. +2. support compound objects +*/ struct RaycastTask_LocalStoreMemory { @@ -14,7 +25,7 @@ struct RaycastTask_LocalStoreMemory return (btCollisionObject*) gColObj; } - SpuCollisionObjectWrapper gCollisionObjectWrapper; + ATTRIBUTE_ALIGNED16(SpuCollisionObjectWrapper gCollisionObjectWrapper); SpuCollisionObjectWrapper* getCollisionObjectWrapper () { return &gCollisionObjectWrapper; @@ -41,7 +52,7 @@ void* createRaycastLocalStoreMemory() } #endif -void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData& gatheredObjectData, RaycastTask_LocalStoreMemory& lsMem, ppu_address_t objectWrapper) +void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData* gatheredObjectData, RaycastTask_LocalStoreMemory* lsMemPtr, ppu_address_t objectWrapper) { register int dmaSize; register ppu_address_t dmaPpuAddress2; @@ -49,27 +60,32 @@ void GatherCollisionObjectAndShapeData (RaycastGatheredObjectData& gatheredObjec /* DMA Collision object wrapper into local store */ dmaSize = sizeof(SpuCollisionObjectWrapper); dmaPpuAddress2 = objectWrapper; - cellDmaGet(&lsMem.gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaGet(&lsMemPtr->gCollisionObjectWrapper, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(1)); /* DMA Collision object into local store */ dmaSize = sizeof(btCollisionObject); - dmaPpuAddress2 = lsMem.getCollisionObjectWrapper()->getCollisionObjectPtr(); - cellDmaGet(&lsMem.gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); + dmaPpuAddress2 = lsMemPtr->getCollisionObjectWrapper()->getCollisionObjectPtr(); + cellDmaGet(&lsMemPtr->gColObj, dmaPpuAddress2 , dmaSize, DMA_TAG(2), 0, 0); cellDmaWaitTagStatusAll(DMA_MASK(2)); /* Gather information about collision object and shape */ - gatheredObjectData.m_worldTransform = lsMem.getColObj()->getWorldTransform(); - gatheredObjectData.m_collisionMargin = lsMem.getCollisionObjectWrapper()->getCollisionMargin (); - gatheredObjectData.m_shapeType = lsMem.getCollisionObjectWrapper()->getShapeType (); - gatheredObjectData.m_collisionShape = (ppu_address_t)lsMem.getColObj()->getCollisionShape(); - gatheredObjectData.m_spuCollisionShape = (void*)&lsMem.gCollisionShape.collisionShape[0]; + gatheredObjectData->m_worldTransform = lsMemPtr->getColObj()->getWorldTransform(); + gatheredObjectData->m_collisionMargin = lsMemPtr->getCollisionObjectWrapper()->getCollisionMargin (); + gatheredObjectData->m_shapeType = lsMemPtr->getCollisionObjectWrapper()->getShapeType (); + gatheredObjectData->m_collisionShape = (ppu_address_t)lsMemPtr->getColObj()->getCollisionShape(); + gatheredObjectData->m_spuCollisionShape = (void*)&lsMemPtr->gCollisionShape.collisionShape; /* DMA shape data */ - dmaCollisionShape (gatheredObjectData.m_spuCollisionShape, gatheredObjectData.m_collisionShape, 1, gatheredObjectData.m_shapeType); + dmaCollisionShape (gatheredObjectData->m_spuCollisionShape, gatheredObjectData->m_collisionShape, 1, gatheredObjectData->m_shapeType); cellDmaWaitTagStatusAll(DMA_MASK(1)); - btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData.m_spuCollisionShape; - gatheredObjectData.m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions (); + if (btBroadphaseProxy::isConvex (gatheredObjectData->m_shapeType)) + { + btConvexInternalShape* spuConvexShape = (btConvexInternalShape*)gatheredObjectData->m_spuCollisionShape; + gatheredObjectData->m_primitiveDimensions = spuConvexShape->getImplicitShapeDimensions (); + } else { + gatheredObjectData->m_primitiveDimensions = btVector3(1.0, 1.0, 1.0); + } } void dmaLoadRayOutput (ppu_address_t rayOutputAddr, SpuRaycastTaskWorkUnitOut* rayOutput, uint32_t dmaTag) @@ -82,6 +98,366 @@ void dmaStoreRayOutput (ppu_address_t rayOutputAddr, const SpuRaycastTaskWorkUni cellDmaLargePut (rayOutput, rayOutputAddr, sizeof(*rayOutput), DMA_TAG(dmaTag), 0, 0); } +#if 0 +SIMD_FORCE_INLINE void small_cache_read(void* buffer, ppu_address_t ea, size_t size) +{ +#if USE_SOFTWARE_CACHE + // Check for alignment requirements. We need to make sure the entire request fits within one cache line, + // so the first and last bytes should fall on the same cache line + btAssert((ea & ~SPE_CACHELINE_MASK) == ((ea + size - 1) & ~SPE_CACHELINE_MASK)); + + void* ls = spe_cache_read(ea); + memcpy(buffer, ls, size); +#else + stallingUnalignedDmaSmallGet(buffer,ea,size); +#endif +} +#endif + +void small_cache_read_triple( void* ls0, ppu_address_t ea0, + void* ls1, ppu_address_t ea1, + void* ls2, ppu_address_t ea2, + size_t size) +{ + btAssert(size<16); + ATTRIBUTE_ALIGNED16(char tmpBuffer0[32]); + ATTRIBUTE_ALIGNED16(char tmpBuffer1[32]); + ATTRIBUTE_ALIGNED16(char tmpBuffer2[32]); + + uint32_t i; + + + ///make sure last 4 bits are the same, for cellDmaSmallGet + char* localStore0 = (char*)ls0; + uint32_t last4BitsOffset = ea0 & 0x0f; + char* tmpTarget0 = tmpBuffer0 + last4BitsOffset; + tmpTarget0 = (char*)cellDmaSmallGetReadOnly(tmpTarget0,ea0,size,DMA_TAG(1),0,0); + + + char* localStore1 = (char*)ls1; + last4BitsOffset = ea1 & 0x0f; + char* tmpTarget1 = tmpBuffer1 + last4BitsOffset; + tmpTarget1 = (char*)cellDmaSmallGetReadOnly(tmpTarget1,ea1,size,DMA_TAG(1),0,0); + + char* localStore2 = (char*)ls2; + last4BitsOffset = ea2 & 0x0f; + char* tmpTarget2 = tmpBuffer2 + last4BitsOffset; + tmpTarget2 = (char*)cellDmaSmallGetReadOnly(tmpTarget2,ea2,size,DMA_TAG(1),0,0); + + + cellDmaWaitTagStatusAll( DMA_MASK(1) ); + + //this is slowish, perhaps memcpy on SPU is smarter? + for (i=0; btLikely( ibvhShapeData.gIndexMesh.m_triangleIndexBase+triangleIndex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_triangleIndexStride); + + small_cache_read_triple(&m_lsMemPtr->spuIndices[0],(ppu_address_t)&indexBasePtr[0], + &m_lsMemPtr->spuIndices[1],(ppu_address_t)&indexBasePtr[1], + &m_lsMemPtr->spuIndices[2],(ppu_address_t)&indexBasePtr[2], + sizeof(int)); + //printf("%d %d %d\n", m_lsMemPtr->spuIndices[0], m_lsMemPtr->spuIndices[1], m_lsMemPtr->spuIndices[2]); + // spu_printf("SPU index0=%d ,",spuIndices[0]); + // spu_printf("SPU index1=%d ,",spuIndices[1]); + // spu_printf("SPU index2=%d ,",spuIndices[2]); + // spu_printf("SPU: indexBasePtr=%llx\n",indexBasePtr); + + const btVector3& meshScaling = m_lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getScaling(); + + for (int j=2;btLikely( j>=0 );j--) + { + int graphicsindex = m_lsMemPtr->spuIndices[j]; + + //spu_printf("SPU index=%d ,",graphicsindex); + btScalar* graphicsbasePtr = (btScalar*)(m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexBase+graphicsindex*m_lsMemPtr->bvhShapeData.gIndexMesh.m_vertexStride); + + // spu_printf("SPU graphicsbasePtr=%llx\n",graphicsbasePtr); + + + ///handle un-aligned vertices... + + //another DMA for each vertex + small_cache_read_triple(&spuUnscaledVertex[0],(ppu_address_t)&graphicsbasePtr[0], + &spuUnscaledVertex[1],(ppu_address_t)&graphicsbasePtr[1], + &spuUnscaledVertex[2],(ppu_address_t)&graphicsbasePtr[2], + sizeof(btScalar)); + + //printf("%f %f %f\n", spuUnscaledVertex[0],spuUnscaledVertex[1],spuUnscaledVertex[2]); + spuTriangleVertices[j] = btVector3( + spuUnscaledVertex[0]*meshScaling.getX(), + spuUnscaledVertex[1]*meshScaling.getY(), + spuUnscaledVertex[2]*meshScaling.getZ()); + + //spu_printf("SPU:triangle vertices:%f,%f,%f\n",spuTriangleVertices[j].x(),spuTriangleVertices[j].y(),spuTriangleVertices[j].z()); + } + + RaycastGatheredObjectData triangleGatheredObjectData (*m_gatheredObjectData); + triangleGatheredObjectData.m_shapeType = TRIANGLE_SHAPE_PROXYTYPE; + triangleGatheredObjectData.m_spuCollisionShape = &spuTriangleVertices[0]; + + //printf("%f %f %f\n", spuTriangleVertices[0][0],spuTriangleVertices[0][1],spuTriangleVertices[0][2]); + //printf("%f %f %f\n", spuTriangleVertices[1][0],spuTriangleVertices[1][1],spuTriangleVertices[1][2]); + //printf("%f %f %f\n", spuTriangleVertices[2][0],spuTriangleVertices[2][1],spuTriangleVertices[2][2]); + SpuRaycastTaskWorkUnitOut out; + out.hitFraction = 1.0; + + performRaycastAgainstConvex (&triangleGatheredObjectData, m_workUnit, &out, m_lsMemPtr); + /* XXX: For now only take the closest hit */ + if (out.hitFraction < m_workUnitOut->hitFraction) + { + m_workUnitOut->hitFraction = out.hitFraction; + m_workUnitOut->hitNormal = out.hitNormal; + } + } + +}; + +void spuWalkStacklessQuantizedTreeAgainstRay(RaycastTask_LocalStoreMemory* lsMemPtr, btNodeOverlapCallback* nodeCallback,const btVector3& raySource, const btVector3& rayTarget,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,const btQuantizedBvhNode* rootNode, int startNodeIndex,int endNodeIndex) +{ + + int curIndex = startNodeIndex; + int walkIterations = 0; + int subTreeSize = endNodeIndex - startNodeIndex; + + int escapeIndex; + + unsigned int boxBoxOverlap, rayBoxOverlap; + unsigned int isLeafNode; +#define RAYAABB2 +#ifdef RAYAABB2 + btScalar lambda_max = 1.0; + btVector3 rayFrom = raySource; + btVector3 rayDirection = (rayTarget-raySource); + rayDirection.normalize (); + lambda_max = rayDirection.dot(rayTarget-raySource); + rayDirection[0] = btScalar(1.0) / rayDirection[0]; + rayDirection[1] = btScalar(1.0) / rayDirection[1]; + rayDirection[2] = btScalar(1.0) / rayDirection[2]; + unsigned int sign[3] = { rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0}; +#endif + + while (curIndex < endNodeIndex) + { + //catch bugs in tree data + assert (walkIterations < subTreeSize); + + walkIterations++; + boxBoxOverlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = rootNode->isLeafNode(); + + rayBoxOverlap = 0; + btScalar param = 1.0; + btVector3 normal; + if (boxBoxOverlap) + { + btVector3 bounds[2]; + bounds[0] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMin); + bounds[1] = lsMemPtr->bvhShapeData.getOptimizedBvh()->unQuantize(rootNode->m_quantizedAabbMax); +#ifdef RAYAABB2 + rayBoxOverlap = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max); +#else + rayBoxOverlap = btRayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); +#endif + } + + if (isLeafNode && rayBoxOverlap) + { + //printf("overlap with node %d\n",rootNode->getTriangleIndex()); + nodeCallback->processNode(0,rootNode->getTriangleIndex()); + // spu_printf("SPU: overlap detected with triangleIndex:%d\n",rootNode->getTriangleIndex()); + } + + if (rayBoxOverlap || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->getEscapeIndex(); + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + +} + +void performRaycastAgainstConcave (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr) +{ + //order: first collision shape is convex, second concave. m_isSwapped is true, if the original order was opposite + register int dmaSize; + register ppu_address_t dmaPpuAddress2; + + btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)gatheredObjectData->m_spuCollisionShape; + + //need the mesh interface, for access to triangle vertices + dmaBvhShapeData (&(lsMemPtr->bvhShapeData), trimeshShape); + + btVector3 aabbMin; + btVector3 aabbMax; + + /* Calculate the AABB for the ray in the triangle mesh shape */ + btTransform rayInTriangleSpace; + rayInTriangleSpace = gatheredObjectData->m_worldTransform.inverse(); + + btVector3 rayFromInTriangleSpace = rayInTriangleSpace(workUnit.rayFrom); + btVector3 rayToInTriangleSpace = rayInTriangleSpace(workUnit.rayTo); + + aabbMin = rayFromInTriangleSpace; + aabbMin.setMin (rayToInTriangleSpace); + aabbMax = rayFromInTriangleSpace; + aabbMax.setMax (rayToInTriangleSpace); + + unsigned short int quantizedQueryAabbMin[3]; + unsigned short int quantizedQueryAabbMax[3]; + lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMin,aabbMin); + lsMemPtr->bvhShapeData.getOptimizedBvh()->quantizeWithClamp(quantizedQueryAabbMax,aabbMax); + + QuantizedNodeArray& nodeArray = lsMemPtr->bvhShapeData.getOptimizedBvh()->getQuantizedNodeArray(); + //spu_printf("SPU: numNodes = %d\n",nodeArray.size()); + + BvhSubtreeInfoArray& subTrees = lsMemPtr->bvhShapeData.getOptimizedBvh()->getSubtreeInfoArray(); + + spuRaycastNodeCallback nodeCallback (gatheredObjectData, workUnit, workUnitOut, lsMemPtr); + + IndexedMeshArray& indexArray = lsMemPtr->bvhShapeData.gTriangleMeshInterfacePtr->getIndexedMeshArray(); + + //spu_printf("SPU:indexArray.size() = %d\n",indexArray.size()); + // spu_printf("SPU: numSubTrees = %d\n",subTrees.size()); + //not likely to happen + if (subTrees.size() && indexArray.size() == 1) + { + ///DMA in the index info + dmaBvhIndexedMesh (&lsMemPtr->bvhShapeData.gIndexMesh, indexArray, 0 /* index into indexArray */, 1 /* dmaTag */); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + //display the headers + int numBatch = subTrees.size(); + for (int i=0;ibvhShapeData.gSubtreeHeaders[0], (ppu_address_t)(&subTrees[i]), nextBatch, 1); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + + + // spu_printf("nextBatch = %d\n",nextBatch); + + for (int j=0;jbvhShapeData.gSubtreeHeaders[j]; + + unsigned int overlap = spuTestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); + if (overlap) + { + btAssert(subtree.m_subtreeSize); + + //dma the actual nodes of this subtree + dmaBvhSubTreeNodes (&lsMemPtr->bvhShapeData.gSubtreeNodes[0], subtree, nodeArray, 2); + + cellDmaWaitTagStatusAll(DMA_MASK(2)); + + /* Walk this subtree */ + spuWalkStacklessQuantizedTreeAgainstRay(lsMemPtr, &nodeCallback,rayFromInTriangleSpace, rayToInTriangleSpace, quantizedQueryAabbMin,quantizedQueryAabbMax, + &lsMemPtr->bvhShapeData.gSubtreeNodes[0], + 0, + subtree.m_subtreeSize); + } + // spu_printf("subtreeSize = %d\n",gSubtreeHeaders[j].m_subtreeSize); + } + + // unsigned short int m_quantizedAabbMin[3]; + // unsigned short int m_quantizedAabbMax[3]; + // int m_rootNodeIndex; + // int m_subtreeSize; + i+=nextBatch; + } + + //pre-fetch first tree, then loop and double buffer + } +} + +void performRaycastAgainstCompound (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr) +{ + spu_printf ("Currently no support for ray. vs compound objects. Support coming soon.\n"); +} + +void +performRaycastAgainstConvex (RaycastGatheredObjectData* gatheredObjectData, const SpuRaycastTaskWorkUnit& workUnit, SpuRaycastTaskWorkUnitOut* workUnitOut, RaycastTask_LocalStoreMemory* lsMemPtr) +{ + SpuVoronoiSimplexSolver simplexSolver; + + btTransform rayFromTrans, rayToTrans; + rayFromTrans.setIdentity (); + rayFromTrans.setOrigin (workUnit.rayFrom); + rayToTrans.setIdentity (); + rayToTrans.setOrigin (workUnit.rayTo); + + SpuCastResult result; + + /* Load the vertex data if the shape is a convex hull */ + /* XXX: We might be loading the shape twice */ + ATTRIBUTE_ALIGNED16(char convexHullShape[sizeof(btConvexHullShape)]); + if (gatheredObjectData->m_shapeType == CONVEX_HULL_SHAPE_PROXYTYPE) + { + register int dmaSize; + register ppu_address_t dmaPpuAddress2; + dmaSize = sizeof(btConvexHullShape); + dmaPpuAddress2 = gatheredObjectData->m_collisionShape; + cellDmaGet(&convexHullShape, dmaPpuAddress2, dmaSize, DMA_TAG(1), 0, 0); + cellDmaWaitTagStatusAll(DMA_MASK(1)); + dmaConvexVertexData (&lsMemPtr->convexVertexData, (btConvexHullShape*)&convexHullShape); + cellDmaWaitTagStatusAll(DMA_MASK(2)); // dmaConvexVertexData uses dma channel 2! + lsMemPtr->convexVertexData.gSpuConvexShapePtr = gatheredObjectData->m_spuCollisionShape; + lsMemPtr->convexVertexData.gConvexPoints = &lsMemPtr->convexVertexData.g_convexPointBuffer[0]; + } + + /* performRaycast */ + SpuSubsimplexRayCast caster (gatheredObjectData->m_spuCollisionShape, &lsMemPtr->convexVertexData, gatheredObjectData->m_shapeType, 0.0, &simplexSolver); + bool r = caster.calcTimeOfImpact (rayFromTrans, rayToTrans, gatheredObjectData->m_worldTransform, gatheredObjectData->m_worldTransform,result); + + if (r) + { + workUnitOut->hitFraction = result.m_fraction; + workUnitOut->hitNormal = result.m_normal; + } +} + void processRaycastTask(void* userPtr, void* lsMemory) { RaycastTask_LocalStoreMemory* localMemory = (RaycastTask_LocalStoreMemory*)lsMemory; @@ -95,22 +471,36 @@ void processRaycastTask(void* userPtr, void* lsMemory) for (int objectId = 0; objectId < taskDesc.numSpuCollisionObjectWrappers; objectId++) { RaycastGatheredObjectData gatheredObjectData; - GatherCollisionObjectAndShapeData (gatheredObjectData, *localMemory, (ppu_address_t)&cows[objectId]); + GatherCollisionObjectAndShapeData (&gatheredObjectData, localMemory, (ppu_address_t)&cows[objectId]); /* load initial collision shape */ for (int rayId = 0; rayId < taskDesc.numWorkUnits; rayId++) { - SpuRaycastTaskWorkUnitOut rayOut; - - dmaLoadRayOutput ((ppu_address_t)taskDesc.workUnits[rayId].output, &rayOut, 1); + const SpuRaycastTaskWorkUnit& workUnit = taskDesc.workUnits[rayId]; + ATTRIBUTE_ALIGNED16(SpuRaycastTaskWorkUnitOut workUnitOut); + dmaLoadRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); - - float t = (float)rayId/(float)taskDesc.numWorkUnits; - /* performRaycast */ - rayOut.hitFraction = 0.1f * t; - rayOut.hitNormal = btVector3(1.0, 0.0, 0.0); + + SpuRaycastTaskWorkUnitOut tWorkUnitOut; + tWorkUnitOut.hitFraction = 1.0; + + if (btBroadphaseProxy::isConvex (gatheredObjectData.m_shapeType)) + { + //performRaycastAgainstConvex (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); + } else if (btBroadphaseProxy::isCompound (gatheredObjectData.m_shapeType)) { + performRaycastAgainstCompound (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); + } else if (btBroadphaseProxy::isConcave (gatheredObjectData.m_shapeType)) { + performRaycastAgainstConcave (&gatheredObjectData, workUnit, &tWorkUnitOut, localMemory); + } + + /* XXX Only support taking the closest hit for now */ + if (tWorkUnitOut.hitFraction < workUnitOut.hitFraction) + { + workUnitOut.hitFraction = tWorkUnitOut.hitFraction; + workUnitOut.hitNormal = tWorkUnitOut.hitNormal; + } /* write ray cast data back */ - dmaStoreRayOutput ((ppu_address_t)taskDesc.workUnits[rayId].output, &rayOut, 1); + dmaStoreRayOutput ((ppu_address_t)workUnit.output, &workUnitOut, 1); cellDmaWaitTagStatusAll(DMA_MASK(1)); } } diff --git a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.h b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.h index 0eb1b5d8b..682c25c89 100644 --- a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.h +++ b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuRaycastTask.h @@ -16,7 +16,7 @@ struct RaycastGatheredObjectData btTransform m_worldTransform; }; -struct SpuRaycastTaskWorkUnitOut +ATTRIBUTE_ALIGNED16(struct) SpuRaycastTaskWorkUnitOut { btVector3 hitNormal; /* out */ btScalar hitFraction; /* out */ @@ -24,14 +24,14 @@ struct SpuRaycastTaskWorkUnitOut }; /* Perform a raycast on collision object */ -struct SpuRaycastTaskWorkUnit +ATTRIBUTE_ALIGNED16(struct) SpuRaycastTaskWorkUnit { btVector3 rayFrom; /* in */ btVector3 rayTo; /* in */ SpuRaycastTaskWorkUnitOut* output; /* out */ }; -#define SPU_RAYCAST_WORK_UNITS_PER_TASK 16 +#define SPU_RAYCAST_WORK_UNITS_PER_TASK 4 struct SpuRaycastTaskDesc { diff --git a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.cpp b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.cpp index 8cb60a770..e8f93ed4d 100644 --- a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.cpp +++ b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.cpp @@ -13,19 +13,17 @@ subject to the following restrictions: 3. This notice may not be removed or altered from any source distribution. */ - #include "SpuSubSimplexConvexCast.h" -#include "SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h" + #include "BulletCollision/CollisionShapes/btConvexShape.h" #include "BulletCollision/CollisionShapes/btMinkowskiSumShape.h" #include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h" -SpuSubsimplexConvexCast::SpuSubsimplexConvexCast (const void* convexA, - const void* convexB, - SpuVoronoiSimplexSolver* simplexSolver) - :m_simplexSolver(simplexSolver), m_convexA(convexA),m_convexB(convexB) +SpuSubsimplexRayCast::SpuSubsimplexRayCast (void* shapeB, SpuConvexPolyhedronVertexData* convexDataB, int shapeTypeB, float marginB, + SpuVoronoiSimplexSolver* simplexSolver) + :m_simplexSolver(simplexSolver), m_shapeB(shapeB), m_convexDataB(convexDataB), m_shapeTypeB(shapeTypeB), m_marginB(marginB) { } @@ -37,27 +35,33 @@ SpuSubsimplexConvexCast::SpuSubsimplexConvexCast (const void* convexA, #define MAX_ITERATIONS 32 #endif -bool SpuSubsimplexConvexCast::calcTimeOfImpact(const btTransform& fromA, - const btTransform& toA, - const btTransform& fromB, - const btTransform& toB, - SpuCastResult& result) +/* Returns the support point of the minkowski sum: + * MSUM(Pellet, ConvexShape) + * + */ +btVector3 supportPoint (btTransform xform, int shapeType, const void* shape, SpuConvexPolyhedronVertexData* convexVertexData, btVector3 seperatingAxis) { - //localGetSupportingVertexWithoutMargin(m_shapeTypeA, m_minkowskiA, seperatingAxisInA,input.m_convexVertexData[0]); -#if 0 - btMinkowskiSumShape combi(m_convexA,m_convexB); - btMinkowskiSumShape* convex = &combi; + btVector3 SupportPellet = btVector3(0.0, 0.0, 0.0); + btVector3 rotatedSeperatingAxis = seperatingAxis * xform.getBasis(); + btVector3 SupportShape = xform(localGetSupportingVertexWithoutMargin(shapeType, (void*)shape, rotatedSeperatingAxis, convexVertexData)); + return SupportPellet + SupportShape; +} +bool SpuSubsimplexRayCast::calcTimeOfImpact(const btTransform& fromRay, + const btTransform& toRay, + const btTransform& fromB, + const btTransform& toB, + SpuCastResult& result) +{ btTransform rayFromLocalA; btTransform rayToLocalA; - rayFromLocalA = fromA.inverse()* fromB; - rayToLocalA = toA.inverse()* toB; - + rayFromLocalA = fromRay.inverse()* fromB; + rayToLocalA = toRay.inverse()* toB; m_simplexSolver->reset(); - - convex->setTransformB(btTransform(rayFromLocalA.getBasis())); + + btTransform bXform = btTransform(rayFromLocalA.getBasis()); //btScalar radius = btScalar(0.01); @@ -69,8 +73,7 @@ bool SpuSubsimplexConvexCast::calcTimeOfImpact(const btTransform& fromA, btVector3 r = -(rayToLocalA.getOrigin()-rayFromLocalA.getOrigin()); btVector3 x = s; btVector3 v; - btVector3 arbitraryPoint = convex->localGetSupportingVertex(r); - + btVector3 arbitraryPoint = supportPoint(bXform, m_shapeTypeB, m_shapeB, m_convexDataB, r); v = x - arbitraryPoint; int maxIter = MAX_ITERATIONS; @@ -82,7 +85,6 @@ bool SpuSubsimplexConvexCast::calcTimeOfImpact(const btTransform& fromA, btScalar lastLambda = lambda; - btScalar dist2 = v.length2(); #ifdef BT_USE_DOUBLE_PRECISION btScalar epsilon = btScalar(0.0001); @@ -94,8 +96,8 @@ bool SpuSubsimplexConvexCast::calcTimeOfImpact(const btTransform& fromA, while ( (dist2 > epsilon) && maxIter--) { - p = convex->localGetSupportingVertex( v); - w = x - p; + p = supportPoint(bXform, m_shapeTypeB, m_shapeB, m_convexDataB, v); + w = x - p; btScalar VdotW = v.dot(w); @@ -136,8 +138,6 @@ bool SpuSubsimplexConvexCast::calcTimeOfImpact(const btTransform& fromA, result.m_fraction = lambda; result.m_normal = n; -#endif - return true; } diff --git a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.h b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.h index f539722c7..81321648e 100644 --- a/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.h +++ b/Extras/BulletMultiThreaded/SpuRaycastTask/SpuSubSimplexConvexCast.h @@ -14,43 +14,47 @@ subject to the following restrictions: */ -#ifndef SPU_SUBSIMPLEX_CONVEX_CAST_H -#define SPU_SUBSIMPLEX_CONVEX_CAST_H +#ifndef SPU_SUBSIMPLEX_RAY_CAST_H +#define SPU_SUBSIMPLEX_RAY_CAST_H #include "SpuNarrowPhaseCollisionTask/SpuVoronoiSimplexSolver.h" +#include "SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h" #include "SpuRaycastTask.h" class btConvexShape; struct SpuCastResult { + float m_fraction; + btVector3 m_normal; }; /// btSubsimplexConvexCast implements Gino van den Bergens' paper ///"Ray Casting against bteral Convex Objects with Application to Continuous Collision Detection" /// GJK based Ray Cast, optimized version /// Objects should not start in overlap, otherwise results are not defined. -class SpuSubsimplexConvexCast +class SpuSubsimplexRayCast { SpuVoronoiSimplexSolver* m_simplexSolver; - const void* m_convexA; - const void* m_convexB; - RaycastGatheredObjectData* m_dataB; -public: + void* m_shapeB; + SpuConvexPolyhedronVertexData* m_convexDataB; + int m_shapeTypeB; + float m_marginB; - SpuSubsimplexConvexCast (const void* shapeA, - const void* shapeB, - SpuVoronoiSimplexSolver* simplexSolver); +public: + SpuSubsimplexRayCast (void* shapeB, SpuConvexPolyhedronVertexData* convexDataB, int shapeTypeB, float marginB, + SpuVoronoiSimplexSolver* simplexSolver); //virtual ~btSubsimplexConvexCast(); + ///SimsimplexConvexCast calculateTimeOfImpact calculates the time of impact+normal for the linear cast (sweep) between two moving objects. ///Precondition is that objects should not penetration/overlap at the start from the interval. Overlap can be tested using btGjkPairDetector. - bool calcTimeOfImpact(const btTransform& fromA, - const btTransform& toA, + bool calcTimeOfImpact(const btTransform& fromRay, + const btTransform& toRay, const btTransform& fromB, const btTransform& toB, SpuCastResult& result); }; -#endif //SUBSIMPLEX_CONVEX_CAST_H +#endif //SUBSIMPLEX_RAY_CAST_H diff --git a/Extras/BulletMultiThreaded/SpuRaycastTaskProcess.cpp b/Extras/BulletMultiThreaded/SpuRaycastTaskProcess.cpp index 9c453d377..964348f59 100644 --- a/Extras/BulletMultiThreaded/SpuRaycastTaskProcess.cpp +++ b/Extras/BulletMultiThreaded/SpuRaycastTaskProcess.cpp @@ -1,172 +1,172 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - -#include "SpuRaycastTaskProcess.h" - -SpuRaycastTaskProcess::SpuRaycastTaskProcess(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks) -:m_threadInterface(threadInterface), -m_maxNumOutstandingTasks(maxNumOutstandingTasks) -{ - m_workUnitTaskBuffers = (unsigned char *)0; - m_taskBusy.resize(m_maxNumOutstandingTasks); - m_spuRaycastTaskDesc.resize(m_maxNumOutstandingTasks); - - for (int i = 0; i < m_maxNumOutstandingTasks; i++) - { - m_taskBusy[i] = false; - } - m_numBusyTasks = 0; - m_currentTask = 0; - m_currentWorkUnitInTask = 0; - - m_threadInterface->startSPU(); - - //printf("sizeof vec_float4: %d\n", sizeof(vec_float4)); - //printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", sizeof(SpuGatherAndProcessWorkUnitInput)); - -} - -SpuRaycastTaskProcess::~SpuRaycastTaskProcess() -{ - - if (m_workUnitTaskBuffers != 0) - { - btAlignedFree(m_workUnitTaskBuffers); - m_workUnitTaskBuffers = 0; - } - - m_threadInterface->stopSPU(); -} - - - -void SpuRaycastTaskProcess::initialize2(void* spuCollisionObjectsWrappers, int numSpuCollisionObjectWrappers) -{ - m_spuCollisionObjectWrappers = spuCollisionObjectsWrappers; - m_numSpuCollisionObjectWrappers = numSpuCollisionObjectWrappers; - for (int i = 0; i < m_maxNumOutstandingTasks; i++) - { - m_taskBusy[i] = false; - } - m_numBusyTasks = 0; - m_currentTask = 0; - m_currentWorkUnitInTask = 0; - -#ifdef DEBUG_SpuRaycastTaskProcess - m_initialized = true; -#endif -} - - -void SpuRaycastTaskProcess::issueTask2() -{ - m_taskBusy[m_currentTask] = true; - m_numBusyTasks++; - - SpuRaycastTaskDesc& taskDesc = m_spuRaycastTaskDesc[m_currentTask]; - - taskDesc.taskId = m_currentTask; - m_threadInterface->sendRequest(1, (uint32_t) &taskDesc,m_currentTask); - //printf("send thread requested for task %d\n", m_currentTask); - // if all tasks busy, wait for spu event to clear the task. - if (m_numBusyTasks >= m_maxNumOutstandingTasks) - { - unsigned int taskId; - unsigned int outputSize; - - m_threadInterface->waitForResponse(&taskId, &outputSize); - - //printf("PPU: after issue, received event: %u %d\n", taskId, outputSize); - - m_taskBusy[taskId] = false; - - m_numBusyTasks--; - } else { - //printf("Sent request, not enough busy tasks\n"); - } -} - -void SpuRaycastTaskProcess::addWorkToTask(SpuRaycastTaskWorkUnit workunit) -{ - m_spuRaycastTaskDesc[m_currentTask].workUnits[m_currentWorkUnitInTask] = workunit; - m_currentWorkUnitInTask++; - if (m_currentWorkUnitInTask == SPU_RAYCAST_WORK_UNITS_PER_TASK) - { - m_spuRaycastTaskDesc[m_currentTask].numWorkUnits = m_currentWorkUnitInTask; - m_spuRaycastTaskDesc[m_currentTask].numSpuCollisionObjectWrappers = m_numSpuCollisionObjectWrappers; - m_spuRaycastTaskDesc[m_currentTask].spuCollisionObjectsWrappers = m_spuCollisionObjectWrappers; - //printf("Task buffer full, issuing\n"); - issueTask2 (); - //printf("Returned from issueTask2()\n"); - m_currentWorkUnitInTask = 0; - - // find new task buffer - for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++) - { - if (!m_taskBusy[i]) - { - m_currentTask = i; - //init the task data - break; - } - } - //printf("next task = %d\n", m_currentTask); - } -} - - -void -SpuRaycastTaskProcess::flush2() -{ -#ifdef DEBUG_SPU_TASK_SCHEDULING - printf("\nSpuRaycastTaskProcess::flush()\n"); -#endif //DEBUG_SPU_TASK_SCHEDULING - - // if there's a partially filled task buffer, submit that task - //printf("Flushing... %d remaining\n", m_currentWorkUnitInTask); - if (m_currentWorkUnitInTask > 0) - { - m_spuRaycastTaskDesc[m_currentTask].numWorkUnits = m_currentWorkUnitInTask; - m_spuRaycastTaskDesc[m_currentTask].numSpuCollisionObjectWrappers = m_numSpuCollisionObjectWrappers; - m_spuRaycastTaskDesc[m_currentTask].spuCollisionObjectsWrappers = m_spuCollisionObjectWrappers; - issueTask2(); - m_currentWorkUnitInTask = 0; - } - - - // all tasks are issued, wait for all tasks to be complete - while(m_numBusyTasks > 0) - { - // Consolidating SPU code - unsigned int taskId; - unsigned int outputSize; - - //printf("Busy tasks... %d\n", m_numBusyTasks); - - { - // SPURS support. - m_threadInterface->waitForResponse(&taskId, &outputSize); - } - - //printf("PPU: flushing, received event: %u %d\n", taskId, outputSize); - - //postProcess(taskId, outputSize); - - m_taskBusy[taskId] = false; - - m_numBusyTasks--; - } -} +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2007 Erwin Coumans http://bulletphysics.com + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "SpuRaycastTaskProcess.h" + + +SpuRaycastTaskProcess::SpuRaycastTaskProcess(class btThreadSupportInterface* threadInterface, unsigned int maxNumOutstandingTasks) +:m_threadInterface(threadInterface), +m_maxNumOutstandingTasks(maxNumOutstandingTasks) +{ + m_workUnitTaskBuffers = (unsigned char *)0; + m_taskBusy.resize(m_maxNumOutstandingTasks); + m_spuRaycastTaskDesc.resize(m_maxNumOutstandingTasks); + + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + m_currentWorkUnitInTask = 0; + + m_threadInterface->startSPU(); + + //printf("sizeof vec_float4: %d\n", sizeof(vec_float4)); + //printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", sizeof(SpuGatherAndProcessWorkUnitInput)); + +} + +SpuRaycastTaskProcess::~SpuRaycastTaskProcess() +{ + + if (m_workUnitTaskBuffers != 0) + { + btAlignedFree(m_workUnitTaskBuffers); + m_workUnitTaskBuffers = 0; + } + + m_threadInterface->stopSPU(); +} + + + +void SpuRaycastTaskProcess::initialize2(void* spuCollisionObjectsWrappers, int numSpuCollisionObjectWrappers) +{ + m_spuCollisionObjectWrappers = spuCollisionObjectsWrappers; + m_numSpuCollisionObjectWrappers = numSpuCollisionObjectWrappers; + for (int i = 0; i < m_maxNumOutstandingTasks; i++) + { + m_taskBusy[i] = false; + } + m_numBusyTasks = 0; + m_currentTask = 0; + m_currentWorkUnitInTask = 0; + +#ifdef DEBUG_SpuRaycastTaskProcess + m_initialized = true; +#endif +} + + +void SpuRaycastTaskProcess::issueTask2() +{ + m_taskBusy[m_currentTask] = true; + m_numBusyTasks++; + + SpuRaycastTaskDesc& taskDesc = m_spuRaycastTaskDesc[m_currentTask]; + + taskDesc.taskId = m_currentTask; + m_threadInterface->sendRequest(1, (uint32_t) &taskDesc,m_currentTask); + //printf("send thread requested for task %d\n", m_currentTask); + // if all tasks busy, wait for spu event to clear the task. + if (m_numBusyTasks >= m_maxNumOutstandingTasks) + { + unsigned int taskId; + unsigned int outputSize; + + m_threadInterface->waitForResponse(&taskId, &outputSize); + + //printf("PPU: after issue, received event: %u %d\n", taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } else { + //printf("Sent request, not enough busy tasks\n"); + } +} + +void SpuRaycastTaskProcess::addWorkToTask(SpuRaycastTaskWorkUnit workunit) +{ + m_spuRaycastTaskDesc[m_currentTask].workUnits[m_currentWorkUnitInTask] = workunit; + m_currentWorkUnitInTask++; + if (m_currentWorkUnitInTask == SPU_RAYCAST_WORK_UNITS_PER_TASK) + { + m_spuRaycastTaskDesc[m_currentTask].numWorkUnits = m_currentWorkUnitInTask; + m_spuRaycastTaskDesc[m_currentTask].numSpuCollisionObjectWrappers = m_numSpuCollisionObjectWrappers; + m_spuRaycastTaskDesc[m_currentTask].spuCollisionObjectsWrappers = m_spuCollisionObjectWrappers; + //printf("Task buffer full, issuing\n"); + issueTask2 (); + //printf("Returned from issueTask2()\n"); + m_currentWorkUnitInTask = 0; + + // find new task buffer + for (unsigned int i = 0; i < m_maxNumOutstandingTasks; i++) + { + if (!m_taskBusy[i]) + { + m_currentTask = i; + //init the task data + break; + } + } + //printf("next task = %d\n", m_currentTask); + } +} + + +void +SpuRaycastTaskProcess::flush2() +{ +#ifdef DEBUG_SPU_TASK_SCHEDULING + printf("\nSpuRaycastTaskProcess::flush()\n"); +#endif //DEBUG_SPU_TASK_SCHEDULING + + // if there's a partially filled task buffer, submit that task + //printf("Flushing... %d remaining\n", m_currentWorkUnitInTask); + if (m_currentWorkUnitInTask > 0) + { + m_spuRaycastTaskDesc[m_currentTask].numWorkUnits = m_currentWorkUnitInTask; + m_spuRaycastTaskDesc[m_currentTask].numSpuCollisionObjectWrappers = m_numSpuCollisionObjectWrappers; + m_spuRaycastTaskDesc[m_currentTask].spuCollisionObjectsWrappers = m_spuCollisionObjectWrappers; + issueTask2(); + m_currentWorkUnitInTask = 0; + } + + + // all tasks are issued, wait for all tasks to be complete + while(m_numBusyTasks > 0) + { + // Consolidating SPU code + unsigned int taskId; + unsigned int outputSize; + + //printf("Busy tasks... %d\n", m_numBusyTasks); + + { + // SPURS support. + m_threadInterface->waitForResponse(&taskId, &outputSize); + } + + //printf("PPU: flushing, received event: %u %d\n", taskId, outputSize); + + //postProcess(taskId, outputSize); + + m_taskBusy[taskId] = false; + + m_numBusyTasks--; + } +} diff --git a/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp b/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp index fff0e66d2..73fd3643a 100644 --- a/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp +++ b/Extras/BulletMultiThreaded/SpuSolverTask/SpuParallellSolverTask.cpp @@ -17,6 +17,7 @@ Written by: Marten Svanfeldt #define IN_PARALLELL_SOLVER 1 + #include "SpuParallellSolverTask.h" #include "BulletDynamics/Dynamics/btRigidBody.h" #include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"