diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index 22756c685..e76f80669 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -18,6 +18,7 @@ m_queue(q), m_allAabbsGPU(ctx,q), m_smallAabbsGPU(ctx,q), m_largeAabbsGPU(ctx,q), +m_pairCount(ctx,q), m_overlappingPairs(ctx,q), m_gpuSmallSortData(ctx,q), m_gpuSmallSortedAabbs(ctx,q), @@ -260,7 +261,7 @@ void b3GpuSapBroadphase::reset() m_smallAabbsGPU.resize(0); m_smallAabbsCPU.resize(0); - + m_pairCount.resize(0); m_largeAabbsGPU.resize(0); m_largeAabbsCPU.resize(0); } @@ -404,8 +405,8 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) m_overlappingPairs.resize(maxPairs); - b3OpenCLArray pairCount(m_context, m_queue); - pairCount.push_back(0); + m_pairCount.resize(0); + m_pairCount.push_back(0); int numPairs=0; { @@ -413,7 +414,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) if (numLargeAabbs && numSmallAabbs) { B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(pairCount.getBufferCL())}; + b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; b3LauncherCL launcher(m_queue, m_sap2Kernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numLargeAabbs ); @@ -423,7 +424,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) //@todo: use actual maximum work item sizes of the device instead of hardcoded values launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64); - numPairs = pairCount.at(0); + numPairs = m_pairCount.at(0); if (numPairs >maxPairs) { b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); @@ -434,7 +435,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) if (m_gpuSmallSortedAabbs.size()) { B3_PROFILE("sapKernel"); - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(pairCount.getBufferCL())}; + b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; b3LauncherCL launcher(m_queue, m_sapKernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numSmallAabbs ); @@ -469,7 +470,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) launcher.launch1D( num); clFinish(m_queue); - numPairs = pairCount.at(0); + numPairs = m_pairCount.at(0); if (numPairs>maxPairs) { b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h index cf5709435..887b5d728 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h @@ -29,7 +29,9 @@ class b3GpuSapBroadphase int m_currentBuffer; public: - + + b3OpenCLArray m_pairCount; + b3OpenCLArray m_allAabbsGPU; b3AlignedObjectArray m_allAabbsCPU; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index fca41e6c9..c33ff53d6 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -56,7 +56,15 @@ GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_ m_device(device), m_queue(q), m_findSeparatingAxisKernel(0), -m_totalContactsOut(m_context, m_queue) +m_totalContactsOut(m_context, m_queue), +m_sepNormals(m_context, m_queue), +m_hasSeparatingNormals(m_context, m_queue), +m_concaveSepNormals(m_context, m_queue), +m_numConcavePairsOut(m_context, m_queue), +m_gpuCompoundPairs(m_context, m_queue), +m_gpuCompoundSepNormals(m_context, m_queue), +m_gpuHasCompoundSepNormals(m_context, m_queue), +m_numCompoundPairsOut(m_context, m_queue) { m_totalContactsOut.push_back(0); @@ -1866,31 +1874,25 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray sepNormals(m_context,m_queue); - sepNormals.resize(nPairs); - b3OpenCLArray hasSeparatingNormals(m_context,m_queue); - hasSeparatingNormals.resize(nPairs); + + m_sepNormals.resize(nPairs); + m_hasSeparatingNormals.resize(nPairs); int concaveCapacity=maxTriConvexPairCapacity; - b3OpenCLArray concaveSepNormals(m_context,m_queue); - concaveSepNormals.resize(concaveCapacity); + m_concaveSepNormals.resize(concaveCapacity); - b3OpenCLArray numConcavePairsOut(m_context,m_queue); - numConcavePairsOut.push_back(0); + m_numConcavePairsOut.push_back(0); int compoundPairCapacity=65536*10; - b3OpenCLArray gpuCompoundPairs(m_context,m_queue); - gpuCompoundPairs.resize(compoundPairCapacity); + m_gpuCompoundPairs.resize(compoundPairCapacity); - b3OpenCLArray gpuCompoundSepNormals(m_context,m_queue); - gpuCompoundSepNormals.resize(compoundPairCapacity); + m_gpuCompoundSepNormals.resize(compoundPairCapacity); - b3OpenCLArray gpuHasCompoundSepNormals(m_context,m_queue); - gpuHasCompoundSepNormals.resize(compoundPairCapacity); + m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); - b3OpenCLArray numCompoundPairsOut(m_context,m_queue); - numCompoundPairsOut.push_back(0); + m_numCompoundPairsOut.resize(0); + m_numCompoundPairsOut.push_back(0); int numCompoundPairs = 0; @@ -1914,8 +1916,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraysize() && treeNodesGPU->size()) { B3_PROFILE("m_bvhTraversalKernel"); - numConcavePairs = numConcavePairsOut.at(0); + numConcavePairs = m_numConcavePairsOut.at(0); b3LauncherCL launcher(m_queue, m_bvhTraversalKernel); launcher.setBuffer( pairs->getBufferCL()); launcher.setBuffer( bodyBuf->getBufferCL()); launcher.setBuffer( gpuCollidables.getBufferCL()); launcher.setBuffer( clAabbsWS.getBufferCL()); launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer( numConcavePairsOut.getBufferCL()); + launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); launcher.setBuffer( subTreesGPU->getBufferCL()); launcher.setBuffer( treeNodesGPU->getBufferCL()); launcher.setBuffer( bvhInfo->getBufferCL()); @@ -1954,7 +1956,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray maxTriConvexPairCapacity) { @@ -1979,7 +1981,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray cpuCompoundSepNormals; - // concaveSepNormals.copyToHost(cpuCompoundSepNormals); + // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); // b3AlignedObjectArray cpuConcavePairs; // triangleConvexPairsOut.copyToHost(cpuConcavePairs); @@ -2002,7 +2004,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArray compoundPairCapacity) numCompoundPairs = compoundPairCapacity; - gpuCompoundPairs.resize(numCompoundPairs); - gpuHasCompoundSepNormals.resize(numCompoundPairs); - gpuCompoundSepNormals.resize(numCompoundPairs); + m_gpuCompoundPairs.resize(numCompoundPairs); + m_gpuHasCompoundSepNormals.resize(numCompoundPairs); + m_gpuCompoundSepNormals.resize(numCompoundPairs); if (numCompoundPairs) @@ -2050,7 +2052,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraygetBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), @@ -2083,7 +2085,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraygetBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), @@ -2093,8 +2095,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraygetBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; @@ -2256,8 +2258,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraygetBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( sepNormals.getBufferCL()), - b3BufferInfoCL( hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), @@ -2334,8 +2336,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraygetBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( sepNormals.getBufferCL()), - b3BufferInfoCL( hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), @@ -2373,8 +2375,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraygetBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; @@ -2389,12 +2391,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArrayresize(nContacts); } - int nCompoundsPairs = gpuCompoundPairs.size(); + int nCompoundsPairs = m_gpuCompoundPairs.size(); if (nCompoundsPairs) { b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( gpuCompoundPairs.getBufferCL(), true ), + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), @@ -2403,8 +2405,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const b3OpenCLArraygetBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h index 7420ae4dc..700e719fd 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h @@ -61,6 +61,16 @@ struct GpuSatCollision b3OpenCLArray m_totalContactsOut; + b3OpenCLArray m_sepNormals; + b3OpenCLArray m_hasSeparatingNormals; + b3OpenCLArray m_concaveSepNormals; + b3OpenCLArray m_numConcavePairsOut; + b3OpenCLArray m_gpuCompoundPairs; + b3OpenCLArray m_gpuCompoundSepNormals; + b3OpenCLArray m_gpuHasCompoundSepNormals; + b3OpenCLArray m_numCompoundPairsOut; + + GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ); virtual ~GpuSatCollision(); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp index e5a08376e..038924c12 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp @@ -33,7 +33,9 @@ m_queue(queue) m_data->m_gpuSatCollision = new GpuSatCollision(ctx,device,queue); m_data->m_pBufPairsCPU = new b3AlignedObjectArray; m_data->m_pBufPairsCPU->resize(config.m_maxBroadphasePairs); - + m_data->m_triangleConvexPairs = new b3OpenCLArray(m_context,m_queue, config.m_maxTriConvexPairCapacity); + + //m_data->m_convexPairsOutGPU = new b3OpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); //m_data->m_planePairs = new b3OpenCLArray(ctx,queue,config.m_maxBroadphasePairs,false); @@ -110,6 +112,7 @@ b3GpuNarrowPhase::~b3GpuNarrowPhase() { delete m_data->m_gpuSatCollision; delete m_data->m_pBufPairsCPU; + delete m_data->m_triangleConvexPairs; //delete m_data->m_convexPairsOutGPU; //delete m_data->m_planePairs; delete m_data->m_pBufContactOutCPU; @@ -722,7 +725,6 @@ void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase int nContactOut = 0; int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity; - b3OpenCLArray triangleConvexPairs(m_context,m_queue, maxTriConvexPairCapacity); int numTriConvexPairsOut=0; b3OpenCLArray broadphasePairsGPU(m_context,m_queue); @@ -755,7 +757,7 @@ void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase m_data->m_bvhInfoGPU, numObjects, maxTriConvexPairCapacity, - triangleConvexPairs, + *m_data->m_triangleConvexPairs, numTriConvexPairsOut ); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h index bdcf8405d..45937a524 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h @@ -51,6 +51,7 @@ struct b3GpuNarrowPhaseInternalData struct GpuSatCollision* m_gpuSatCollision; b3AlignedObjectArray* m_pBufPairsCPU; + b3OpenCLArray* m_triangleConvexPairs; //b3OpenCLArray* m_convexPairsOutGPU; //b3OpenCLArray* m_planePairs; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp index e6b15e6f5..8749752fb 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsJacobiSolver.cpp @@ -56,6 +56,8 @@ struct b3GpuPgsJacobiSolverInternalData cl_kernel m_getInfo2Kernel; cl_kernel m_writeBackVelocitiesKernel; + b3OpenCLArray* m_dst; + b3OpenCLArray* m_gpuSolverBodies; b3OpenCLArray* m_gpuBatchConstraints; b3OpenCLArray* m_gpuConstraintRows; @@ -123,6 +125,8 @@ b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, m_gpuData->m_prefixScan = new b3PrefixScanCL(ctx,device,queue); + m_gpuData->m_dst = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); + m_gpuData->m_gpuSolverBodies = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); m_gpuData->m_gpuBatchConstraints = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); m_gpuData->m_gpuConstraintRows = new b3OpenCLArray(m_gpuData->m_context,m_gpuData->m_queue); @@ -130,7 +134,7 @@ b3GpuPgsJacobiSolver::b3GpuPgsJacobiSolver (cl_context ctx, cl_device_id device, cl_int errNum=0; { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,&errNum,"",B3_JOINT_SOLVER_PATH); + cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,&errNum,"",B3_JOINT_SOLVER_PATH,true); b3Assert(errNum==CL_SUCCESS); m_gpuData->m_solveJointConstraintRowsKernels = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device,solveConstraintRowsCL, "solveJointConstraintRows",&errNum,prog); b3Assert(errNum==CL_SUCCESS); @@ -164,6 +168,7 @@ b3GpuPgsJacobiSolver::~b3GpuPgsJacobiSolver () clReleaseKernel(m_gpuData->m_writeBackVelocitiesKernel); delete m_gpuData->m_prefixScan; + delete m_gpuData->m_dst; delete m_gpuData->m_gpuSolverBodies; delete m_gpuData->m_gpuBatchConstraints; delete m_gpuData->m_gpuConstraintRows; @@ -292,10 +297,9 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_gpuConstraintInfo1->copyToHost(m_tmpConstraintSizesPool); - b3OpenCLArray dst(m_gpuData->m_context,m_gpuData->m_queue); - dst.resize(numConstraints); + m_gpuData->m_dst->resize(numConstraints); unsigned int total=0; - m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1,dst,numConstraints,&total); + m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1,*m_gpuData->m_dst,numConstraints,&total); unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints-1); //b3AlignedObjectArray dstHost; //dst.copyToHost(dstHost); @@ -304,7 +308,7 @@ b3Scalar b3GpuPgsJacobiSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue,m_gpuData->m_initBatchConstraintsKernel); - launcher.setBuffer(dst.getBufferCL()); + launcher.setBuffer(m_gpuData->m_dst->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints);