mirror of
https://github.com/bulletphysics/bullet3
synced 2024-12-13 13:20:07 +00:00
Avoid breaking up the clipHullHull kernel, it ruins performance. Unfortunately, Mac OSX still requires it.
Use indices instead of copies for small/large aabbs in broadphase (grid / sap)
This commit is contained in:
parent
ff051f87aa
commit
3e8b183587
@ -104,7 +104,7 @@ static PairBench* sPairDemo = 0;
|
||||
|
||||
#define BP_COMBO_INDEX 123
|
||||
|
||||
static int curSelectedBroadphase = 5;
|
||||
static int curSelectedBroadphase = 0;
|
||||
static BroadphaseEntry allBroadphases[]=
|
||||
{
|
||||
{"Gpu Grid",b3GpuGridBroadphase::CreateFunc},
|
||||
|
@ -33,8 +33,8 @@ b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_
|
||||
m_device(device),
|
||||
m_queue(q),
|
||||
m_allAabbsGPU1(ctx,q),
|
||||
m_largeAabbsGPU(ctx,q),
|
||||
m_smallAabbsGPU(ctx,q),
|
||||
m_smallAabbsMappingGPU(ctx,q),
|
||||
m_largeAabbsMappingGPU(ctx,q),
|
||||
m_gpuPairs(ctx,q),
|
||||
|
||||
m_hashGpu(ctx,q),
|
||||
@ -117,8 +117,10 @@ void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3
|
||||
aabb.m_maxVec = aabbMax;
|
||||
aabb.m_minIndices[3] = userPtr;
|
||||
aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr;
|
||||
m_smallAabbsMappingCPU.push_back(m_allAabbsCPU1.size());
|
||||
|
||||
m_allAabbsCPU1.push_back(aabb);
|
||||
m_smallAabbsCPU.push_back(aabb);
|
||||
|
||||
}
|
||||
void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
|
||||
{
|
||||
@ -127,8 +129,9 @@ void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Ve
|
||||
aabb.m_maxVec = aabbMax;
|
||||
aabb.m_minIndices[3] = userPtr;
|
||||
aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr;
|
||||
m_largeAabbsMappingCPU.push_back(m_allAabbsCPU1.size());
|
||||
|
||||
m_allAabbsCPU1.push_back(aabb);
|
||||
m_largeAabbsCPU.push_back(aabb);
|
||||
}
|
||||
|
||||
void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
@ -151,88 +154,25 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
return;
|
||||
}
|
||||
|
||||
//sync small AABBs
|
||||
{
|
||||
|
||||
|
||||
bool syncOnHost = false;
|
||||
if (syncOnHost)
|
||||
{
|
||||
m_allAabbsGPU1.copyToHost(this->m_allAabbsCPU1);
|
||||
b3AlignedObjectArray<b3SapAabb> hostSmallAabbs;
|
||||
m_smallAabbsGPU.copyToHost(hostSmallAabbs);
|
||||
int numSmallAabbs = hostSmallAabbs.size();
|
||||
for (int i=0;i<numSmallAabbs;i++)
|
||||
{
|
||||
//__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)
|
||||
{
|
||||
//int i = get_global_id(0);
|
||||
//if (i>=numObjects)
|
||||
// return;
|
||||
int src = hostSmallAabbs[i].m_signedMaxIndices[3];
|
||||
hostSmallAabbs[i] = m_allAabbsCPU1[src];
|
||||
hostSmallAabbs[i].m_signedMaxIndices[3] = src;
|
||||
}
|
||||
}
|
||||
m_smallAabbsGPU.copyFromHost(hostSmallAabbs);
|
||||
} else
|
||||
{
|
||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
||||
if (numSmallAabbs)
|
||||
{
|
||||
B3_PROFILE("copyAabbsKernelSmall");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ),
|
||||
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()),
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" );
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numSmallAabbs );
|
||||
int num = numSmallAabbs;
|
||||
launcher.launch1D( num);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//sync large AABBs
|
||||
{
|
||||
int numLargeAabbs = m_largeAabbsGPU.size();
|
||||
|
||||
if (numLargeAabbs)
|
||||
{
|
||||
B3_PROFILE("copyAabbsKernelLarge");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU1.getBufferCL(), true ),
|
||||
b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()),
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel");
|
||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst( numLargeAabbs );
|
||||
int num = numLargeAabbs;
|
||||
launcher.launch1D( num);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
||||
int numSmallAabbs = m_smallAabbsMappingGPU.size();
|
||||
|
||||
b3OpenCLArray<int> pairCount(m_context,m_queue);
|
||||
pairCount.push_back(0);
|
||||
m_gpuPairs.resize(maxPairs);//numSmallAabbs*maxPairsPerBody);
|
||||
|
||||
{
|
||||
int numLargeAabbs = m_largeAabbsGPU.size();
|
||||
int numLargeAabbs = m_largeAabbsMappingGPU.size();
|
||||
if (numLargeAabbs && numSmallAabbs)
|
||||
{
|
||||
B3_PROFILE("sap2Kernel");
|
||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),
|
||||
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL() ),
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU1.getBufferCL() ),
|
||||
b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ),
|
||||
b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ),
|
||||
b3BufferInfoCL( m_gpuPairs.getBufferCL() ),
|
||||
b3BufferInfoCL(pairCount.getBufferCL())};
|
||||
b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
|
||||
@ -245,6 +185,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64);
|
||||
|
||||
int numPairs = pairCount.at(0);
|
||||
|
||||
if (numPairs >maxPairs)
|
||||
{
|
||||
b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
|
||||
@ -264,7 +205,8 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
B3_PROFILE("kCalcHashAABB");
|
||||
b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB");
|
||||
launch.setConst(numSmallAabbs);
|
||||
launch.setBuffer(m_smallAabbsGPU.getBufferCL());
|
||||
launch.setBuffer(m_allAabbsGPU1.getBufferCL());
|
||||
launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
|
||||
launch.setBuffer(m_hashGpu.getBufferCL());
|
||||
launch.setBuffer(this->m_paramsGPU.getBufferCL());
|
||||
launch.launch1D(numSmallAabbs);
|
||||
@ -307,7 +249,8 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
|
||||
b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs");
|
||||
launch.setConst(numSmallAabbs);
|
||||
launch.setBuffer(m_smallAabbsGPU.getBufferCL());
|
||||
launch.setBuffer(m_allAabbsGPU1.getBufferCL());
|
||||
launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
|
||||
launch.setBuffer(m_hashGpu.getBufferCL());
|
||||
launch.setBuffer(m_cellStartGpu.getBufferCL());
|
||||
|
||||
@ -397,8 +340,8 @@ void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
void b3GpuGridBroadphase::writeAabbsToGpu()
|
||||
{
|
||||
m_allAabbsGPU1.copyFromHost(m_allAabbsCPU1);
|
||||
m_largeAabbsGPU.copyFromHost(m_largeAabbsCPU);
|
||||
m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
|
||||
m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU);
|
||||
m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU);
|
||||
|
||||
}
|
||||
|
||||
|
@ -32,10 +32,11 @@ protected:
|
||||
b3OpenCLArray<b3SapAabb> m_allAabbsGPU1;
|
||||
b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU1;
|
||||
|
||||
b3OpenCLArray<b3SapAabb> m_smallAabbsGPU;
|
||||
b3AlignedObjectArray<b3SapAabb> m_smallAabbsCPU;
|
||||
b3OpenCLArray<b3SapAabb> m_largeAabbsGPU;
|
||||
b3AlignedObjectArray<b3SapAabb> m_largeAabbsCPU;
|
||||
b3OpenCLArray<int> m_smallAabbsMappingGPU;
|
||||
b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
|
||||
|
||||
b3OpenCLArray<int> m_largeAabbsMappingGPU;
|
||||
b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
|
||||
|
||||
b3AlignedObjectArray<b3Int4> m_hostPairs;
|
||||
b3OpenCLArray<b3Int4> m_gpuPairs;
|
||||
|
@ -1149,7 +1149,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
b3BufferInfoCL( m_allAabbsGPU.getBufferCL() ),
|
||||
b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ),
|
||||
b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ),
|
||||
b3BufferInfoCL( m_overlappingPairs.getBufferCL() ),
|
||||
b3BufferInfoCL(m_pairCount.getBufferCL())};
|
||||
b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
|
||||
|
@ -22,15 +22,15 @@ int4 getGridPos(float4 worldPos, __global float4* pParams)
|
||||
|
||||
|
||||
// calculate grid hash value for each body using its AABB
|
||||
__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams )
|
||||
__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )
|
||||
{
|
||||
int index = get_global_id(0);
|
||||
if(index >= numObjects)
|
||||
{
|
||||
return;
|
||||
}
|
||||
float4 bbMin = pAABB[index*2];
|
||||
float4 bbMax = pAABB[index*2 + 1];
|
||||
float4 bbMin = allpAABB[smallAabbMapping[index]*2];
|
||||
float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];
|
||||
float4 pos;
|
||||
pos.x = (bbMin.x + bbMax.x) * 0.5f;
|
||||
pos.y = (bbMin.y + bbMax.y) * 0.5f;
|
||||
@ -102,7 +102,8 @@ void findPairsInCell( int numObjects,
|
||||
int index,
|
||||
__global int2* pHash,
|
||||
__global int* pCellStart,
|
||||
__global float4* pAABB,
|
||||
__global float4* allpAABB,
|
||||
__global const int* smallAabbMapping,
|
||||
__global float4* pParams,
|
||||
volatile __global int* pairCount,
|
||||
__global int4* pPairBuff2,
|
||||
@ -121,8 +122,8 @@ void findPairsInCell( int numObjects,
|
||||
// iterate over bodies in this cell
|
||||
int2 sortedData = pHash[index];
|
||||
int unsorted_indx = sortedData.y;
|
||||
float4 min0 = pAABB[unsorted_indx*2 + 0];
|
||||
float4 max0 = pAABB[unsorted_indx*2 + 1];
|
||||
float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];
|
||||
float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];
|
||||
int handleIndex = as_int(min0.w);
|
||||
|
||||
int bucketEnd = bucketStart + maxBodiesPerCell;
|
||||
@ -138,8 +139,8 @@ void findPairsInCell( int numObjects,
|
||||
//if (unsorted_indx2 < unsorted_indx) // check not colliding with self
|
||||
if (unsorted_indx2 != unsorted_indx) // check not colliding with self
|
||||
{
|
||||
float4 min1 = pAABB[unsorted_indx2*2 + 0];
|
||||
float4 max1 = pAABB[unsorted_indx2*2 + 1];
|
||||
float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];
|
||||
float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];
|
||||
if(testAABBOverlap(min0, max0, min1, max1))
|
||||
{
|
||||
if (pairCount)
|
||||
@ -166,7 +167,8 @@ void findPairsInCell( int numObjects,
|
||||
}
|
||||
|
||||
__kernel void kFindOverlappingPairs( int numObjects,
|
||||
__global float4* pAABB,
|
||||
__global float4* allpAABB,
|
||||
__global const int* smallAabbMapping,
|
||||
__global int2* pHash,
|
||||
__global int* pCellStart,
|
||||
__global float4* pParams ,
|
||||
@ -183,8 +185,8 @@ __kernel void kFindOverlappingPairs( int numObjects,
|
||||
}
|
||||
int2 sortedData = pHash[index];
|
||||
int unsorted_indx = sortedData.y;
|
||||
float4 bbMin = pAABB[unsorted_indx*2 + 0];
|
||||
float4 bbMax = pAABB[unsorted_indx*2 + 1];
|
||||
float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];
|
||||
float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];
|
||||
float4 pos;
|
||||
pos.x = (bbMin.x + bbMax.x) * 0.5f;
|
||||
pos.y = (bbMin.y + bbMax.y) * 0.5f;
|
||||
@ -202,7 +204,7 @@ __kernel void kFindOverlappingPairs( int numObjects,
|
||||
for(int x=-1; x<=1; x++)
|
||||
{
|
||||
gridPosB.x = gridPosA.x + x;
|
||||
findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pParams, pairCount,pPairBuff2, maxPairs);
|
||||
findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -19,15 +19,15 @@ static const char* gridBroadphaseCL= \
|
||||
" return gridPos;\n"
|
||||
"}\n"
|
||||
"// calculate grid hash value for each body using its AABB\n"
|
||||
"__kernel void kCalcHashAABB(int numObjects, __global float4* pAABB, __global int2* pHash, __global float4* pParams )\n"
|
||||
"__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n"
|
||||
"{\n"
|
||||
" int index = get_global_id(0);\n"
|
||||
" if(index >= numObjects)\n"
|
||||
" {\n"
|
||||
" return;\n"
|
||||
" }\n"
|
||||
" float4 bbMin = pAABB[index*2];\n"
|
||||
" float4 bbMax = pAABB[index*2 + 1];\n"
|
||||
" float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n"
|
||||
" float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n"
|
||||
" float4 pos;\n"
|
||||
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
|
||||
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
|
||||
@ -91,7 +91,8 @@ static const char* gridBroadphaseCL= \
|
||||
" int index,\n"
|
||||
" __global int2* pHash,\n"
|
||||
" __global int* pCellStart,\n"
|
||||
" __global float4* pAABB, \n"
|
||||
" __global float4* allpAABB, \n"
|
||||
" __global const int* smallAabbMapping,\n"
|
||||
" __global float4* pParams,\n"
|
||||
" volatile __global int* pairCount,\n"
|
||||
" __global int4* pPairBuff2,\n"
|
||||
@ -110,8 +111,8 @@ static const char* gridBroadphaseCL= \
|
||||
" // iterate over bodies in this cell\n"
|
||||
" int2 sortedData = pHash[index];\n"
|
||||
" int unsorted_indx = sortedData.y;\n"
|
||||
" float4 min0 = pAABB[unsorted_indx*2 + 0]; \n"
|
||||
" float4 max0 = pAABB[unsorted_indx*2 + 1];\n"
|
||||
" float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n"
|
||||
" float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
|
||||
" int handleIndex = as_int(min0.w);\n"
|
||||
" \n"
|
||||
" int bucketEnd = bucketStart + maxBodiesPerCell;\n"
|
||||
@ -127,8 +128,8 @@ static const char* gridBroadphaseCL= \
|
||||
" //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n"
|
||||
" if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n"
|
||||
" { \n"
|
||||
" float4 min1 = pAABB[unsorted_indx2*2 + 0];\n"
|
||||
" float4 max1 = pAABB[unsorted_indx2*2 + 1];\n"
|
||||
" float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n"
|
||||
" float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n"
|
||||
" if(testAABBOverlap(min0, max0, min1, max1))\n"
|
||||
" {\n"
|
||||
" if (pairCount)\n"
|
||||
@ -154,7 +155,8 @@ static const char* gridBroadphaseCL= \
|
||||
" }\n"
|
||||
"}\n"
|
||||
"__kernel void kFindOverlappingPairs( int numObjects,\n"
|
||||
" __global float4* pAABB, \n"
|
||||
" __global float4* allpAABB, \n"
|
||||
" __global const int* smallAabbMapping,\n"
|
||||
" __global int2* pHash, \n"
|
||||
" __global int* pCellStart, \n"
|
||||
" __global float4* pParams ,\n"
|
||||
@ -170,8 +172,8 @@ static const char* gridBroadphaseCL= \
|
||||
" }\n"
|
||||
" int2 sortedData = pHash[index];\n"
|
||||
" int unsorted_indx = sortedData.y;\n"
|
||||
" float4 bbMin = pAABB[unsorted_indx*2 + 0];\n"
|
||||
" float4 bbMax = pAABB[unsorted_indx*2 + 1];\n"
|
||||
" float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n"
|
||||
" float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
|
||||
" float4 pos;\n"
|
||||
" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
|
||||
" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
|
||||
@ -189,7 +191,7 @@ static const char* gridBroadphaseCL= \
|
||||
" for(int x=-1; x<=1; x++) \n"
|
||||
" {\n"
|
||||
" gridPosB.x = gridPosA.x + x;\n"
|
||||
" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, pAABB, pParams, pairCount,pPairBuff2, maxPairs);\n"
|
||||
" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
|
@ -63,25 +63,26 @@ bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL*
|
||||
}
|
||||
|
||||
|
||||
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
|
||||
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if (i>=numUnsortedAabbs)
|
||||
return;
|
||||
|
||||
int j = get_global_id(1);
|
||||
if (j>=numSortedAabbs)
|
||||
if (j>=numUnSortedAabbs2)
|
||||
return;
|
||||
|
||||
|
||||
__global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];
|
||||
__global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];
|
||||
|
||||
if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j]))
|
||||
if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))
|
||||
{
|
||||
int4 myPair;
|
||||
|
||||
int xIndex = unsortedAabbPtr[0].m_minIndices[3];
|
||||
int yIndex = sortedAabbs[j].m_minIndices[3];
|
||||
int yIndex = unsortedAabbPtr2[0].m_minIndices[3];
|
||||
if (xIndex>yIndex)
|
||||
{
|
||||
int tmp = xIndex;
|
||||
@ -349,7 +350,7 @@ __kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btA
|
||||
}
|
||||
|
||||
|
||||
__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis)
|
||||
__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if (i>=numObjects)
|
||||
|
@ -56,21 +56,22 @@ static const char* sapCL= \
|
||||
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
||||
" return overlap;\n"
|
||||
"}\n"
|
||||
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
||||
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numUnsortedAabbs)\n"
|
||||
" return;\n"
|
||||
" int j = get_global_id(1);\n"
|
||||
" if (j>=numSortedAabbs)\n"
|
||||
" if (j>=numUnSortedAabbs2)\n"
|
||||
" return;\n"
|
||||
" __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n"
|
||||
" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j]))\n"
|
||||
" __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n"
|
||||
" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n"
|
||||
" {\n"
|
||||
" int4 myPair;\n"
|
||||
" \n"
|
||||
" int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n"
|
||||
" int yIndex = sortedAabbs[j].m_minIndices[3];\n"
|
||||
" int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n"
|
||||
" if (xIndex>yIndex)\n"
|
||||
" {\n"
|
||||
" int tmp = xIndex;\n"
|
||||
@ -306,7 +307,7 @@ static const char* sapCL= \
|
||||
" destAabbs[i] = allAabbs[src];\n"
|
||||
" destAabbs[i].m_maxIndices[3] = src;\n"
|
||||
"}\n"
|
||||
"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis)\n"
|
||||
"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if (i>=numObjects)\n"
|
||||
|
@ -3171,6 +3171,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
|
||||
if (1)
|
||||
{
|
||||
|
||||
if (1)
|
||||
{
|
||||
{
|
||||
B3_PROFILE("findSeparatingAxisVertexFaceKernel");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
@ -3228,7 +3230,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
|
||||
clFinish(m_queue);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
if (1)
|
||||
{
|
||||
B3_PROFILE("findSeparatingAxisUnitSphereKernel");
|
||||
b3BufferInfoCL bInfo[] = {
|
||||
@ -4466,8 +4469,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
|
||||
|
||||
|
||||
//convex-convex contact clipping
|
||||
B3_PROFILE("clipHullHullKernel");
|
||||
bool breakupKernel = true;
|
||||
|
||||
bool breakupKernel = false;
|
||||
|
||||
#ifdef __APPLE__
|
||||
breakupKernel = true;
|
||||
@ -4480,6 +4483,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
|
||||
#endif//CHECK_ON_HOST
|
||||
if (computeConvexConvex)
|
||||
{
|
||||
B3_PROFILE("clipHullHullKernel");
|
||||
if (breakupKernel)
|
||||
{
|
||||
|
||||
|
@ -958,8 +958,15 @@ __kernel void clipHullHullKernel( __global int4* pairs,
|
||||
|
||||
int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);
|
||||
|
||||
int dstIdx;
|
||||
|
||||
int mprContactIndex = pairs[pairIndex].z;
|
||||
|
||||
int dstIdx = mprContactIndex;
|
||||
if (dstIdx<0)
|
||||
{
|
||||
AppendInc( nGlobalContactsOut, dstIdx );
|
||||
}
|
||||
|
||||
if (dstIdx<contactCapacity)
|
||||
{
|
||||
pairs[pairIndex].z = dstIdx;
|
||||
@ -976,9 +983,13 @@ __kernel void clipHullHullKernel( __global int4* pairs,
|
||||
c->m_childIndexB = -1;
|
||||
|
||||
for (int i=0;i<nReducedContacts;i++)
|
||||
{
|
||||
//this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact
|
||||
if (i>0||(mprContactIndex<0))
|
||||
{
|
||||
c->m_worldPosB[i] = pointsIn[contactIdx[i]];
|
||||
}
|
||||
}
|
||||
GET_NPOINTS(*c) = nReducedContacts;
|
||||
}
|
||||
|
||||
|
@ -1245,8 +1245,13 @@ static const char* satClipKernelsCL= \
|
||||
" \n"
|
||||
" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
|
||||
" \n"
|
||||
" int dstIdx;\n"
|
||||
" \n"
|
||||
" int mprContactIndex = pairs[pairIndex].z;\n"
|
||||
" int dstIdx = mprContactIndex;\n"
|
||||
" if (dstIdx<0)\n"
|
||||
" {\n"
|
||||
" AppendInc( nGlobalContactsOut, dstIdx );\n"
|
||||
" }\n"
|
||||
" if (dstIdx<contactCapacity)\n"
|
||||
" {\n"
|
||||
" pairs[pairIndex].z = dstIdx;\n"
|
||||
@ -1262,8 +1267,12 @@ static const char* satClipKernelsCL= \
|
||||
" c->m_childIndexB = -1;\n"
|
||||
" for (int i=0;i<nReducedContacts;i++)\n"
|
||||
" {\n"
|
||||
" //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n"
|
||||
" if (i>0||(mprContactIndex<0))\n"
|
||||
" {\n"
|
||||
" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" GET_NPOINTS(*c) = nReducedContacts;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
|
Loading…
Reference in New Issue
Block a user