mirror of
https://github.com/bulletphysics/bullet3
synced 2025-01-18 21:10:05 +00:00
add GPU incremental 3d sap (not enabled by default)
This commit is contained in:
parent
01fbe80f8f
commit
5991eef749
@ -1,4 +1,6 @@
|
||||
|
||||
bool searchIncremental3dSapOnGpu = true;
|
||||
|
||||
#include "b3GpuSapBroadphase.h"
|
||||
#include "Bullet3Common/b3Vector3.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
||||
@ -27,7 +29,23 @@ m_gpuSmallSortedAabbs(ctx,q),
|
||||
m_sum(ctx,q),
|
||||
m_sum2(ctx,q),
|
||||
m_dst(ctx,q),
|
||||
m_currentBuffer(-1)
|
||||
m_currentBuffer(-1),
|
||||
m_objectMinMaxIndexGPUaxis0(ctx,q),
|
||||
m_objectMinMaxIndexGPUaxis1(ctx,q),
|
||||
m_objectMinMaxIndexGPUaxis2(ctx,q),
|
||||
m_objectMinMaxIndexGPUaxis0prev(ctx,q),
|
||||
m_objectMinMaxIndexGPUaxis1prev(ctx,q),
|
||||
m_objectMinMaxIndexGPUaxis2prev(ctx,q),
|
||||
m_sortedAxisGPU0(ctx,q),
|
||||
m_sortedAxisGPU1(ctx,q),
|
||||
m_sortedAxisGPU2(ctx,q),
|
||||
m_sortedAxisGPU0prev(ctx,q),
|
||||
m_sortedAxisGPU1prev(ctx,q),
|
||||
m_sortedAxisGPU2prev(ctx,q),
|
||||
m_addedHostPairsGPU(ctx,q),
|
||||
m_removedHostPairsGPU(ctx,q),
|
||||
m_addedCountGPU(ctx,q),
|
||||
m_removedCountGPU(ctx,q)
|
||||
{
|
||||
const char* sapSrc = sapCL;
|
||||
const char* sapFastSrc = sapFastCL;
|
||||
@ -36,7 +54,8 @@ m_currentBuffer(-1)
|
||||
|
||||
cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"",B3_BROADPHASE_SAP_PATH);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"",B3_BROADPHASE_SAPFAST_PATH);
|
||||
//cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"",B3_BROADPHASE_SAPFAST_PATH);
|
||||
cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,0,&errNum,"",B3_BROADPHASE_SAPFAST_PATH,true);
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
#ifndef __APPLE__
|
||||
m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context,m_device,m_queue);
|
||||
@ -54,6 +73,9 @@ m_currentBuffer(-1)
|
||||
m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "prepareSumVarianceKernel",&errNum,sapProg );
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
m_computePairsIncremental3dSapKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsIncremental3dSapKernel",&errNum,sapFastProg );
|
||||
b3Assert(errNum==CL_SUCCESS);
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
@ -89,6 +111,7 @@ b3GpuSapBroadphase::~b3GpuSapBroadphase()
|
||||
clReleaseKernel(m_sapKernel);
|
||||
clReleaseKernel(m_sap2Kernel);
|
||||
clReleaseKernel(m_prepareSumVarianceKernel);
|
||||
clReleaseKernel(m_computePairsIncremental3dSapKernel);
|
||||
|
||||
}
|
||||
|
||||
@ -125,48 +148,693 @@ void b3GpuSapBroadphase::init3dSap()
|
||||
for (int buf=0;buf<2;buf++)
|
||||
{
|
||||
int totalNumAabbs = m_allAabbsCPU.size();
|
||||
m_sortedAxisCPU[axis][buf].resize(totalNumAabbs);
|
||||
int numEndPoints = 2*totalNumAabbs;
|
||||
m_sortedAxisCPU[axis][buf].resize(numEndPoints);
|
||||
|
||||
if (buf==m_currentBuffer)
|
||||
{
|
||||
for (int i=0;i<totalNumAabbs;i++)
|
||||
{
|
||||
m_sortedAxisCPU[axis][buf][i].m_key = FloatFlip(m_allAabbsCPU[i].m_minIndices[axis]);
|
||||
m_sortedAxisCPU[axis][buf][i].m_value = i;
|
||||
m_sortedAxisCPU[axis][buf][i*2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis])-1;
|
||||
m_sortedAxisCPU[axis][buf][i*2].m_value = i*2;
|
||||
m_sortedAxisCPU[axis][buf][i*2+1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis])+1;
|
||||
m_sortedAxisCPU[axis][buf][i*2+1].m_value = i*2+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]);
|
||||
}
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
int totalNumAabbs = m_allAabbsCPU.size();
|
||||
int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size();
|
||||
m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints);
|
||||
for (int i=0;i<numEndPoints;i++)
|
||||
{
|
||||
int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
|
||||
int newDest = destIndex/2;
|
||||
if (destIndex&1)
|
||||
{
|
||||
m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i;
|
||||
} else
|
||||
{
|
||||
m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool b3PairCmp(const b3Int2& p, const b3Int2& q)
|
||||
{
|
||||
return ((p.x<q.x) || ((p.x==q.x) && (p.y<q.y)));
|
||||
}
|
||||
|
||||
|
||||
static bool operator==(const b3Int2& a,const b3Int2& b)
|
||||
{
|
||||
return a.x == b.x && a.y == b.y;
|
||||
};
|
||||
|
||||
static bool operator<(const b3Int2& a,const b3Int2& b)
|
||||
{
|
||||
return a.x < b.x || (a.x == b.x && a.y < b.y);
|
||||
};
|
||||
|
||||
static bool operator>(const b3Int2& a,const b3Int2& b)
|
||||
{
|
||||
return a.x > b.x || (a.x == b.x && a.y > b.y);
|
||||
};
|
||||
|
||||
b3AlignedObjectArray<b3Int2> addedHostPairs;
|
||||
b3AlignedObjectArray<b3Int2> removedHostPairs;
|
||||
|
||||
b3AlignedObjectArray<b3SapAabb> preAabbs;
|
||||
|
||||
void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
{
|
||||
b3Assert(m_currentBuffer>=0);
|
||||
if (m_currentBuffer<0)
|
||||
return;
|
||||
static int framepje = 0;
|
||||
//printf("framepje=%d\n",framepje++);
|
||||
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap");
|
||||
|
||||
addedHostPairs.resize(0);
|
||||
removedHostPairs.resize(0);
|
||||
|
||||
b3Assert(m_currentBuffer>=0);
|
||||
|
||||
{
|
||||
for (int buf=0;buf<2;buf++)
|
||||
preAabbs.resize(m_allAabbsCPU.size());
|
||||
for (int i=0;i<preAabbs.size();i++)
|
||||
{
|
||||
b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size());
|
||||
preAabbs[i]=m_allAabbsCPU[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (m_currentBuffer<0)
|
||||
return;
|
||||
{
|
||||
B3_PROFILE("m_allAabbsGPU.copyToHost");
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
}
|
||||
|
||||
b3AlignedObjectArray<b3Int2> allPairs;
|
||||
{
|
||||
B3_PROFILE("m_overlappingPairs.copyToHost");
|
||||
m_overlappingPairs.copyToHost(allPairs);
|
||||
}
|
||||
if (0)
|
||||
{
|
||||
{
|
||||
printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n",
|
||||
m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1],m_allAabbsCPU[40].m_min[2],
|
||||
m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1],m_allAabbsCPU[40].m_max[2]);
|
||||
}
|
||||
|
||||
{
|
||||
printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n",
|
||||
m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1],m_allAabbsCPU[53].m_min[2],
|
||||
m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1],m_allAabbsCPU[53].m_max[2]);
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
b3Int2 newPair;
|
||||
newPair.x = 40;
|
||||
newPair.y = 53;
|
||||
int index = allPairs.findBinarySearch(newPair);
|
||||
printf("hasPair(40,53)=%d out of %d\n",index, allPairs.size());
|
||||
|
||||
{
|
||||
int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max,(const b3Vector3&)m_allAabbsCPU[53].m_min,(const b3Vector3&)m_allAabbsCPU[53].m_max);
|
||||
printf("overlap=%d\n",overlap);
|
||||
}
|
||||
|
||||
if (preAabbs.size())
|
||||
{
|
||||
int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max,(const b3Vector3&)preAabbs[53].m_min,(const b3Vector3&)preAabbs[53].m_max);
|
||||
printf("prevoverlap=%d\n",prevOverlap);
|
||||
} else
|
||||
{
|
||||
printf("unknown prevoverlap\n");
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (0)
|
||||
{
|
||||
for (int i=0;i<m_allAabbsCPU.size();i++)
|
||||
{
|
||||
//printf("aabb[%d] min=%f,%f,%f max=%f,%f,%f\n",i,m_allAabbsCPU[i].m_min[0],m_allAabbsCPU[i].m_min[1],m_allAabbsCPU[i].m_min[2], m_allAabbsCPU[i].m_max[0],m_allAabbsCPU[i].m_max[1],m_allAabbsCPU[i].m_max[2]);
|
||||
|
||||
|
||||
}
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
for (int buf=0;buf<2;buf++)
|
||||
{
|
||||
b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size()*2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
m_currentBuffer = 1-m_currentBuffer;
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
|
||||
|
||||
int totalNumAabbs = m_allAabbsCPU.size();
|
||||
|
||||
{
|
||||
int totalNumAabbs = m_allAabbsCPU.size();
|
||||
B3_PROFILE("assign m_sortedAxisCPU(FloatFlip)");
|
||||
for (int i=0;i<totalNumAabbs;i++)
|
||||
{
|
||||
m_sortedAxisCPU[axis][m_currentBuffer][i].m_key = FloatFlip(m_allAabbsCPU[i].m_minIndices[axis]);
|
||||
m_sortedAxisCPU[axis][m_currentBuffer][i].m_value = i;
|
||||
|
||||
|
||||
unsigned int keyMin[3];
|
||||
unsigned int keyMax[3];
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
float vmin=m_allAabbsCPU[i].m_min[axis];
|
||||
float vmax = m_allAabbsCPU[i].m_max[axis];
|
||||
keyMin[axis] = FloatFlip(vmin);
|
||||
keyMax[axis] = FloatFlip(vmax);
|
||||
|
||||
m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_key = keyMin[axis]-1;
|
||||
m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_value = i*2;
|
||||
m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_key = keyMax[axis]+1;
|
||||
m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_value = i*2+1;
|
||||
}
|
||||
//printf("aabb[%d] min=%u,%u,%u max %u,%u,%u\n", i,keyMin[0],keyMin[1],keyMin[2],keyMax[0],keyMax[1],keyMax[2]);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
{
|
||||
B3_PROFILE("sort m_sortedAxisCPU");
|
||||
for (int axis=0;axis<3;axis++)
|
||||
m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]);
|
||||
}
|
||||
|
||||
if (0)
|
||||
{
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
//printf("axis %d\n",axis);
|
||||
for (int i=0;i<m_sortedAxisCPU[axis][m_currentBuffer].size();i++)
|
||||
{
|
||||
int key = m_sortedAxisCPU[axis][m_currentBuffer][i].m_key;
|
||||
int value = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
|
||||
//printf("[%d]=%d\n",i,value);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
B3_PROFILE("assign m_objectMinMaxIndexCPU");
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
int totalNumAabbs = m_allAabbsCPU.size();
|
||||
int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size();
|
||||
m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(totalNumAabbs);
|
||||
for (int i=0;i<numEndPoints;i++)
|
||||
{
|
||||
int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
|
||||
int newDest = destIndex/2;
|
||||
if (destIndex&1)
|
||||
{
|
||||
m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i;
|
||||
} else
|
||||
{
|
||||
m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (0)
|
||||
{
|
||||
printf("==========================\n");
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
unsigned int curMinIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].x;
|
||||
unsigned int curMaxIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].y;
|
||||
unsigned int prevMaxIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].y;
|
||||
unsigned int prevMinIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].x;
|
||||
|
||||
int dmin40 = curMinIndex40 - prevMinIndex40;
|
||||
int dmax40 = curMinIndex40 - prevMinIndex40;
|
||||
printf("axis %d curMinIndex40=%d prevMinIndex40=%d\n",axis,curMinIndex40, prevMinIndex40);
|
||||
printf("axis %d curMaxIndex40=%d prevMaxIndex40=%d\n",axis,curMaxIndex40, prevMaxIndex40);
|
||||
}
|
||||
printf(".........................\n");
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
unsigned int curMinIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].x;
|
||||
unsigned int curMaxIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].y;
|
||||
unsigned int prevMaxIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].y;
|
||||
unsigned int prevMinIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].x;
|
||||
|
||||
int dmin40 = curMinIndex53 - prevMinIndex53;
|
||||
int dmax40 = curMinIndex53 - prevMinIndex53;
|
||||
printf("axis %d curMinIndex53=%d prevMinIndex53=%d\n",axis,curMinIndex53, prevMinIndex53);
|
||||
printf("axis %d curMaxIndex53=%d prevMaxIndex53=%d\n",axis,curMaxIndex53, prevMaxIndex53);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
int a = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
|
||||
int b = m_objectMinMaxIndexCPU[1][m_currentBuffer].size();
|
||||
int c = m_objectMinMaxIndexCPU[2][m_currentBuffer].size();
|
||||
b3Assert(a==b);
|
||||
b3Assert(b==c);
|
||||
|
||||
if (searchIncremental3dSapOnGpu)
|
||||
{
|
||||
B3_PROFILE("computePairsIncremental3dSapKernelGPU");
|
||||
int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
|
||||
int maxCapacity = 1024*1024;
|
||||
{
|
||||
B3_PROFILE("copy from host");
|
||||
m_objectMinMaxIndexGPUaxis0.copyFromHost(m_objectMinMaxIndexCPU[0][m_currentBuffer]);
|
||||
m_objectMinMaxIndexGPUaxis1.copyFromHost(m_objectMinMaxIndexCPU[1][m_currentBuffer]);
|
||||
m_objectMinMaxIndexGPUaxis2.copyFromHost(m_objectMinMaxIndexCPU[2][m_currentBuffer]);
|
||||
m_objectMinMaxIndexGPUaxis0prev.copyFromHost(m_objectMinMaxIndexCPU[0][1-m_currentBuffer]);
|
||||
m_objectMinMaxIndexGPUaxis1prev.copyFromHost(m_objectMinMaxIndexCPU[1][1-m_currentBuffer]);
|
||||
m_objectMinMaxIndexGPUaxis2prev.copyFromHost(m_objectMinMaxIndexCPU[2][1-m_currentBuffer]);
|
||||
|
||||
m_sortedAxisGPU0.copyFromHost(m_sortedAxisCPU[0][m_currentBuffer]);
|
||||
m_sortedAxisGPU1.copyFromHost(m_sortedAxisCPU[1][m_currentBuffer]);
|
||||
m_sortedAxisGPU2.copyFromHost(m_sortedAxisCPU[2][m_currentBuffer]);
|
||||
m_sortedAxisGPU0prev.copyFromHost(m_sortedAxisCPU[0][1-m_currentBuffer]);
|
||||
m_sortedAxisGPU1prev.copyFromHost(m_sortedAxisCPU[1][1-m_currentBuffer]);
|
||||
m_sortedAxisGPU2prev.copyFromHost(m_sortedAxisCPU[2][1-m_currentBuffer]);
|
||||
|
||||
|
||||
m_addedHostPairsGPU.resize(maxCapacity);
|
||||
m_removedHostPairsGPU.resize(maxCapacity);
|
||||
|
||||
m_addedCountGPU.resize(0);
|
||||
m_addedCountGPU.push_back(0);
|
||||
m_removedCountGPU.resize(0);
|
||||
m_removedCountGPU.push_back(0);
|
||||
}
|
||||
|
||||
{
|
||||
B3_PROFILE("launch1D");
|
||||
b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel);
|
||||
launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL());
|
||||
launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL());
|
||||
launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL());
|
||||
launcher.setBuffer(m_objectMinMaxIndexGPUaxis0prev.getBufferCL());
|
||||
launcher.setBuffer(m_objectMinMaxIndexGPUaxis1prev.getBufferCL());
|
||||
launcher.setBuffer(m_objectMinMaxIndexGPUaxis2prev.getBufferCL());
|
||||
|
||||
launcher.setBuffer(m_sortedAxisGPU0.getBufferCL());
|
||||
launcher.setBuffer(m_sortedAxisGPU1.getBufferCL());
|
||||
launcher.setBuffer(m_sortedAxisGPU2.getBufferCL());
|
||||
launcher.setBuffer(m_sortedAxisGPU0prev.getBufferCL());
|
||||
launcher.setBuffer(m_sortedAxisGPU1prev.getBufferCL());
|
||||
launcher.setBuffer(m_sortedAxisGPU2prev.getBufferCL());
|
||||
|
||||
|
||||
launcher.setBuffer(m_addedHostPairsGPU.getBufferCL());
|
||||
launcher.setBuffer(m_removedHostPairsGPU.getBufferCL());
|
||||
launcher.setBuffer(m_addedCountGPU.getBufferCL());
|
||||
launcher.setBuffer(m_removedCountGPU.getBufferCL());
|
||||
launcher.setConst(maxCapacity);
|
||||
launcher.setConst( numObjects);
|
||||
launcher.launch1D( numObjects);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
{
|
||||
B3_PROFILE("copy to host");
|
||||
int addedCountGPU = m_addedCountGPU.at(0);
|
||||
m_addedHostPairsGPU.resize(addedCountGPU);
|
||||
m_addedHostPairsGPU.copyToHost(addedHostPairs);
|
||||
|
||||
//printf("addedCountGPU=%d\n",addedCountGPU);
|
||||
int removedCountGPU = m_removedCountGPU.at(0);
|
||||
m_removedHostPairsGPU.resize(removedCountGPU);
|
||||
m_removedHostPairsGPU.copyToHost(removedHostPairs);
|
||||
//printf("removedCountGPU=%d\n",removedCountGPU);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
|
||||
|
||||
B3_PROFILE("actual search");
|
||||
for (int i=0;i<numObjects;i++)
|
||||
{
|
||||
//int numObjects = m_objectMinMaxIndexCPU[axis][m_currentBuffer].size();
|
||||
//int checkObjects[]={40,53};
|
||||
//int numCheckObjects = sizeof(checkObjects)/sizeof(int);
|
||||
|
||||
//for (int a=0;a<numCheckObjects ;a++)
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
//int i = checkObjects[a];
|
||||
|
||||
unsigned int curMinIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].x;
|
||||
unsigned int curMaxIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].y;
|
||||
unsigned int prevMinIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].x;
|
||||
int dmin = curMinIndex - prevMinIndex;
|
||||
|
||||
unsigned int prevMaxIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].y;
|
||||
|
||||
|
||||
|
||||
int dmax = curMaxIndex - prevMaxIndex;
|
||||
if (dmin!=0)
|
||||
{
|
||||
//printf("for object %d, dmin=%d\n",i,dmin);
|
||||
}
|
||||
if (dmax!=0)
|
||||
{
|
||||
//printf("for object %d, dmax=%d\n",i,dmax);
|
||||
}
|
||||
for (int otherbuffer = 0;otherbuffer<2;otherbuffer++)
|
||||
{
|
||||
if (dmin!=0)
|
||||
{
|
||||
int stepMin = dmin<0 ? -1 : 1;
|
||||
for (int j=prevMinIndex;j!=curMinIndex;j+=stepMin)
|
||||
{
|
||||
int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y;
|
||||
int otherIndex = otherIndex2/2;
|
||||
if (otherIndex!=i)
|
||||
{
|
||||
bool otherIsMax = ((otherIndex2&1)!=0);
|
||||
|
||||
if (otherIsMax)
|
||||
{
|
||||
//bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max);
|
||||
//bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max);
|
||||
|
||||
bool overlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) ||
|
||||
(m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x))
|
||||
overlap=false;
|
||||
}
|
||||
|
||||
// b3Assert(overlap2==overlap);
|
||||
|
||||
bool prevOverlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) ||
|
||||
(m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x))
|
||||
prevOverlap=false;
|
||||
}
|
||||
|
||||
|
||||
//b3Assert(overlap==overlap2);
|
||||
|
||||
|
||||
|
||||
if (dmin<0)
|
||||
{
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
b3Int2 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
newPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
newPair.x = otherIndex;
|
||||
newPair.y = i;
|
||||
}
|
||||
addedHostPairs.push_back(newPair);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!overlap && prevOverlap)
|
||||
{
|
||||
|
||||
//remove a pair
|
||||
b3Int2 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
removedPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
removedPair.x = otherIndex;
|
||||
removedPair.y = i;
|
||||
}
|
||||
removedHostPairs.push_back(removedPair);
|
||||
}
|
||||
}//otherisMax
|
||||
}//if (dmin<0)
|
||||
}//if (otherIndex!=i)
|
||||
}//for (int j=
|
||||
}
|
||||
|
||||
if (dmax!=0)
|
||||
{
|
||||
int stepMax = dmax<0 ? -1 : 1;
|
||||
for (int j=prevMaxIndex;j!=curMaxIndex;j+=stepMax)
|
||||
{
|
||||
int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y;
|
||||
int otherIndex = otherIndex2/2;
|
||||
if (otherIndex!=i)
|
||||
{
|
||||
bool otherIsMin = ((otherIndex2&1)==0);
|
||||
//if (otherIsMin)
|
||||
{
|
||||
//bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max);
|
||||
//bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max);
|
||||
|
||||
bool overlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) ||
|
||||
(m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x))
|
||||
overlap=false;
|
||||
}
|
||||
//b3Assert(overlap2==overlap);
|
||||
|
||||
bool prevOverlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) ||
|
||||
(m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x))
|
||||
prevOverlap=false;
|
||||
}
|
||||
|
||||
|
||||
if (dmax>0)
|
||||
{
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
b3Int2 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
newPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
newPair.x = otherIndex;
|
||||
newPair.y = i;
|
||||
}
|
||||
addedHostPairs.push_back(newPair);
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!overlap && prevOverlap)
|
||||
{
|
||||
//if (otherIndex2&1==0) -> min?
|
||||
//remove a pair
|
||||
b3Int2 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
removedPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
removedPair.x = otherIndex;
|
||||
removedPair.y = i;
|
||||
}
|
||||
removedHostPairs.push_back(removedPair);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}//if (dmin<0)
|
||||
}//if (otherIndex!=i)
|
||||
}//for (int j=
|
||||
}
|
||||
}//for (int otherbuffer
|
||||
}//for (int axis=0;
|
||||
}//for (int i=0;i<numObjects
|
||||
}
|
||||
|
||||
//remove duplicates and add/remove then to existing m_overlappingPairs
|
||||
|
||||
|
||||
|
||||
{
|
||||
{
|
||||
B3_PROFILE("sort allPairs");
|
||||
allPairs.quickSort(b3PairCmp);
|
||||
}
|
||||
{
|
||||
B3_PROFILE("sort addedHostPairs");
|
||||
addedHostPairs.quickSort(b3PairCmp);
|
||||
}
|
||||
{
|
||||
B3_PROFILE("sort removedHostPairs");
|
||||
removedHostPairs.quickSort(b3PairCmp);
|
||||
}
|
||||
}
|
||||
|
||||
b3Int2 prevPair;
|
||||
prevPair.x = -1;
|
||||
prevPair.y = -1;
|
||||
|
||||
int uniqueRemovedPairs = 0;
|
||||
|
||||
b3AlignedObjectArray<int> removedPositions;
|
||||
|
||||
{
|
||||
B3_PROFILE("actual removing");
|
||||
for (int i=0;i<removedHostPairs.size();i++)
|
||||
{
|
||||
b3Int2 removedPair = removedHostPairs[i];
|
||||
if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y))
|
||||
{
|
||||
|
||||
int index1 = allPairs.findBinarySearch(removedPair);
|
||||
|
||||
//#ifdef _DEBUG
|
||||
|
||||
|
||||
|
||||
int index2 = allPairs.findLinearSearch(removedPair);
|
||||
b3Assert(index1==index2);
|
||||
|
||||
//b3Assert(index1!=allPairs.size());
|
||||
if (index1<allPairs.size())
|
||||
//#endif//_DEBUG
|
||||
{
|
||||
uniqueRemovedPairs++;
|
||||
removedPositions.push_back(index1);
|
||||
{
|
||||
//printf("framepje(%d) remove pair(%d):%d,%d\n",framepje,i,removedPair.x,removedPair.y);
|
||||
}
|
||||
}
|
||||
}
|
||||
prevPair = removedPair;
|
||||
}
|
||||
|
||||
if (uniqueRemovedPairs)
|
||||
{
|
||||
for (int i=0;i<removedPositions.size();i++)
|
||||
{
|
||||
allPairs[removedPositions[i]].x = INT_MAX ;
|
||||
allPairs[removedPositions[i]].y = INT_MAX ;
|
||||
}
|
||||
allPairs.quickSort(b3PairCmp);
|
||||
allPairs.resize(allPairs.size()-uniqueRemovedPairs);
|
||||
}
|
||||
}
|
||||
//if (uniqueRemovedPairs)
|
||||
// printf("uniqueRemovedPairs=%d\n",uniqueRemovedPairs);
|
||||
//printf("removedHostPairs.size = %d\n",removedHostPairs.size());
|
||||
|
||||
prevPair.x = -1;
|
||||
prevPair.y = -1;
|
||||
|
||||
int uniqueAddedPairs=0;
|
||||
b3AlignedObjectArray<b3Int2> actualAddedPairs;
|
||||
|
||||
{
|
||||
B3_PROFILE("actual adding");
|
||||
for (int i=0;i<addedHostPairs.size();i++)
|
||||
{
|
||||
b3Int2 newPair = addedHostPairs[i];
|
||||
if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y))
|
||||
{
|
||||
//#ifdef _DEBUG
|
||||
int index1 = allPairs.findBinarySearch(newPair);
|
||||
|
||||
|
||||
int index2 = allPairs.findLinearSearch(newPair);
|
||||
b3Assert(index1==index2);
|
||||
|
||||
|
||||
b3Assert(index1==allPairs.size());
|
||||
if (index1!=allPairs.size())
|
||||
{
|
||||
printf("??\n");
|
||||
}
|
||||
|
||||
if (index1==allPairs.size())
|
||||
//#endif //_DEBUG
|
||||
{
|
||||
uniqueAddedPairs++;
|
||||
actualAddedPairs.push_back(newPair);
|
||||
}
|
||||
}
|
||||
prevPair = newPair;
|
||||
}
|
||||
for (int i=0;i<actualAddedPairs.size();i++)
|
||||
{
|
||||
//printf("framepje (%d), new pair(%d):%d,%d\n",framepje,i,actualAddedPairs[i].x,actualAddedPairs[i].y);
|
||||
allPairs.push_back(actualAddedPairs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
//if (uniqueAddedPairs)
|
||||
// printf("uniqueAddedPairs=%d\n", uniqueAddedPairs);
|
||||
|
||||
|
||||
|
||||
//b3AlignedObjectArray<b3Int2> addedHostPairs;
|
||||
//b3AlignedObjectArray<b3Int2> removedHostPairs;
|
||||
{
|
||||
B3_PROFILE("m_overlappingPairs.copyFromHost");
|
||||
m_overlappingPairs.copyFromHost(allPairs);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -177,8 +845,8 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
|
||||
void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
{
|
||||
//test
|
||||
//if (m_currentBuffer>=0)
|
||||
// calculateOverlappingPairsHostIncremental3Sap();
|
||||
if (m_currentBuffer>=0)
|
||||
return calculateOverlappingPairsHostIncremental3Sap();
|
||||
|
||||
b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
|
||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
||||
@ -203,6 +871,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
|
||||
int axis=0;
|
||||
{
|
||||
B3_PROFILE("CPU compute best variance axis");
|
||||
b3Vector3 s(0,0,0),s2(0,0,0);
|
||||
int numRigidBodies = numSmallAabbs;
|
||||
|
||||
@ -250,8 +919,17 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
(b3Vector3&)m_smallAabbsCPU[j].m_min,(b3Vector3&)m_smallAabbsCPU[j].m_max))
|
||||
{
|
||||
b3Int2 pair;
|
||||
pair.x = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
|
||||
pair.y = m_smallAabbsCPU[j].m_minIndices[3];
|
||||
int a = m_smallAabbsCPU[i].m_minIndices[3];
|
||||
int b = m_smallAabbsCPU[j].m_minIndices[3];
|
||||
if (a<=b)
|
||||
{
|
||||
pair.x = a;//store the original index in the unsorted aabb array
|
||||
pair.y = b;
|
||||
} else
|
||||
{
|
||||
pair.x = b;//store the original index in the unsorted aabb array
|
||||
pair.y = a;
|
||||
}
|
||||
hostPairs.push_back(pair);
|
||||
}
|
||||
}
|
||||
@ -272,8 +950,18 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
(b3Vector3&)m_largeAabbsCPU[j].m_min,(b3Vector3&)m_largeAabbsCPU[j].m_max))
|
||||
{
|
||||
b3Int2 pair;
|
||||
pair.x = m_largeAabbsCPU[j].m_minIndices[3];
|
||||
pair.y = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
|
||||
int a = m_largeAabbsCPU[j].m_minIndices[3];
|
||||
int b = m_smallAabbsCPU[i].m_minIndices[3];
|
||||
if (a<=b)
|
||||
{
|
||||
pair.x = a;
|
||||
pair.y = b;//store the original index in the unsorted aabb array
|
||||
} else
|
||||
{
|
||||
pair.x = b;
|
||||
pair.y = a;//store the original index in the unsorted aabb array
|
||||
}
|
||||
|
||||
hostPairs.push_back(pair);
|
||||
}
|
||||
}
|
||||
@ -293,7 +981,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
||||
m_overlappingPairs.resize(0);
|
||||
}
|
||||
|
||||
//init3dSap();
|
||||
init3dSap();
|
||||
|
||||
}
|
||||
|
||||
@ -312,6 +1000,9 @@ void b3GpuSapBroadphase::reset()
|
||||
|
||||
void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
{
|
||||
//if (m_currentBuffer>=0)
|
||||
// return calculateOverlappingPairsHostIncremental3Sap();
|
||||
|
||||
B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs");
|
||||
|
||||
int axis = 0;
|
||||
@ -363,7 +1054,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
|
||||
if (m_prefixScanFloat4)
|
||||
{
|
||||
B3_PROFILE("compute best variance axis");
|
||||
B3_PROFILE("GPU compute best variance axis");
|
||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
||||
if (m_dst.size()!=(numSmallAabbs+1))
|
||||
{
|
||||
@ -609,7 +1300,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
||||
|
||||
}//B3_PROFILE("GPU_RADIX SORT");
|
||||
|
||||
|
||||
//init3dSap();
|
||||
}
|
||||
|
||||
void b3GpuSapBroadphase::writeAabbsToGpu()
|
||||
|
@ -7,7 +7,7 @@ class b3Vector3;
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||
|
||||
#include "b3SapAabb.h"
|
||||
|
||||
#include "Bullet3Common/b3Int2.h"
|
||||
|
||||
|
||||
class b3GpuSapBroadphase
|
||||
@ -22,11 +22,33 @@ class b3GpuSapBroadphase
|
||||
cl_kernel m_sapKernel;
|
||||
cl_kernel m_sap2Kernel;
|
||||
cl_kernel m_prepareSumVarianceKernel;
|
||||
cl_kernel m_computePairsIncremental3dSapKernel;
|
||||
|
||||
class b3RadixSort32CL* m_sorter;
|
||||
|
||||
///test for 3d SAP
|
||||
b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2];
|
||||
b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2];
|
||||
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0;
|
||||
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1;
|
||||
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2;
|
||||
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev;
|
||||
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev;
|
||||
b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev;
|
||||
|
||||
b3OpenCLArray<b3SortData> m_sortedAxisGPU0;
|
||||
b3OpenCLArray<b3SortData> m_sortedAxisGPU1;
|
||||
b3OpenCLArray<b3SortData> m_sortedAxisGPU2;
|
||||
b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev;
|
||||
b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev;
|
||||
b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev;
|
||||
|
||||
|
||||
b3OpenCLArray<b3Int2> m_addedHostPairsGPU;
|
||||
b3OpenCLArray<b3Int2> m_removedHostPairsGPU;
|
||||
b3OpenCLArray<int> m_addedCountGPU;
|
||||
b3OpenCLArray<int> m_removedCountGPU;
|
||||
|
||||
int m_currentBuffer;
|
||||
|
||||
public:
|
||||
|
@ -1,7 +1,9 @@
|
||||
#ifndef B3_SAP_AABB_H
|
||||
#define B3_SAP_AABB_H
|
||||
|
||||
struct b3SapAabb
|
||||
#include "Bullet3Common/b3Scalar.h"
|
||||
|
||||
B3_ATTRIBUTE_ALIGNED16(struct) b3SapAabb
|
||||
{
|
||||
union
|
||||
{
|
||||
|
@ -30,6 +30,22 @@ typedef struct
|
||||
};
|
||||
} btAabbCL;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
unsigned int m_key;
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
unsigned int m_value;
|
||||
unsigned int y;
|
||||
|
||||
};
|
||||
}b3SortData;
|
||||
|
||||
|
||||
/// conservative test for overlap between two aabbs
|
||||
bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);
|
||||
@ -46,6 +62,260 @@ bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)
|
||||
return overlap;
|
||||
}
|
||||
|
||||
__kernel void computePairsIncremental3dSapKernel( __global const uint2* objectMinMaxIndexGPUaxis0,
|
||||
__global const uint2* objectMinMaxIndexGPUaxis1,
|
||||
__global const uint2* objectMinMaxIndexGPUaxis2,
|
||||
__global const uint2* objectMinMaxIndexGPUaxis0prev,
|
||||
__global const uint2* objectMinMaxIndexGPUaxis1prev,
|
||||
__global const uint2* objectMinMaxIndexGPUaxis2prev,
|
||||
__global const b3SortData* sortedAxisGPU0,
|
||||
__global const b3SortData* sortedAxisGPU1,
|
||||
__global const b3SortData* sortedAxisGPU2,
|
||||
__global const b3SortData* sortedAxisGPU0prev,
|
||||
__global const b3SortData* sortedAxisGPU1prev,
|
||||
__global const b3SortData* sortedAxisGPU2prev,
|
||||
__global int2* addedHostPairsGPU,
|
||||
__global int2* removedHostPairsGPU,
|
||||
volatile __global int* addedHostPairsCount,
|
||||
volatile __global int* removedHostPairsCount,
|
||||
int maxCapacity,
|
||||
int numObjects)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if (i>=numObjects)
|
||||
return;
|
||||
|
||||
__global const uint2* objectMinMaxIndexGPU[3][2];
|
||||
objectMinMaxIndexGPU[0][0]=objectMinMaxIndexGPUaxis0;
|
||||
objectMinMaxIndexGPU[1][0]=objectMinMaxIndexGPUaxis1;
|
||||
objectMinMaxIndexGPU[2][0]=objectMinMaxIndexGPUaxis2;
|
||||
objectMinMaxIndexGPU[0][1]=objectMinMaxIndexGPUaxis0prev;
|
||||
objectMinMaxIndexGPU[1][1]=objectMinMaxIndexGPUaxis1prev;
|
||||
objectMinMaxIndexGPU[2][1]=objectMinMaxIndexGPUaxis2prev;
|
||||
|
||||
__global const b3SortData* sortedAxisGPU[3][2];
|
||||
sortedAxisGPU[0][0] = sortedAxisGPU0;
|
||||
sortedAxisGPU[1][0] = sortedAxisGPU1;
|
||||
sortedAxisGPU[2][0] = sortedAxisGPU2;
|
||||
sortedAxisGPU[0][1] = sortedAxisGPU0prev;
|
||||
sortedAxisGPU[1][1] = sortedAxisGPU1prev;
|
||||
sortedAxisGPU[2][1] = sortedAxisGPU2prev;
|
||||
|
||||
int m_currentBuffer = 0;
|
||||
|
||||
for (int axis=0;axis<3;axis++)
|
||||
{
|
||||
//int i = checkObjects[a];
|
||||
|
||||
unsigned int curMinIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].x;
|
||||
unsigned int curMaxIndex = objectMinMaxIndexGPU[axis][m_currentBuffer][i].y;
|
||||
unsigned int prevMinIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].x;
|
||||
int dmin = curMinIndex - prevMinIndex;
|
||||
|
||||
unsigned int prevMaxIndex = objectMinMaxIndexGPU[axis][1-m_currentBuffer][i].y;
|
||||
|
||||
int dmax = curMaxIndex - prevMaxIndex;
|
||||
|
||||
for (int otherbuffer = 0;otherbuffer<2;otherbuffer++)
|
||||
{
|
||||
if (dmin!=0)
|
||||
{
|
||||
int stepMin = dmin<0 ? -1 : 1;
|
||||
for (int j=prevMinIndex;j!=curMinIndex;j+=stepMin)
|
||||
{
|
||||
int otherIndex2 = sortedAxisGPU[axis][otherbuffer][j].y;
|
||||
int otherIndex = otherIndex2/2;
|
||||
if (otherIndex!=i)
|
||||
{
|
||||
bool otherIsMax = ((otherIndex2&1)!=0);
|
||||
|
||||
if (otherIsMax)
|
||||
{
|
||||
|
||||
bool overlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((objectMinMaxIndexGPU[ax][m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) ||
|
||||
(objectMinMaxIndexGPU[ax][m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].x))
|
||||
overlap=false;
|
||||
}
|
||||
|
||||
// b3Assert(overlap2==overlap);
|
||||
|
||||
bool prevOverlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) ||
|
||||
(objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].x))
|
||||
prevOverlap=false;
|
||||
}
|
||||
|
||||
|
||||
//b3Assert(overlap==overlap2);
|
||||
|
||||
|
||||
|
||||
if (dmin<0)
|
||||
{
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
int2 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
newPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
newPair.x = otherIndex;
|
||||
newPair.y = i;
|
||||
}
|
||||
|
||||
{
|
||||
int curPair = atomic_inc(addedHostPairsCount);
|
||||
if (curPair<maxCapacity)
|
||||
{
|
||||
|
||||
addedHostPairsGPU[curPair].x = newPair.x;
|
||||
addedHostPairsGPU[curPair].y = newPair.y;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!overlap && prevOverlap)
|
||||
{
|
||||
|
||||
//remove a pair
|
||||
int2 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
removedPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
removedPair.x = otherIndex;
|
||||
removedPair.y = i;
|
||||
}
|
||||
{
|
||||
int curPair = atomic_inc(removedHostPairsCount);
|
||||
if (curPair<maxCapacity)
|
||||
{
|
||||
|
||||
removedHostPairsGPU[curPair].x = removedPair.x;
|
||||
removedHostPairsGPU[curPair].y = removedPair.y;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}//otherisMax
|
||||
}//if (dmin<0)
|
||||
}//if (otherIndex!=i)
|
||||
}//for (int j=
|
||||
}
|
||||
|
||||
if (dmax!=0)
|
||||
{
|
||||
int stepMax = dmax<0 ? -1 : 1;
|
||||
for (int j=prevMaxIndex;j!=curMaxIndex;j+=stepMax)
|
||||
{
|
||||
int otherIndex2 = sortedAxisGPU[axis][otherbuffer][j].y;
|
||||
int otherIndex = otherIndex2/2;
|
||||
if (otherIndex!=i)
|
||||
{
|
||||
bool otherIsMin = ((otherIndex2&1)==0);
|
||||
if (otherIsMin)
|
||||
{
|
||||
|
||||
bool overlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((objectMinMaxIndexGPU[ax][m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].y) ||
|
||||
(objectMinMaxIndexGPU[ax][m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][m_currentBuffer][otherIndex].x))
|
||||
overlap=false;
|
||||
}
|
||||
//b3Assert(overlap2==overlap);
|
||||
|
||||
bool prevOverlap = true;
|
||||
|
||||
for (int ax=0;ax<3;ax++)
|
||||
{
|
||||
if ((objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].x > objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].y) ||
|
||||
(objectMinMaxIndexGPU[ax][1-m_currentBuffer][i].y < objectMinMaxIndexGPU[ax][1-m_currentBuffer][otherIndex].x))
|
||||
prevOverlap=false;
|
||||
}
|
||||
|
||||
|
||||
if (dmax>0)
|
||||
{
|
||||
if (overlap && !prevOverlap)
|
||||
{
|
||||
//add a pair
|
||||
int2 newPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
newPair.x = i;
|
||||
newPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
newPair.x = otherIndex;
|
||||
newPair.y = i;
|
||||
}
|
||||
{
|
||||
int curPair = atomic_inc(addedHostPairsCount);
|
||||
if (curPair<maxCapacity)
|
||||
{
|
||||
|
||||
addedHostPairsGPU[curPair].x = newPair.x;
|
||||
addedHostPairsGPU[curPair].y = newPair.y;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!overlap && prevOverlap)
|
||||
{
|
||||
//if (otherIndex2&1==0) -> min?
|
||||
//remove a pair
|
||||
int2 removedPair;
|
||||
if (i<=otherIndex)
|
||||
{
|
||||
removedPair.x = i;
|
||||
removedPair.y = otherIndex;
|
||||
} else
|
||||
{
|
||||
removedPair.x = otherIndex;
|
||||
removedPair.y = i;
|
||||
}
|
||||
{
|
||||
int curPair = atomic_inc(removedHostPairsCount);
|
||||
if (curPair<maxCapacity)
|
||||
{
|
||||
|
||||
removedHostPairsGPU[curPair].x = removedPair.x;
|
||||
removedHostPairsGPU[curPair].y = removedPair.y;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}//if (dmin<0)
|
||||
}//if (otherIndex!=i)
|
||||
}//for (int j=
|
||||
}
|
||||
}//for (int otherbuffer
|
||||
}//for (int axis=0;
|
||||
|
||||
|
||||
}
|
||||
|
||||
//computePairsKernelBatchWrite
|
||||
__kernel void computePairsKernel( __global const btAabbCL* aabbs, volatile __global int2* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
|
||||
|
@ -8,14 +8,15 @@ struct b3SortData
|
||||
{
|
||||
union
|
||||
{
|
||||
int m_key;
|
||||
int x;
|
||||
unsigned int m_key;
|
||||
unsigned int x;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
int m_value;
|
||||
int y;
|
||||
unsigned int m_value;
|
||||
unsigned int y;
|
||||
|
||||
};
|
||||
};
|
||||
#include "b3BufferInfoCL.h"
|
||||
|
Loading…
Reference in New Issue
Block a user