mirror of
https://github.com/bulletphysics/bullet3
synced 2025-01-18 21:10:05 +00:00
replace 'copy' step of large/small Aabbs by indices
This commit is contained in:
parent
33ebebd1c9
commit
ff051f87aa
@ -104,7 +104,7 @@ static PairBench* sPairDemo = 0;
|
|||||||
|
|
||||||
#define BP_COMBO_INDEX 123
|
#define BP_COMBO_INDEX 123
|
||||||
|
|
||||||
static int curSelectedBroadphase = 0;
|
static int curSelectedBroadphase = 5;
|
||||||
static BroadphaseEntry allBroadphases[]=
|
static BroadphaseEntry allBroadphases[]=
|
||||||
{
|
{
|
||||||
{"Gpu Grid",b3GpuGridBroadphase::CreateFunc},
|
{"Gpu Grid",b3GpuGridBroadphase::CreateFunc},
|
||||||
|
@ -20,8 +20,8 @@ b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_co
|
|||||||
m_device(device),
|
m_device(device),
|
||||||
m_queue(q),
|
m_queue(q),
|
||||||
m_allAabbsGPU(ctx,q),
|
m_allAabbsGPU(ctx,q),
|
||||||
m_smallAabbsGPU(ctx,q),
|
m_smallAabbsMappingGPU(ctx,q),
|
||||||
m_largeAabbsGPU(ctx,q),
|
m_largeAabbsMappingGPU(ctx,q),
|
||||||
m_pairCount(ctx,q),
|
m_pairCount(ctx,q),
|
||||||
m_overlappingPairs(ctx,q),
|
m_overlappingPairs(ctx,q),
|
||||||
m_gpuSmallSortData(ctx,q),
|
m_gpuSmallSortData(ctx,q),
|
||||||
@ -892,32 +892,18 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
//m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs);
|
|
||||||
|
|
||||||
|
|
||||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
|
||||||
{
|
|
||||||
|
|
||||||
for (int j=0;j<numSmallAabbs;j++)
|
|
||||||
{
|
|
||||||
//sync aabb
|
|
||||||
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
|
||||||
m_smallAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
|
||||||
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int axis=0;
|
int axis=0;
|
||||||
{
|
{
|
||||||
B3_PROFILE("CPU compute best variance axis");
|
B3_PROFILE("CPU compute best variance axis");
|
||||||
b3Vector3 s=b3MakeVector3(0,0,0),s2=b3MakeVector3(0,0,0);
|
b3Vector3 s=b3MakeVector3(0,0,0),s2=b3MakeVector3(0,0,0);
|
||||||
int numRigidBodies = numSmallAabbs;
|
int numRigidBodies = m_smallAabbsMappingCPU.size();
|
||||||
|
|
||||||
for(int i=0;i<numRigidBodies;i++)
|
for(int i=0;i<numRigidBodies;i++)
|
||||||
{
|
{
|
||||||
b3Vector3 maxAabb=b3MakeVector3(m_smallAabbsCPU[i].m_max[0],m_smallAabbsCPU[i].m_max[1],m_smallAabbsCPU[i].m_max[2]);
|
b3SapAabb aabb = this->m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
|
||||||
b3Vector3 minAabb=b3MakeVector3(m_smallAabbsCPU[i].m_min[0],m_smallAabbsCPU[i].m_min[1],m_smallAabbsCPU[i].m_min[2]);
|
|
||||||
|
b3Vector3 maxAabb=b3MakeVector3(aabb.m_max[0],aabb.m_max[1],aabb.m_max[2]);
|
||||||
|
b3Vector3 minAabb=b3MakeVector3(aabb.m_min[0],aabb.m_min[1],aabb.m_min[2]);
|
||||||
b3Vector3 centerAabb=(maxAabb+minAabb)*0.5f;
|
b3Vector3 centerAabb=(maxAabb+minAabb)*0.5f;
|
||||||
|
|
||||||
s += centerAabb;
|
s += centerAabb;
|
||||||
@ -932,34 +918,28 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
{
|
|
||||||
int numLargeAabbs = m_largeAabbsCPU.size();
|
|
||||||
for (int j=0;j<numLargeAabbs;j++)
|
|
||||||
{
|
|
||||||
//sync aabb
|
|
||||||
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
|
||||||
m_largeAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
|
||||||
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
b3AlignedObjectArray<b3Int4> hostPairs;
|
b3AlignedObjectArray<b3Int4> hostPairs;
|
||||||
|
|
||||||
{
|
{
|
||||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
int numSmallAabbs = m_smallAabbsMappingCPU.size();
|
||||||
for (int i=0;i<numSmallAabbs;i++)
|
for (int i=0;i<numSmallAabbs;i++)
|
||||||
{
|
{
|
||||||
float reference = m_smallAabbsCPU[i].m_max[axis];
|
b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
|
||||||
|
float reference = smallAabbi.m_max[axis];
|
||||||
|
|
||||||
for (int j=i+1;j<numSmallAabbs;j++)
|
for (int j=i+1;j<numSmallAabbs;j++)
|
||||||
{
|
{
|
||||||
if (TestAabbAgainstAabb2((b3Vector3&)m_smallAabbsCPU[i].m_min, (b3Vector3&)m_smallAabbsCPU[i].m_max,
|
|
||||||
(b3Vector3&)m_smallAabbsCPU[j].m_min,(b3Vector3&)m_smallAabbsCPU[j].m_max))
|
b3SapAabb smallAabbj = m_allAabbsCPU[m_smallAabbsMappingCPU[j]];
|
||||||
|
|
||||||
|
if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max,
|
||||||
|
(b3Vector3&)smallAabbj.m_min,(b3Vector3&)smallAabbj.m_max))
|
||||||
{
|
{
|
||||||
b3Int4 pair;
|
b3Int4 pair;
|
||||||
int a = m_smallAabbsCPU[i].m_minIndices[3];
|
int a = smallAabbi.m_minIndices[3];
|
||||||
int b = m_smallAabbsCPU[j].m_minIndices[3];
|
int b = smallAabbj.m_minIndices[3];
|
||||||
if (a<=b)
|
if (a<=b)
|
||||||
{
|
{
|
||||||
pair.x = a;//store the original index in the unsorted aabb array
|
pair.x = a;//store the original index in the unsorted aabb array
|
||||||
@ -977,20 +957,23 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
|
|||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
int numSmallAabbs = m_smallAabbsMappingCPU.size();
|
||||||
for (int i=0;i<numSmallAabbs;i++)
|
for (int i=0;i<numSmallAabbs;i++)
|
||||||
{
|
{
|
||||||
float reference = m_smallAabbsCPU[i].m_max[axis];
|
b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
|
||||||
int numLargeAabbs = m_largeAabbsCPU.size();
|
|
||||||
|
float reference = smallAabbi.m_max[axis];
|
||||||
|
int numLargeAabbs = m_largeAabbsMappingCPU.size();
|
||||||
|
|
||||||
for (int j=0;j<numLargeAabbs;j++)
|
for (int j=0;j<numLargeAabbs;j++)
|
||||||
{
|
{
|
||||||
if (TestAabbAgainstAabb2((b3Vector3&)m_smallAabbsCPU[i].m_min, (b3Vector3&)m_smallAabbsCPU[i].m_max,
|
b3SapAabb largeAabbj = m_allAabbsCPU[m_largeAabbsMappingCPU[j]];
|
||||||
(b3Vector3&)m_largeAabbsCPU[j].m_min,(b3Vector3&)m_largeAabbsCPU[j].m_max))
|
if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max,
|
||||||
|
(b3Vector3&)largeAabbj.m_min,(b3Vector3&)largeAabbj.m_max))
|
||||||
{
|
{
|
||||||
b3Int4 pair;
|
b3Int4 pair;
|
||||||
int a = m_largeAabbsCPU[j].m_minIndices[3];
|
int a = largeAabbj.m_minIndices[3];
|
||||||
int b = m_smallAabbsCPU[i].m_minIndices[3];
|
int b = smallAabbi.m_minIndices[3];
|
||||||
if (a<=b)
|
if (a<=b)
|
||||||
{
|
{
|
||||||
pair.x = a;
|
pair.x = a;
|
||||||
@ -1029,11 +1012,15 @@ void b3GpuSapBroadphase::reset()
|
|||||||
m_allAabbsGPU.resize(0);
|
m_allAabbsGPU.resize(0);
|
||||||
m_allAabbsCPU.resize(0);
|
m_allAabbsCPU.resize(0);
|
||||||
|
|
||||||
m_smallAabbsGPU.resize(0);
|
|
||||||
m_smallAabbsCPU.resize(0);
|
m_smallAabbsMappingGPU.resize(0);
|
||||||
|
m_smallAabbsMappingCPU.resize(0);
|
||||||
|
|
||||||
m_pairCount.resize(0);
|
m_pairCount.resize(0);
|
||||||
m_largeAabbsGPU.resize(0);
|
|
||||||
m_largeAabbsCPU.resize(0);
|
m_largeAabbsMappingGPU.resize(0);
|
||||||
|
m_largeAabbsMappingCPU.resize(0);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1044,6 +1031,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
|||||||
calculateOverlappingPairsHost(maxPairs);
|
calculateOverlappingPairsHost(maxPairs);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//if (m_currentBuffer>=0)
|
//if (m_currentBuffer>=0)
|
||||||
// return calculateOverlappingPairsHostIncremental3Sap();
|
// return calculateOverlappingPairsHostIncremental3Sap();
|
||||||
|
|
||||||
@ -1057,47 +1045,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
|||||||
|
|
||||||
bool syncOnHost = false;
|
bool syncOnHost = false;
|
||||||
|
|
||||||
if (syncOnHost)
|
int numSmallAabbs = m_smallAabbsMappingCPU.size();
|
||||||
{
|
|
||||||
B3_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
|
|
||||||
|
|
||||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
|
||||||
|
|
||||||
m_smallAabbsGPU.copyToHost(m_smallAabbsCPU);
|
|
||||||
{
|
|
||||||
int numSmallAabbs = m_smallAabbsCPU.size();
|
|
||||||
for (int j=0;j<numSmallAabbs;j++)
|
|
||||||
{
|
|
||||||
//sync aabb
|
|
||||||
int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
|
|
||||||
m_smallAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
|
||||||
m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
|
|
||||||
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
{
|
|
||||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
|
||||||
if (numSmallAabbs)
|
|
||||||
{
|
|
||||||
B3_PROFILE("copyAabbsKernelSmall");
|
|
||||||
b3BufferInfoCL bInfo[] = {
|
|
||||||
b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
|
||||||
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()),
|
|
||||||
};
|
|
||||||
|
|
||||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel");
|
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
|
||||||
launcher.setConst( numSmallAabbs );
|
|
||||||
int num = numSmallAabbs;
|
|
||||||
launcher.launch1D( num);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int numSmallAabbs = m_smallAabbsGPU.size();
|
|
||||||
if (m_prefixScanFloat4 && numSmallAabbs)
|
if (m_prefixScanFloat4 && numSmallAabbs)
|
||||||
{
|
{
|
||||||
B3_PROFILE("GPU compute best variance axis");
|
B3_PROFILE("GPU compute best variance axis");
|
||||||
@ -1112,7 +1060,9 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ,"m_prepareSumVarianceKernel");
|
b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ,"m_prepareSumVarianceKernel");
|
||||||
launcher.setBuffer(m_smallAabbsGPU.getBufferCL());
|
launcher.setBuffer(m_allAabbsGPU.getBufferCL());
|
||||||
|
|
||||||
|
launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
|
||||||
launcher.setBuffer(m_sum.getBufferCL());
|
launcher.setBuffer(m_sum.getBufferCL());
|
||||||
launcher.setBuffer(m_sum2.getBufferCL());
|
launcher.setBuffer(m_sum2.getBufferCL());
|
||||||
launcher.setConst( numSmallAabbs+1 );
|
launcher.setConst( numSmallAabbs+1 );
|
||||||
@ -1134,60 +1084,19 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (syncOnHost)
|
|
||||||
{
|
|
||||||
B3_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
|
|
||||||
|
|
||||||
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
|
|
||||||
|
|
||||||
m_largeAabbsGPU.copyToHost(m_largeAabbsCPU);
|
|
||||||
{
|
|
||||||
int numLargeAabbs = m_largeAabbsCPU.size();
|
|
||||||
for (int j=0;j<numLargeAabbs;j++)
|
|
||||||
{
|
|
||||||
//sync aabb
|
|
||||||
int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
|
|
||||||
m_largeAabbsCPU[j] = m_allAabbsCPU[aabbIndex];
|
|
||||||
m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
m_largeAabbsGPU.copyFromHost(m_largeAabbsCPU);
|
|
||||||
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
int numLargeAabbs = m_largeAabbsGPU.size();
|
|
||||||
|
|
||||||
if (numLargeAabbs)
|
|
||||||
{
|
|
||||||
B3_PROFILE("copyAabbsKernelLarge");
|
|
||||||
b3BufferInfoCL bInfo[] = {
|
|
||||||
b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
|
||||||
b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()),
|
|
||||||
};
|
|
||||||
|
|
||||||
b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel");
|
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
|
||||||
launcher.setConst( numLargeAabbs );
|
|
||||||
int num = numLargeAabbs;
|
|
||||||
launcher.launch1D( num);
|
|
||||||
clFinish(m_queue);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
m_gpuSmallSortData.resize(numSmallAabbs);
|
m_gpuSmallSortData.resize(numSmallAabbs);
|
||||||
int numLargeAabbs = m_smallAabbsGPU.size();
|
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
if (m_smallAabbsGPU.size())
|
if (m_smallAabbsMappingGPU.size())
|
||||||
{
|
{
|
||||||
|
|
||||||
B3_PROFILE("flipFloatKernel");
|
B3_PROFILE("flipFloatKernel");
|
||||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())};
|
b3BufferInfoCL bInfo[] = {
|
||||||
|
b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
||||||
|
b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true),
|
||||||
|
b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())};
|
||||||
b3LauncherCL launcher(m_queue, m_flipFloatKernel ,"m_flipFloatKernel");
|
b3LauncherCL launcher(m_queue, m_flipFloatKernel ,"m_flipFloatKernel");
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||||
launcher.setConst( numSmallAabbs );
|
launcher.setConst( numSmallAabbs );
|
||||||
@ -1209,7 +1118,12 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
|||||||
if (numSmallAabbs)
|
if (numSmallAabbs)
|
||||||
{
|
{
|
||||||
B3_PROFILE("scatterKernel");
|
B3_PROFILE("scatterKernel");
|
||||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
|
|
||||||
|
b3BufferInfoCL bInfo[] = {
|
||||||
|
b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
|
||||||
|
b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true),
|
||||||
|
b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),
|
||||||
|
b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
|
||||||
b3LauncherCL launcher(m_queue, m_scatterKernel ,"m_scatterKernel ");
|
b3LauncherCL launcher(m_queue, m_scatterKernel ,"m_scatterKernel ");
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||||
launcher.setConst( numSmallAabbs);
|
launcher.setConst( numSmallAabbs);
|
||||||
@ -1227,11 +1141,17 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
|||||||
int numPairs=0;
|
int numPairs=0;
|
||||||
|
|
||||||
{
|
{
|
||||||
int numLargeAabbs = m_largeAabbsGPU.size();
|
int numLargeAabbs = m_largeAabbsMappingGPU.size();
|
||||||
if (numLargeAabbs && numSmallAabbs)
|
if (numLargeAabbs && numSmallAabbs)
|
||||||
{
|
{
|
||||||
|
//@todo
|
||||||
B3_PROFILE("sap2Kernel");
|
B3_PROFILE("sap2Kernel");
|
||||||
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())};
|
b3BufferInfoCL bInfo[] = {
|
||||||
|
b3BufferInfoCL( m_allAabbsGPU.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL( m_overlappingPairs.getBufferCL() ),
|
||||||
|
b3BufferInfoCL(m_pairCount.getBufferCL())};
|
||||||
b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
|
b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
|
||||||
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
|
||||||
launcher.setConst( numLargeAabbs );
|
launcher.setConst( numLargeAabbs );
|
||||||
@ -1348,15 +1268,17 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
|
|||||||
m_overlappingPairs.resize(numPairs);
|
m_overlappingPairs.resize(numPairs);
|
||||||
|
|
||||||
}//B3_PROFILE("GPU_RADIX SORT");
|
}//B3_PROFILE("GPU_RADIX SORT");
|
||||||
|
|
||||||
//init3dSap();
|
//init3dSap();
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3GpuSapBroadphase::writeAabbsToGpu()
|
void b3GpuSapBroadphase::writeAabbsToGpu()
|
||||||
{
|
{
|
||||||
|
m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU);
|
||||||
|
m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU);
|
||||||
|
|
||||||
m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this
|
m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this
|
||||||
m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
|
|
||||||
m_largeAabbsGPU.copyFromHost(m_largeAabbsCPU);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1371,7 +1293,8 @@ void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vec
|
|||||||
}
|
}
|
||||||
aabb.m_minIndices[3] = index;
|
aabb.m_minIndices[3] = index;
|
||||||
aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
|
aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
|
||||||
m_largeAabbsCPU.push_back(aabb);
|
m_largeAabbsMappingCPU.push_back(m_allAabbsCPU.size());
|
||||||
|
|
||||||
m_allAabbsCPU.push_back(aabb);
|
m_allAabbsCPU.push_back(aabb);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1386,7 +1309,9 @@ void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3&
|
|||||||
}
|
}
|
||||||
aabb.m_minIndices[3] = index;
|
aabb.m_minIndices[3] = index;
|
||||||
aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
|
aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
|
||||||
m_smallAabbsCPU.push_back(aabb);
|
m_smallAabbsMappingCPU.push_back(m_allAabbsCPU.size());
|
||||||
|
|
||||||
|
|
||||||
m_allAabbsCPU.push_back(aabb);
|
m_allAabbsCPU.push_back(aabb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,11 +74,12 @@ public:
|
|||||||
b3OpenCLArray<b3Vector3> m_sum2;
|
b3OpenCLArray<b3Vector3> m_sum2;
|
||||||
b3OpenCLArray<b3Vector3> m_dst;
|
b3OpenCLArray<b3Vector3> m_dst;
|
||||||
|
|
||||||
b3OpenCLArray<b3SapAabb> m_smallAabbsGPU;
|
b3OpenCLArray<int> m_smallAabbsMappingGPU;
|
||||||
b3AlignedObjectArray<b3SapAabb> m_smallAabbsCPU;
|
b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
|
||||||
|
|
||||||
|
b3OpenCLArray<int> m_largeAabbsMappingGPU;
|
||||||
|
b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
|
||||||
|
|
||||||
b3OpenCLArray<b3SapAabb> m_largeAabbsGPU;
|
|
||||||
b3AlignedObjectArray<b3SapAabb> m_largeAabbsCPU;
|
|
||||||
|
|
||||||
b3OpenCLArray<b3Int4> m_overlappingPairs;
|
b3OpenCLArray<b3Int4> m_overlappingPairs;
|
||||||
|
|
||||||
|
@ -63,7 +63,7 @@ bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL*
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
|
__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)
|
||||||
{
|
{
|
||||||
int i = get_global_id(0);
|
int i = get_global_id(0);
|
||||||
if (i>=numUnsortedAabbs)
|
if (i>=numUnsortedAabbs)
|
||||||
@ -73,11 +73,14 @@ __kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAa
|
|||||||
if (j>=numSortedAabbs)
|
if (j>=numSortedAabbs)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))
|
|
||||||
|
__global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];
|
||||||
|
|
||||||
|
if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j]))
|
||||||
{
|
{
|
||||||
int4 myPair;
|
int4 myPair;
|
||||||
|
|
||||||
int xIndex = unsortedAabbs[i].m_minIndices[3];
|
int xIndex = unsortedAabbPtr[0].m_minIndices[3];
|
||||||
int yIndex = sortedAabbs[j].m_minIndices[3];
|
int yIndex = sortedAabbs[j].m_minIndices[3];
|
||||||
if (xIndex>yIndex)
|
if (xIndex>yIndex)
|
||||||
{
|
{
|
||||||
@ -346,36 +349,40 @@ __kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btA
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__kernel void flipFloatKernel( __global const btAabbCL* aabbs, volatile __global int2* sortData, int numObjects, int axis)
|
__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis)
|
||||||
{
|
{
|
||||||
int i = get_global_id(0);
|
int i = get_global_id(0);
|
||||||
if (i>=numObjects)
|
if (i>=numObjects)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
sortData[i].x = FloatFlip(aabbs[i].m_minElems[axis]);
|
|
||||||
|
sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);
|
||||||
sortData[i].y = i;
|
sortData[i].y = i;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__kernel void scatterKernel( __global const btAabbCL* aabbs, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)
|
__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)
|
||||||
{
|
{
|
||||||
int i = get_global_id(0);
|
int i = get_global_id(0);
|
||||||
if (i>=numObjects)
|
if (i>=numObjects)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
sortedAabbs[i] = aabbs[sortData[i].y];
|
sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
__kernel void prepareSumVarianceKernel( __global const btAabbCL* aabbs, __global float4* sum, __global float4* sum2,int numAabbs)
|
__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)
|
||||||
{
|
{
|
||||||
int i = get_global_id(0);
|
int i = get_global_id(0);
|
||||||
if (i>numAabbs)
|
if (i>numAabbs)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];
|
||||||
|
|
||||||
float4 s;
|
float4 s;
|
||||||
s = (aabbs[i].m_max+aabbs[i].m_min)*0.5f;
|
s = (smallAabb.m_max+smallAabb.m_min)*0.5f;
|
||||||
sum[i]=s;
|
sum[i]=s;
|
||||||
sum2[i]=s*s;
|
sum2[i]=s*s;
|
||||||
}
|
}
|
||||||
|
@ -56,7 +56,7 @@ static const char* sapCL= \
|
|||||||
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
||||||
" return overlap;\n"
|
" return overlap;\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const btAabbCL* sortedAabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numSortedAabbs, int axis, int maxPairs)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int i = get_global_id(0);\n"
|
" int i = get_global_id(0);\n"
|
||||||
" if (i>=numUnsortedAabbs)\n"
|
" if (i>=numUnsortedAabbs)\n"
|
||||||
@ -64,11 +64,12 @@ static const char* sapCL= \
|
|||||||
" int j = get_global_id(1);\n"
|
" int j = get_global_id(1);\n"
|
||||||
" if (j>=numSortedAabbs)\n"
|
" if (j>=numSortedAabbs)\n"
|
||||||
" return;\n"
|
" return;\n"
|
||||||
" if (TestAabbAgainstAabb2GlobalGlobal(&unsortedAabbs[i],&sortedAabbs[j]))\n"
|
" __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n"
|
||||||
|
" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,&sortedAabbs[j]))\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
" int4 myPair;\n"
|
" int4 myPair;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" int xIndex = unsortedAabbs[i].m_minIndices[3];\n"
|
" int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n"
|
||||||
" int yIndex = sortedAabbs[j].m_minIndices[3];\n"
|
" int yIndex = sortedAabbs[j].m_minIndices[3];\n"
|
||||||
" if (xIndex>yIndex)\n"
|
" if (xIndex>yIndex)\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
@ -305,30 +306,35 @@ static const char* sapCL= \
|
|||||||
" destAabbs[i] = allAabbs[src];\n"
|
" destAabbs[i] = allAabbs[src];\n"
|
||||||
" destAabbs[i].m_maxIndices[3] = src;\n"
|
" destAabbs[i].m_maxIndices[3] = src;\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"__kernel void flipFloatKernel( __global const btAabbCL* aabbs, volatile __global int2* sortData, int numObjects, int axis)\n"
|
"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global int2* sortData, int numObjects, int axis)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int i = get_global_id(0);\n"
|
" int i = get_global_id(0);\n"
|
||||||
" if (i>=numObjects)\n"
|
" if (i>=numObjects)\n"
|
||||||
" return;\n"
|
" return;\n"
|
||||||
" \n"
|
" \n"
|
||||||
" sortData[i].x = FloatFlip(aabbs[i].m_minElems[axis]);\n"
|
" \n"
|
||||||
|
" sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n"
|
||||||
" sortData[i].y = i;\n"
|
" sortData[i].y = i;\n"
|
||||||
" \n"
|
" \n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"__kernel void scatterKernel( __global const btAabbCL* aabbs, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n"
|
"__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int i = get_global_id(0);\n"
|
" int i = get_global_id(0);\n"
|
||||||
" if (i>=numObjects)\n"
|
" if (i>=numObjects)\n"
|
||||||
" return;\n"
|
" return;\n"
|
||||||
" sortedAabbs[i] = aabbs[sortData[i].y];\n"
|
" \n"
|
||||||
|
" sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n"
|
||||||
"}\n"
|
"}\n"
|
||||||
"__kernel void prepareSumVarianceKernel( __global const btAabbCL* aabbs, __global float4* sum, __global float4* sum2,int numAabbs)\n"
|
"__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int i = get_global_id(0);\n"
|
" int i = get_global_id(0);\n"
|
||||||
" if (i>numAabbs)\n"
|
" if (i>numAabbs)\n"
|
||||||
" return;\n"
|
" return;\n"
|
||||||
|
" \n"
|
||||||
|
" btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n"
|
||||||
|
" \n"
|
||||||
" float4 s;\n"
|
" float4 s;\n"
|
||||||
" s = (aabbs[i].m_max+aabbs[i].m_min)*0.5f;\n"
|
" s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n"
|
||||||
" sum[i]=s;\n"
|
" sum[i]=s;\n"
|
||||||
" sum2[i]=s*s; \n"
|
" sum2[i]=s*s; \n"
|
||||||
"}\n"
|
"}\n"
|
||||||
|
Loading…
Reference in New Issue
Block a user