b3Solver -> pass pointer to source instead of 0 (was left over from a debugging session), thanks to David for the report

Break up clipHullHullConcaveConvexKernel into multiple stages, so it might 'fit' in Apple's OpenCL implementation
Implemented bvhTraversalKernel and findConcaveSeparatingAxis on CPU (debugging, possible future CPU version)
This commit is contained in:
erwin coumans 2013-12-13 07:52:41 -08:00
parent c155e126d0
commit 3fe969c4ee
18 changed files with 800 additions and 295 deletions

View File

@ -101,7 +101,7 @@ enum
}; };
b3AlignedObjectArray<const char*> demoNames; b3AlignedObjectArray<const char*> demoNames;
int selectedDemo = 1; int selectedDemo = 0;
GpuDemo::CreateFunc* allDemos[]= GpuDemo::CreateFunc* allDemos[]=
{ {
//ConcaveCompound2Scene::MyCreateFunc, //ConcaveCompound2Scene::MyCreateFunc,
@ -247,9 +247,21 @@ static void MyMouseButtonCallback(int button, int state, float x, float y)
} }
extern bool useShadowMap; extern bool useShadowMap;
static bool wireframe=false;
void MyKeyboardCallback(int key, int state) void MyKeyboardCallback(int key, int state)
{ {
if (key=='w' && state)
{
wireframe=!wireframe;
if (wireframe)
{
glPolygonMode( GL_FRONT_AND_BACK, GL_LINE );
} else
{
glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );
}
}
if (key=='s' && state) if (key=='s' && state)
{ {
useShadowMap=!useShadowMap; useShadowMap=!useShadowMap;

View File

@ -203,7 +203,98 @@ bool b3FindSeparatingAxisEdgeEdge( const b3ConvexPolyhedronData* hullA, __global
return true; return true;
} }
// work-in-progress
inline int b3FindClippingFaces(b3Float4ConstArg separatingNormal,
__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,
b3Float4ConstArg posA, b3QuatConstArg ornA,b3Float4ConstArg posB, b3QuatConstArg ornB,
__global b3Float4* worldVertsA1,
__global b3Float4* worldNormalsA1,
__global b3Float4* worldVertsB1,
int capacityWorldVerts,
const float minDist, float maxDist,
__global const b3Float4* verticesA,
__global const b3GpuFace_t* facesA,
__global const int* indicesA,
__global const b3Float4* verticesB,
__global const b3GpuFace_t* facesB,
__global const int* indicesB,
__global b3Int4* clippingFaces, int pairIndex)
{
int numContactsOut = 0;
int numWorldVertsB1= 0;
int closestFaceB=-1;
float dmax = -FLT_MAX;
{
for(int face=0;face<hullB->m_numFaces;face++)
{
const b3Float4 Normal = b3MakeFloat4(facesB[hullB->m_faceOffset+face].m_plane.x,
facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
const b3Float4 WorldNormal = b3QuatRotate(ornB, Normal);
float d = b3Dot(WorldNormal,separatingNormal);
if (d > dmax)
{
dmax = d;
closestFaceB = face;
}
}
}
{
const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];
const int numVertices = polyB.m_numIndices;
for(int e0=0;e0<numVertices;e0++)
{
const b3Float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = b3TransformPoint(b,posB,ornB);
}
}
int closestFaceA=-1;
{
float dmin = FLT_MAX;
for(int face=0;face<hullA->m_numFaces;face++)
{
const b3Float4 Normal = b3MakeFloat4(
facesA[hullA->m_faceOffset+face].m_plane.x,
facesA[hullA->m_faceOffset+face].m_plane.y,
facesA[hullA->m_faceOffset+face].m_plane.z,
0.f);
const b3Float4 faceANormalWS = b3QuatRotate(ornA,Normal);
float d = b3Dot(faceANormalWS,separatingNormal);
if (d < dmin)
{
dmin = d;
closestFaceA = face;
worldNormalsA1[pairIndex] = faceANormalWS;
}
}
}
int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;
for(int e0=0;e0<numVerticesA;e0++)
{
const b3Float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
worldVertsA1[pairIndex*capacityWorldVerts+e0] = b3TransformPoint(a, posA,ornA);
}
clippingFaces[pairIndex].x = closestFaceA;
clippingFaces[pairIndex].y = closestFaceB;
clippingFaces[pairIndex].z = numVerticesA;
clippingFaces[pairIndex].w = numWorldVertsB1;
return numContactsOut;
}
__kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs, __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs,
__global const b3RigidBodyData* rigidBodies, __global const b3RigidBodyData* rigidBodies,
__global const b3Collidable* collidables, __global const b3Collidable* collidables,
@ -215,6 +306,12 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs
__global const b3GpuChildShape* gpuChildShapes, __global const b3GpuChildShape* gpuChildShapes,
__global b3Aabb* aabbs, __global b3Aabb* aabbs,
__global b3Float4* concaveSeparatingNormalsOut, __global b3Float4* concaveSeparatingNormalsOut,
__global b3Int4* clippingFacesOut,
__global b3Vector3* worldVertsA1Out,
__global b3Vector3* worldNormalsA1Out,
__global b3Vector3* worldVertsB1Out,
__global int* hasSeparatingNormals,
int vertexFaceCapacity,
int numConcavePairs, int numConcavePairs,
int pairIdx int pairIdx
) )
@ -242,7 +339,7 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs
return; return;
} }
hasSeparatingNormals[i] = 0;
int numFacesA = convexShapes[shapeIndexA].m_numFaces; int numFacesA = convexShapes[shapeIndexA].m_numFaces;
int numActualConcaveConvexTests = 0; int numActualConcaveConvexTests = 0;
@ -454,8 +551,34 @@ __kernel void b3FindConcaveSeparatingAxisKernel( __global b3Int4* concavePairs
if (hasSeparatingAxis) if (hasSeparatingAxis)
{ {
hasSeparatingNormals[i]=1;
sepAxis.w = dmin; sepAxis.w = dmin;
concaveSeparatingNormalsOut[pairIdx]=sepAxis; concaveSeparatingNormalsOut[pairIdx]=sepAxis;
//now compute clipping faces A and B, and world-space clipping vertices A and B...
float minDist = -1e30f;
float maxDist = 0.02f;
b3FindClippingFaces(sepAxis,
&convexPolyhedronA,
&convexShapes[shapeIndexB],
posA,ornA,
posB,ornB,
worldVertsA1Out,
worldNormalsA1Out,
worldVertsB1Out,
vertexFaceCapacity,
minDist, maxDist,
verticesA,
facesA,
indicesA,
vertices,
faces,
indices,
clippingFacesOut, pairIdx);
} else } else
{ {
//mark this pair as in-active //mark this pair as in-active

View File

@ -16,7 +16,7 @@ subject to the following restrictions:
bool findSeparatingAxisOnGpu = true; bool findSeparatingAxisOnGpu = true;
bool bvhTraversalKernelGPU = true; bool bvhTraversalKernelGPU = true;
bool findConcaveSeparatingAxisKernelGPU = false;//true; bool findConcaveSeparatingAxisKernelGPU = true;
///This file was written by Erwin Coumans ///This file was written by Erwin Coumans
///Separating axis rest based on work from Pierre Terdiman, see ///Separating axis rest based on work from Pierre Terdiman, see
@ -24,7 +24,7 @@ bool findConcaveSeparatingAxisKernelGPU = false;//true;
//#define B3_DEBUG_SAT_FACE //#define B3_DEBUG_SAT_FACE
#define CHECK_ON_HOST //#define CHECK_ON_HOST
#ifdef CHECK_ON_HOST #ifdef CHECK_ON_HOST
//#define PERSISTENT_CONTACTS_HOST //#define PERSISTENT_CONTACTS_HOST
@ -85,6 +85,7 @@ m_totalContactsOut(m_context, m_queue),
m_sepNormals(m_context, m_queue), m_sepNormals(m_context, m_queue),
m_hasSeparatingNormals(m_context, m_queue), m_hasSeparatingNormals(m_context, m_queue),
m_concaveSepNormals(m_context, m_queue), m_concaveSepNormals(m_context, m_queue),
m_concaveHasSeparatingNormals(m_context,m_queue),
m_numConcavePairsOut(m_context, m_queue), m_numConcavePairsOut(m_context, m_queue),
m_gpuCompoundPairs(m_context, m_queue), m_gpuCompoundPairs(m_context, m_queue),
m_gpuCompoundSepNormals(m_context, m_queue), m_gpuCompoundSepNormals(m_context, m_queue),
@ -2990,7 +2991,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
int concaveCapacity=maxTriConvexPairCapacity; int concaveCapacity=maxTriConvexPairCapacity;
m_concaveSepNormals.resize(concaveCapacity); m_concaveSepNormals.resize(concaveCapacity);
m_concaveHasSeparatingNormals.resize(concaveCapacity);
m_numConcavePairsOut.resize(0); m_numConcavePairsOut.resize(0);
m_numConcavePairsOut.push_back(0); m_numConcavePairsOut.push_back(0);
@ -3039,192 +3040,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
clFinish(m_queue); clFinish(m_queue);
} }
//now perform the tree query on GPU
{
{
if (treeNodesGPU->size() && treeNodesGPU->size())
{
if (bvhTraversalKernelGPU)
{
B3_PROFILE("m_bvhTraversalKernel");
numConcavePairs = m_numConcavePairsOut.at(0);
b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel");
launcher.setBuffer( pairs->getBufferCL());
launcher.setBuffer( bodyBuf->getBufferCL());
launcher.setBuffer( gpuCollidables.getBufferCL());
launcher.setBuffer( clAabbsWorldSpace.getBufferCL());
launcher.setBuffer( triangleConvexPairsOut.getBufferCL());
launcher.setBuffer( m_numConcavePairsOut.getBufferCL());
launcher.setBuffer( subTreesGPU->getBufferCL());
launcher.setBuffer( treeNodesGPU->getBufferCL());
launcher.setBuffer( bvhInfo->getBufferCL());
launcher.setConst( nPairs );
launcher.setConst( maxTriConvexPairCapacity);
int num = nPairs;
launcher.launch1D( num);
clFinish(m_queue);
numConcavePairs = m_numConcavePairsOut.at(0);
} else
{
b3AlignedObjectArray<b3Int4> hostPairs;
pairs->copyToHost(hostPairs);
b3AlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
bodyBuf->copyToHost(hostBodyBuf);
b3AlignedObjectArray<b3Collidable> hostCollidables;
gpuCollidables.copyToHost(hostCollidables);
b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
//int maxTriConvexPairCapacity,
b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
int numTriConvexPairsOutHost=0;
numConcavePairs = 0;
//m_numConcavePairsOut
b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
treeNodesGPU->copyToHost(treeNodesCPU);
b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
subTreesGPU->copyToHost(subTreesCPU);
b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
bvhInfo->copyToHost(bvhInfoCPU);
//compute it...
volatile int hostNumConcavePairsOut=0;
//
for (int i=0;i<nPairs;i++)
{
b3BvhTraversal( &hostPairs.at(0),
&hostBodyBuf.at(0),
&hostCollidables.at(0),
&hostAabbsWorldSpace.at(0),
&triangleConvexPairsOutHost.at(0),
&hostNumConcavePairsOut,
&subTreesCPU.at(0),
&treeNodesCPU.at(0),
&bvhInfoCPU.at(0),
nPairs,
maxTriConvexPairCapacity,
i);
}
numConcavePairs = hostNumConcavePairsOut;
if (hostNumConcavePairsOut)
{
triangleConvexPairsOutHost.resize(hostNumConcavePairsOut);
triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost);
}
//
m_numConcavePairsOut.resize(0);
m_numConcavePairsOut.push_back(numConcavePairs);
}
//printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity);
if (numConcavePairs > maxTriConvexPairCapacity)
{
static int exceeded_maxTriConvexPairCapacity_count = 0;
b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n",
numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++);
numConcavePairs = maxTriConvexPairCapacity;
}
triangleConvexPairsOut.resize(numConcavePairs);
if (numConcavePairs)
{
if (findConcaveSeparatingAxisKernelGPU)
{
//now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut)
B3_PROFILE("findConcaveSeparatingAxisKernel");
b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
b3BufferInfoCL( convexData.getBufferCL(),true),
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true),
b3BufferInfoCL( m_concaveSepNormals.getBufferCL())
};
b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numConcavePairs );
int num = numConcavePairs;
launcher.launch1D( num);
clFinish(m_queue);
} else
{
b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
//triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
b3AlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
bodyBuf->copyToHost(hostBodyBuf);
b3AlignedObjectArray<b3Collidable> hostCollidables;
gpuCollidables.copyToHost(hostCollidables);
b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
b3AlignedObjectArray<b3ConvexPolyhedronCL> hostConvexData;
convexData.copyToHost(hostConvexData);
b3AlignedObjectArray<b3Vector3> hostVertices;
gpuVertices.copyToHost(hostVertices);
b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
gpuUniqueEdges.copyToHost(hostUniqueEdges);
b3AlignedObjectArray<b3GpuFace> hostFaces;
gpuFaces.copyToHost(hostFaces);
b3AlignedObjectArray<int> hostIndices;
gpuIndices.copyToHost(hostIndices);
b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
gpuChildShapes.copyToHost(cpuChildShapes);
//numConcavePairs
//b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
//b3BufferInfoCL( bodyBuf->getBufferCL(),true),
//b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
// b3BufferInfoCL( convexData.getBufferCL(),true),
//b3BufferInfoCL( gpuVertices.getBufferCL(),true),
//b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
//b3BufferInfoCL( gpuFaces.getBufferCL(),true),
//b3BufferInfoCL( gpuIndices.getBufferCL(),true),
//b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
//b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true),
//b3BufferInfoCL( m_concaveSepNormals.getBufferCL())
b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
}
// b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals;
// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals);
// b3AlignedObjectArray<b3Int4> cpuConcavePairs;
// triangleConvexPairsOut.copyToHost(cpuConcavePairs);
}
}
}
}
numCompoundPairs = m_numCompoundPairsOut.at(0); numCompoundPairs = m_numCompoundPairsOut.at(0);
bool useGpuFindCompoundPairs=true; bool useGpuFindCompoundPairs=true;
if (useGpuFindCompoundPairs) if (useGpuFindCompoundPairs)
@ -3442,9 +3259,253 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
} }
int vertexFaceCapacity = 64;
{
//now perform the tree query on GPU
if (treeNodesGPU->size() && treeNodesGPU->size())
{
if (bvhTraversalKernelGPU)
{
B3_PROFILE("m_bvhTraversalKernel");
numConcavePairs = m_numConcavePairsOut.at(0);
b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel");
launcher.setBuffer( pairs->getBufferCL());
launcher.setBuffer( bodyBuf->getBufferCL());
launcher.setBuffer( gpuCollidables.getBufferCL());
launcher.setBuffer( clAabbsWorldSpace.getBufferCL());
launcher.setBuffer( triangleConvexPairsOut.getBufferCL());
launcher.setBuffer( m_numConcavePairsOut.getBufferCL());
launcher.setBuffer( subTreesGPU->getBufferCL());
launcher.setBuffer( treeNodesGPU->getBufferCL());
launcher.setBuffer( bvhInfo->getBufferCL());
launcher.setConst( nPairs );
launcher.setConst( maxTriConvexPairCapacity);
int num = nPairs;
launcher.launch1D( num);
clFinish(m_queue);
numConcavePairs = m_numConcavePairsOut.at(0);
} else
{
b3AlignedObjectArray<b3Int4> hostPairs;
pairs->copyToHost(hostPairs);
b3AlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
bodyBuf->copyToHost(hostBodyBuf);
b3AlignedObjectArray<b3Collidable> hostCollidables;
gpuCollidables.copyToHost(hostCollidables);
b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
//int maxTriConvexPairCapacity,
b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
int numTriConvexPairsOutHost=0;
numConcavePairs = 0;
//m_numConcavePairsOut
b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;
treeNodesGPU->copyToHost(treeNodesCPU);
b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;
subTreesGPU->copyToHost(subTreesCPU);
b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;
bvhInfo->copyToHost(bvhInfoCPU);
//compute it...
volatile int hostNumConcavePairsOut=0;
//
for (int i=0;i<nPairs;i++)
{
b3BvhTraversal( &hostPairs.at(0),
&hostBodyBuf.at(0),
&hostCollidables.at(0),
&hostAabbsWorldSpace.at(0),
&triangleConvexPairsOutHost.at(0),
&hostNumConcavePairsOut,
&subTreesCPU.at(0),
&treeNodesCPU.at(0),
&bvhInfoCPU.at(0),
nPairs,
maxTriConvexPairCapacity,
i);
}
numConcavePairs = hostNumConcavePairsOut;
if (hostNumConcavePairsOut)
{
triangleConvexPairsOutHost.resize(hostNumConcavePairsOut);
triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost);
}
//
m_numConcavePairsOut.resize(0);
m_numConcavePairsOut.push_back(numConcavePairs);
}
//printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity);
if (numConcavePairs > maxTriConvexPairCapacity)
{
static int exceeded_maxTriConvexPairCapacity_count = 0;
b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n",
numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++);
numConcavePairs = maxTriConvexPairCapacity;
}
triangleConvexPairsOut.resize(numConcavePairs);
if (numConcavePairs)
{
clippingFacesOutGPU.resize(numConcavePairs);
worldNormalsAGPU.resize(numConcavePairs);
worldVertsA1GPU.resize(vertexFaceCapacity*numConcavePairs);
worldVertsB1GPU.resize(vertexFaceCapacity*numConcavePairs);
if (findConcaveSeparatingAxisKernelGPU)
{
/*
m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
worldNormalsAGPU.copyFromHost(worldNormalsACPU);
worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
*/
//now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut)
B3_PROFILE("findConcaveSeparatingAxisKernel");
b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
b3BufferInfoCL( gpuCollidables.getBufferCL(),true),
b3BufferInfoCL( convexData.getBufferCL(),true),
b3BufferInfoCL( gpuVertices.getBufferCL(),true),
b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true),
b3BufferInfoCL( gpuFaces.getBufferCL(),true),
b3BufferInfoCL( gpuIndices.getBufferCL(),true),
b3BufferInfoCL( gpuChildShapes.getBufferCL(),true),
b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true),
b3BufferInfoCL( m_concaveSepNormals.getBufferCL()),
b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()),
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()),
b3BufferInfoCL( worldVertsA1GPU.getBufferCL()),
b3BufferInfoCL(worldNormalsAGPU.getBufferCL()),
b3BufferInfoCL(worldVertsB1GPU.getBufferCL())
};
b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(vertexFaceCapacity);
launcher.setConst( numConcavePairs );
int num = numConcavePairs;
launcher.launch1D( num);
clFinish(m_queue);
} else
{
b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;
b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;
b3AlignedObjectArray<b3Vector3> worldNormalsACPU;
b3AlignedObjectArray<b3Vector3> worldVertsB1CPU;
b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU;
b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;
triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);
//triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity);
b3AlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
bodyBuf->copyToHost(hostBodyBuf);
b3AlignedObjectArray<b3Collidable> hostCollidables;
gpuCollidables.copyToHost(hostCollidables);
b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace;
clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace);
b3AlignedObjectArray<b3ConvexPolyhedronCL> hostConvexData;
convexData.copyToHost(hostConvexData);
b3AlignedObjectArray<b3Vector3> hostVertices;
gpuVertices.copyToHost(hostVertices);
b3AlignedObjectArray<b3Vector3> hostUniqueEdges;
gpuUniqueEdges.copyToHost(hostUniqueEdges);
b3AlignedObjectArray<b3GpuFace> hostFaces;
gpuFaces.copyToHost(hostFaces);
b3AlignedObjectArray<int> hostIndices;
gpuIndices.copyToHost(hostIndices);
b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;
gpuChildShapes.copyToHost(cpuChildShapes);
b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;
m_concaveSepNormals.copyToHost(concaveSepNormalsHost);
concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size());
b3GpuChildShape* childShapePointerCPU = 0;
if (cpuChildShapes.size())
childShapePointerCPU = &cpuChildShapes.at(0);
clippingFacesOutCPU.resize(clippingFacesOutGPU.size());
worldVertsA1CPU.resize(worldVertsA1GPU.size());
worldNormalsACPU.resize(worldNormalsAGPU.size());
worldVertsB1CPU.resize(worldVertsB1GPU.size());
for (int i=0;i<numConcavePairs;i++)
{
b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0),
&hostBodyBuf.at(0),
&hostCollidables.at(0),
&hostConvexData.at(0), &hostVertices.at(0),&hostUniqueEdges.at(0),
&hostFaces.at(0),&hostIndices.at(0),childShapePointerCPU,
&hostAabbsWorldSpace.at(0),
&concaveSepNormalsHost.at(0),
&clippingFacesOutCPU.at(0),
&worldVertsA1CPU.at(0),
&worldNormalsACPU.at(0),
&worldVertsB1CPU.at(0),
&concaveHasSeparatingNormalsCPU.at(0),
vertexFaceCapacity,
numConcavePairs,i);
};
m_concaveSepNormals.copyFromHost(concaveSepNormalsHost);
m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);
clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);
worldVertsA1GPU.copyFromHost(worldVertsA1CPU);
worldNormalsAGPU.copyFromHost(worldNormalsACPU);
worldVertsB1GPU.copyFromHost(worldVertsB1CPU);
}
// b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals;
// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals);
// b3AlignedObjectArray<b3Int4> cpuConcavePairs;
// triangleConvexPairsOut.copyToHost(cpuConcavePairs);
}
}
}
if (numConcavePairs) if (numConcavePairs)
{ {
if (numConcavePairs) if (numConcavePairs)
@ -3494,19 +3555,103 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
if (contactClippingOnGpu) if (contactClippingOnGpu)
{ {
//B3_PROFILE("clipHullHullKernel");
m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true);
//concave-convex contact clipping //B3_PROFILE("clipHullHullKernel");
bool breakupConcaveConvexKernel = false;
#ifdef __APPLE__
//actually, some Apple OpenCL platform/device combinations work fine...
breakupConcaveConvexKernel = true;
#endif
//concave-convex contact clipping
if (numConcavePairs) if (numConcavePairs)
{ {
// printf("numConcavePairs = %d\n", numConcavePairs); // printf("numConcavePairs = %d\n", numConcavePairs);
// nContacts = m_totalContactsOut.at(0); // nContacts = m_totalContactsOut.at(0);
// printf("nContacts before = %d\n", nContacts); // printf("nContacts before = %d\n", nContacts);
if (breakupConcaveConvexKernel)
{
worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs);
//clipFacesAndFindContacts
bool clipFacesAndFindContactsCPU = false;
if (clipFacesAndFindContactsCPU)
{
} else
{
if (1)
{
B3_PROFILE("clipFacesAndFindContacts");
//nContacts = m_totalContactsOut.at(0);
//int h = m_hasSeparatingNormals.at(0);
//int4 p = clippingFacesOutGPU.at(0);
b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( m_concaveSepNormals.getBufferCL()),
b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()),
b3BufferInfoCL( contactOut->getBufferCL()),
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()),
b3BufferInfoCL( worldVertsA1GPU.getBufferCL()),
b3BufferInfoCL( worldNormalsAGPU.getBufferCL()),
b3BufferInfoCL( worldVertsB1GPU.getBufferCL()),
b3BufferInfoCL( worldVertsB2GPU.getBufferCL()),
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
};
b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(vertexFaceCapacity);
launcher.setConst( numConcavePairs );
int debugMode = 0;
launcher.setConst( debugMode);
int num = numConcavePairs;
launcher.launch1D( num);
clFinish(m_queue);
//int bla = m_totalContactsOut.at(0);
}
}
//contactReduction
{
contactOut->reserve(nContacts+numConcavePairs);
{
B3_PROFILE("newContactReductionKernel");
b3BufferInfoCL bInfo[] =
{
b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
b3BufferInfoCL( m_concaveSepNormals.getBufferCL()),
b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()),
b3BufferInfoCL( contactOut->getBufferCL()),
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()),
b3BufferInfoCL( worldVertsB2GPU.getBufferCL()),
b3BufferInfoCL( m_totalContactsOut.getBufferCL())
};
b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(vertexFaceCapacity);
launcher.setConst( numConcavePairs );
int num = numConcavePairs;
launcher.launch1D( num);
}
nContacts = m_totalContactsOut.at(0);
contactOut->resize(nContacts);
}
//re-use?
} else
{
B3_PROFILE("clipHullHullConcaveConvexKernel"); B3_PROFILE("clipHullHullConcaveConvexKernel");
nContacts = m_totalContactsOut.at(0); nContacts = m_totalContactsOut.at(0);
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
@ -3533,6 +3678,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
contactOut->resize(nContacts); contactOut->resize(nContacts);
b3AlignedObjectArray<b3Contact4> cpuContacts; b3AlignedObjectArray<b3Contact4> cpuContacts;
contactOut->copyToHost(cpuContacts); contactOut->copyToHost(cpuContacts);
}
// printf("nContacts after = %d\n", nContacts); // printf("nContacts after = %d\n", nContacts);
} }
@ -3553,25 +3699,13 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
int vertexFaceCapacity = 64;
worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); worldVertsB1GPU.resize(vertexFaceCapacity*nPairs);
clippingFacesOutGPU.resize(nPairs); clippingFacesOutGPU.resize(nPairs);
worldNormalsAGPU.resize(nPairs); worldNormalsAGPU.resize(nPairs);
worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); worldVertsA1GPU.resize(vertexFaceCapacity*nPairs);
worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); worldVertsB2GPU.resize(vertexFaceCapacity*nPairs);
{ {
B3_PROFILE("findClippingFacesKernel"); B3_PROFILE("findClippingFacesKernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
@ -3608,13 +3742,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
///clip face B against face A, reduce contacts and append them to a global contact array ///clip face B against face A, reduce contacts and append them to a global contact array
if (1) if (1)
{ {
B3_PROFILE("clipFacesAndContactReductionKernel"); B3_PROFILE("clipFacesAndFindContacts");
//nContacts = m_totalContactsOut.at(0); //nContacts = m_totalContactsOut.at(0);
//int h = m_hasSeparatingNormals.at(0); //int h = m_hasSeparatingNormals.at(0);
//int4 p = clippingFacesOutGPU.at(0); //int4 p = clippingFacesOutGPU.at(0);
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( pairs->getBufferCL(), true ),
b3BufferInfoCL( bodyBuf->getBufferCL(),true),
b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_sepNormals.getBufferCL()),
b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()),
b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()),
@ -3633,22 +3765,10 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
launcher.setConst( nPairs ); launcher.setConst( nPairs );
int debugMode = 0; int debugMode = 0;
launcher.setConst( debugMode); launcher.setConst( debugMode);
/*
int serializationBytes = launcher.getSerializationBufferSize();
unsigned char* buf = (unsigned char*)malloc(serializationBytes+1);
int actualWritten = launcher.serializeArguments(buf,serializationBytes+1);
FILE* f = fopen("clipFacesAndContactReductionKernel.bin","wb");
fwrite(buf,actualWritten,1,f);
fclose(f);
free(buf);
printf("serializationBytes=%d, actualWritten=%d\n",serializationBytes,actualWritten);
*/
int num = nPairs; int num = nPairs;
launcher.launch1D( num); launcher.launch1D( num);
clFinish(m_queue); clFinish(m_queue);
{ {
// nContacts = m_totalContactsOut.at(0); // nContacts = m_totalContactsOut.at(0);
// printf("nContacts = %d\n",nContacts); // printf("nContacts = %d\n",nContacts);

View File

@ -52,6 +52,7 @@ struct GpuSatCollision
b3OpenCLArray<b3Vector3> m_sepNormals; b3OpenCLArray<b3Vector3> m_sepNormals;
b3OpenCLArray<int> m_hasSeparatingNormals; b3OpenCLArray<int> m_hasSeparatingNormals;
b3OpenCLArray<b3Vector3> m_concaveSepNormals; b3OpenCLArray<b3Vector3> m_concaveSepNormals;
b3OpenCLArray<int> m_concaveHasSeparatingNormals;
b3OpenCLArray<int> m_numConcavePairsOut; b3OpenCLArray<int> m_numConcavePairsOut;
b3OpenCLArray<b3CompoundOverlappingPair> m_gpuCompoundPairs; b3OpenCLArray<b3CompoundOverlappingPair> m_gpuCompoundPairs;
b3OpenCLArray<b3Vector3> m_gpuCompoundSepNormals; b3OpenCLArray<b3Vector3> m_gpuCompoundSepNormals;

View File

@ -29,32 +29,6 @@ static const char* bvhTraversalKernelCL= \
" int m_nodeOffset;\n" " int m_nodeOffset;\n"
" int m_subTreeOffset;\n" " int m_subTreeOffset;\n"
"} b3BvhInfo;\n" "} b3BvhInfo;\n"
"/*\n"
" bool isLeafNode() const\n"
" {\n"
" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n"
" return (m_escapeIndexOrTriangleIndex >= 0);\n"
" }\n"
" int getEscapeIndex() const\n"
" {\n"
" btAssert(!isLeafNode());\n"
" return -m_escapeIndexOrTriangleIndex;\n"
" }\n"
" int getTriangleIndex() const\n"
" {\n"
" btAssert(isLeafNode());\n"
" unsigned int x=0;\n"
" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n"
" // Get only the lower bits where the triangle index is stored\n"
" return (m_escapeIndexOrTriangleIndex&~(y));\n"
" }\n"
" int getPartId() const\n"
" {\n"
" btAssert(isLeafNode());\n"
" // Get only the highest bits where the part index is stored\n"
" return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS));\n"
" }\n"
"*/\n"
"int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" "int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n"
"{\n" "{\n"
" unsigned int x=0;\n" " unsigned int x=0;\n"

View File

@ -13,6 +13,7 @@ static const char* primitiveContactsKernelsCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -36,6 +37,9 @@ static const char* primitiveContactsKernelsCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"

View File

@ -1353,6 +1353,97 @@ __kernel void findSeparatingAxisKernel( __global const int4* pairs,
int findClippingFaces(const float4 separatingNormal,
const ConvexPolyhedronCL* hullA,
__global const ConvexPolyhedronCL* hullB,
const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,
__global float4* worldVertsA1,
__global float4* worldNormalsA1,
__global float4* worldVertsB1,
int capacityWorldVerts,
const float minDist, float maxDist,
const float4* verticesA,
const btGpuFace* facesA,
const int* indicesA,
__global const float4* verticesB,
__global const btGpuFace* facesB,
__global const int* indicesB,
__global int4* clippingFaces, int pairIndex)
{
int numContactsOut = 0;
int numWorldVertsB1= 0;
int closestFaceB=-1;
float dmax = -FLT_MAX;
{
for(int face=0;face<hullB->m_numFaces;face++)
{
const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,
facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);
const float4 WorldNormal = qtRotate(ornB, Normal);
float d = dot3F4(WorldNormal,separatingNormal);
if (d > dmax)
{
dmax = d;
closestFaceB = face;
}
}
}
{
const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];
const int numVertices = polyB.m_numIndices;
for(int e0=0;e0<numVertices;e0++)
{
const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];
worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);
}
}
int closestFaceA=-1;
{
float dmin = FLT_MAX;
for(int face=0;face<hullA->m_numFaces;face++)
{
const float4 Normal = make_float4(
facesA[hullA->m_faceOffset+face].m_plane.x,
facesA[hullA->m_faceOffset+face].m_plane.y,
facesA[hullA->m_faceOffset+face].m_plane.z,
0.f);
const float4 faceANormalWS = qtRotate(ornA,Normal);
float d = dot3F4(faceANormalWS,separatingNormal);
if (d < dmin)
{
dmin = d;
closestFaceA = face;
worldNormalsA1[pairIndex] = faceANormalWS;
}
}
}
int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;
for(int e0=0;e0<numVerticesA;e0++)
{
const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];
worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);
}
clippingFaces[pairIndex].x = closestFaceA;
clippingFaces[pairIndex].y = closestFaceB;
clippingFaces[pairIndex].z = numVerticesA;
clippingFaces[pairIndex].w = numWorldVertsB1;
return numContactsOut;
}
// work-in-progress // work-in-progress
__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,
__global const BodyData* rigidBodies, __global const BodyData* rigidBodies,
@ -1365,6 +1456,12 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,
__global const btGpuChildShape* gpuChildShapes, __global const btGpuChildShape* gpuChildShapes,
__global btAabbCL* aabbs, __global btAabbCL* aabbs,
__global float4* concaveSeparatingNormalsOut, __global float4* concaveSeparatingNormalsOut,
__global int* concaveHasSeparatingNormals,
__global int4* clippingFacesOut,
__global float4* worldVertsA1GPU,
__global float4* worldNormalsAGPU,
__global float4* worldVertsB1GPU,
int vertexFaceCapacity,
int numConcavePairs int numConcavePairs
) )
{ {
@ -1372,6 +1469,9 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,
int i = get_global_id(0); int i = get_global_id(0);
if (i>=numConcavePairs) if (i>=numConcavePairs)
return; return;
concaveHasSeparatingNormals[i] = 0;
int pairIdx = i; int pairIdx = i;
int bodyIndexA = concavePairs[i].x; int bodyIndexA = concavePairs[i].x;
@ -1604,6 +1704,33 @@ __kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,
{ {
sepAxis.w = dmin; sepAxis.w = dmin;
concaveSeparatingNormalsOut[pairIdx]=sepAxis; concaveSeparatingNormalsOut[pairIdx]=sepAxis;
concaveHasSeparatingNormals[i]=1;
float minDist = -1e30f;
float maxDist = 0.02f;
findClippingFaces(sepAxis,
&convexPolyhedronA,
&convexShapes[shapeIndexB],
posA,ornA,
posB,ornB,
worldVertsA1GPU,
worldNormalsAGPU,
worldVertsB1GPU,
vertexFaceCapacity,
minDist, maxDist,
verticesA,
facesA,
indicesA,
vertices,
faces,
indices,
clippingFacesOut, pairIdx);
} else } else
{ {
//mark this pair as in-active //mark this pair as in-active

View File

@ -1669,9 +1669,7 @@ __kernel void findClippingFacesKernel( __global const int4* pairs,
__kernel void clipFacesAndFindContactsKernel( __global int4* pairs, __kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals,
__global const b3RigidBodyData_t* rigidBodies,
__global const float4* separatingNormals,
__global const int* hasSeparatingAxis, __global const int* hasSeparatingAxis,
__global struct b3Contact4Data* globalContactsOut, __global struct b3Contact4Data* globalContactsOut,
__global int4* clippingFacesOut, __global int4* clippingFacesOut,
@ -1698,8 +1696,8 @@ __kernel void clipFacesAndFindContactsKernel( __global int4* pairs,
if (hasSeparatingAxis[i]) if (hasSeparatingAxis[i])
{ {
int bodyIndexA = pairs[i].x; // int bodyIndexA = pairs[i].x;
int bodyIndexB = pairs[i].y; // int bodyIndexB = pairs[i].y;
int numLocalContactsOut = 0; int numLocalContactsOut = 0;

View File

@ -40,6 +40,7 @@ static const char* satClipKernelsCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -63,6 +64,9 @@ static const char* satClipKernelsCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
@ -1859,9 +1863,7 @@ static const char* satClipKernelsCL= \
" }// if (i<numPairs)\n" " }// if (i<numPairs)\n"
" \n" " \n"
"}\n" "}\n"
"__kernel void clipFacesAndFindContactsKernel( __global int4* pairs,\n" "__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals,\n"
" __global const b3RigidBodyData_t* rigidBodies,\n"
" __global const float4* separatingNormals,\n"
" __global const int* hasSeparatingAxis,\n" " __global const int* hasSeparatingAxis,\n"
" __global struct b3Contact4Data* globalContactsOut,\n" " __global struct b3Contact4Data* globalContactsOut,\n"
" __global int4* clippingFacesOut,\n" " __global int4* clippingFacesOut,\n"
@ -1888,8 +1890,8 @@ static const char* satClipKernelsCL= \
" if (hasSeparatingAxis[i])\n" " if (hasSeparatingAxis[i])\n"
" {\n" " {\n"
" \n" " \n"
" int bodyIndexA = pairs[i].x;\n" "// int bodyIndexA = pairs[i].x;\n"
" int bodyIndexB = pairs[i].y;\n" " // int bodyIndexB = pairs[i].y;\n"
" \n" " \n"
" int numLocalContactsOut = 0;\n" " int numLocalContactsOut = 0;\n"
" int capacityWorldVertsB2 = vertexFaceCapacity;\n" " int capacityWorldVertsB2 = vertexFaceCapacity;\n"

View File

@ -154,6 +154,7 @@ static const char* satKernelsCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -177,6 +178,9 @@ static const char* satKernelsCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"
@ -633,7 +637,7 @@ static const char* satKernelsCL= \
" float4* sep,\n" " float4* sep,\n"
" float* dmin)\n" " float* dmin)\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " \n"
" float4 posA = posA1;\n" " float4 posA = posA1;\n"
" posA.w = 0.f;\n" " posA.w = 0.f;\n"
" float4 posB = posB1;\n" " float4 posB = posB1;\n"
@ -682,7 +686,6 @@ static const char* satKernelsCL= \
" float4* sep,\n" " float4* sep,\n"
" float* dmin)\n" " float* dmin)\n"
"{\n" "{\n"
" int i = get_global_id(0);\n"
" float4 posA = posA1;\n" " float4 posA = posA1;\n"
" posA.w = 0.f;\n" " posA.w = 0.f;\n"
" float4 posB = posB1;\n" " float4 posB = posB1;\n"
@ -731,7 +734,6 @@ static const char* satKernelsCL= \
" float4* sep,\n" " float4* sep,\n"
" float* dmin)\n" " float* dmin)\n"
"{\n" "{\n"
" int i = get_global_id(0);\n"
" float4 posA = posA1;\n" " float4 posA = posA1;\n"
" posA.w = 0.f;\n" " posA.w = 0.f;\n"
" float4 posB = posB1;\n" " float4 posB = posB1;\n"
@ -815,7 +817,7 @@ static const char* satKernelsCL= \
" float4* sep,\n" " float4* sep,\n"
" float* dmin)\n" " float* dmin)\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " \n"
" float4 posA = posA1;\n" " float4 posA = posA1;\n"
" posA.w = 0.f;\n" " posA.w = 0.f;\n"
" float4 posB = posB1;\n" " float4 posB = posB1;\n"
@ -866,7 +868,7 @@ static const char* satKernelsCL= \
" float4* sep,\n" " float4* sep,\n"
" float* dmin)\n" " float* dmin)\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " \n"
" float4 posA = posA1;\n" " float4 posA = posA1;\n"
" posA.w = 0.f;\n" " posA.w = 0.f;\n"
" float4 posB = posB1;\n" " float4 posB = posB1;\n"
@ -1470,6 +1472,92 @@ static const char* satKernelsCL= \
" \n" " \n"
" }\n" " }\n"
"}\n" "}\n"
"int findClippingFaces(const float4 separatingNormal,\n"
" const ConvexPolyhedronCL* hullA, \n"
" __global const ConvexPolyhedronCL* hullB,\n"
" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n"
" __global float4* worldVertsA1,\n"
" __global float4* worldNormalsA1,\n"
" __global float4* worldVertsB1,\n"
" int capacityWorldVerts,\n"
" const float minDist, float maxDist,\n"
" const float4* verticesA,\n"
" const btGpuFace* facesA,\n"
" const int* indicesA,\n"
" __global const float4* verticesB,\n"
" __global const btGpuFace* facesB,\n"
" __global const int* indicesB,\n"
" __global int4* clippingFaces, int pairIndex)\n"
"{\n"
" int numContactsOut = 0;\n"
" int numWorldVertsB1= 0;\n"
" \n"
" \n"
" int closestFaceB=-1;\n"
" float dmax = -FLT_MAX;\n"
" \n"
" {\n"
" for(int face=0;face<hullB->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n"
" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n"
" const float4 WorldNormal = qtRotate(ornB, Normal);\n"
" float d = dot3F4(WorldNormal,separatingNormal);\n"
" if (d > dmax)\n"
" {\n"
" dmax = d;\n"
" closestFaceB = face;\n"
" }\n"
" }\n"
" }\n"
" \n"
" {\n"
" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n"
" const int numVertices = polyB.m_numIndices;\n"
" for(int e0=0;e0<numVertices;e0++)\n"
" {\n"
" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n"
" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n"
" }\n"
" }\n"
" \n"
" int closestFaceA=-1;\n"
" {\n"
" float dmin = FLT_MAX;\n"
" for(int face=0;face<hullA->m_numFaces;face++)\n"
" {\n"
" const float4 Normal = make_float4(\n"
" facesA[hullA->m_faceOffset+face].m_plane.x,\n"
" facesA[hullA->m_faceOffset+face].m_plane.y,\n"
" facesA[hullA->m_faceOffset+face].m_plane.z,\n"
" 0.f);\n"
" const float4 faceANormalWS = qtRotate(ornA,Normal);\n"
" \n"
" float d = dot3F4(faceANormalWS,separatingNormal);\n"
" if (d < dmin)\n"
" {\n"
" dmin = d;\n"
" closestFaceA = face;\n"
" worldNormalsA1[pairIndex] = faceANormalWS;\n"
" }\n"
" }\n"
" }\n"
" \n"
" int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n"
" for(int e0=0;e0<numVerticesA;e0++)\n"
" {\n"
" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n"
" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n"
" }\n"
" \n"
" clippingFaces[pairIndex].x = closestFaceA;\n"
" clippingFaces[pairIndex].y = closestFaceB;\n"
" clippingFaces[pairIndex].z = numVerticesA;\n"
" clippingFaces[pairIndex].w = numWorldVertsB1;\n"
" \n"
" \n"
" return numContactsOut;\n"
"}\n"
"// work-in-progress\n" "// work-in-progress\n"
"__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" "__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n"
" __global const BodyData* rigidBodies,\n" " __global const BodyData* rigidBodies,\n"
@ -1482,12 +1570,19 @@ static const char* satKernelsCL= \
" __global const btGpuChildShape* gpuChildShapes,\n" " __global const btGpuChildShape* gpuChildShapes,\n"
" __global btAabbCL* aabbs,\n" " __global btAabbCL* aabbs,\n"
" __global float4* concaveSeparatingNormalsOut,\n" " __global float4* concaveSeparatingNormalsOut,\n"
" __global int* concaveHasSeparatingNormals,\n"
" __global int4* clippingFacesOut,\n"
" __global float4* worldVertsA1GPU,\n"
" __global float4* worldNormalsAGPU,\n"
" __global float4* worldVertsB1GPU,\n"
" int vertexFaceCapacity,\n"
" int numConcavePairs\n" " int numConcavePairs\n"
" )\n" " )\n"
"{\n" "{\n"
" int i = get_global_id(0);\n" " int i = get_global_id(0);\n"
" if (i>=numConcavePairs)\n" " if (i>=numConcavePairs)\n"
" return;\n" " return;\n"
" concaveHasSeparatingNormals[i] = 0;\n"
" int pairIdx = i;\n" " int pairIdx = i;\n"
" int bodyIndexA = concavePairs[i].x;\n" " int bodyIndexA = concavePairs[i].x;\n"
" int bodyIndexB = concavePairs[i].y;\n" " int bodyIndexB = concavePairs[i].y;\n"
@ -1691,6 +1786,27 @@ static const char* satKernelsCL= \
" {\n" " {\n"
" sepAxis.w = dmin;\n" " sepAxis.w = dmin;\n"
" concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" " concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n"
" concaveHasSeparatingNormals[i]=1;\n"
" float minDist = -1e30f;\n"
" float maxDist = 0.02f;\n"
" \n"
" findClippingFaces(sepAxis,\n"
" &convexPolyhedronA,\n"
" &convexShapes[shapeIndexB],\n"
" posA,ornA,\n"
" posB,ornB,\n"
" worldVertsA1GPU,\n"
" worldNormalsAGPU,\n"
" worldVertsB1GPU,\n"
" vertexFaceCapacity,\n"
" minDist, maxDist,\n"
" verticesA,\n"
" facesA,\n"
" indicesA,\n"
" vertices,\n"
" faces,\n"
" indices,\n"
" clippingFacesOut, pairIdx);\n"
" } else\n" " } else\n"
" { \n" " { \n"
" //mark this pair as in-active\n" " //mark this pair as in-active\n"

View File

@ -124,7 +124,7 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue,
{ {
cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH,false); cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH);
b3Assert(solveContactProg); b3Assert(solveContactProg);
cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);

View File

@ -25,6 +25,7 @@ static const char* batchingKernelsCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -48,6 +49,9 @@ static const char* batchingKernelsCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"

View File

@ -25,6 +25,7 @@ static const char* batchingKernelsNewCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -48,6 +49,9 @@ static const char* batchingKernelsNewCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"

View File

@ -25,6 +25,7 @@ static const char* integrateKernelCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -48,6 +49,9 @@ static const char* integrateKernelCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"

View File

@ -25,6 +25,7 @@ static const char* solverSetupCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -48,6 +49,9 @@ static const char* solverSetupCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"

View File

@ -25,6 +25,7 @@ static const char* solverSetup2CL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -48,6 +49,9 @@ static const char* solverSetup2CL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"

View File

@ -25,6 +25,7 @@ static const char* solverUtilsCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -48,6 +49,9 @@ static const char* solverUtilsCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"

View File

@ -15,6 +15,7 @@ static const char* updateAabbsKernelCL= \
"#ifdef __cplusplus\n" "#ifdef __cplusplus\n"
"#else\n" "#else\n"
"#define b3AtomicInc atomic_inc\n" "#define b3AtomicInc atomic_inc\n"
"#define b3AtomicAdd atomic_add\n"
"#define b3Fabs fabs\n" "#define b3Fabs fabs\n"
"#define b3Sqrt native_sqrt\n" "#define b3Sqrt native_sqrt\n"
"#define b3Sin native_sin\n" "#define b3Sin native_sin\n"
@ -38,6 +39,9 @@ static const char* updateAabbsKernelCL= \
" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n"
" return cross(a1, b1);\n" " return cross(a1, b1);\n"
" }\n" " }\n"
" #define b3MinFloat4 min\n"
" #define b3MaxFloat4 max\n"
" #define b3Normalized(a) normalize(a)\n"
"#endif \n" "#endif \n"
" \n" " \n"
"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n"