mirror of
https://github.com/bulletphysics/bullet3
synced 2024-12-13 21:30:09 +00:00
Draft Parallel Linear BVH Broadphase.
This commit is contained in:
parent
fabdf8b4a9
commit
b7b7356af8
@ -9,6 +9,7 @@
|
||||
#include "OpenGLWindow/b3gWindowInterface.h"
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h"
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h"
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h"
|
||||
|
||||
#include "../GpuDemoInternalData.h"
|
||||
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||
@ -108,6 +109,7 @@ static int curSelectedBroadphase = 0;
|
||||
static BroadphaseEntry allBroadphases[]=
|
||||
{
|
||||
{"Gpu Grid",b3GpuGridBroadphase::CreateFunc},
|
||||
{"Parallel Linear BVH",b3GpuParallelLinearBvhBroadphase::CreateFunc},
|
||||
{"CPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceCpu},
|
||||
{"GPU Brute Force",b3GpuSapBroadphase::CreateFuncBruteForceGpu},
|
||||
{"GPU 1-SAP Original",b3GpuSapBroadphase::CreateFuncOriginal},
|
||||
|
@ -13,6 +13,7 @@ premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/Broadphas
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h" --stringname="sapFastCL" stringify
|
||||
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h" --stringname="gridBroadphaseCL" stringify
|
||||
premake4 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h" --stringname="parallelLinearBvhCL" stringify
|
||||
|
||||
|
||||
|
||||
|
@ -14,6 +14,7 @@ rem @echo off
|
||||
./premake4_linux64 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFast.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/sapFastKernels.h" --stringname="sapFastCL" stringify
|
||||
|
||||
./premake4_linux64 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h" --stringname="gridBroadphaseCL" stringify
|
||||
./premake4_linux64 --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl" --headerfile="../src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h" --stringname="parallelLinearBvhCL" stringify
|
||||
|
||||
|
||||
|
||||
|
416
src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h
Normal file
416
src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h
Normal file
@ -0,0 +1,416 @@
|
||||
/*
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Initial Author Jackson Lee, 2014
|
||||
|
||||
#ifndef B3_GPU_PARALLEL_LINEAR_BVH_H
|
||||
#define B3_GPU_PARALLEL_LINEAR_BVH_H
|
||||
|
||||
//#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
|
||||
#include "Bullet3Common/shared/b3Int2.h"
|
||||
#include "Bullet3Common/shared/b3Int4.h"
|
||||
|
||||
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h"
|
||||
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
|
||||
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h"
|
||||
|
||||
#define B3_PLBVH_ROOT_NODE_MARKER -1 //Syncronize with parallelLinearBvh.cl
|
||||
|
||||
///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame
|
||||
///@remarks
|
||||
///Main references: \n
|
||||
///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n
|
||||
///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n
|
||||
///@par
|
||||
///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages:
|
||||
/// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid)
|
||||
/// - [fully parallel] Sort morton codes
|
||||
/// - [somewhat parallel] Build radix binary tree (assign parent/child pointers for internal nodes of the BVH)
|
||||
/// - [somewhat parallel] Set internal node AABBs
|
||||
///@par
|
||||
///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages.
|
||||
///The BVH implementation here is almost the same as [Karras 2012], but a different method is used for constructing the tree.
|
||||
/// - Instead of building a binary radix tree, we simply pair each node with its nearest sibling.
|
||||
/// This has the effect of further worsening the quality of the BVH, but the main spatial partitioning is done by the
|
||||
/// Z-curve anyways, and this method should be simpler and faster during construction. Additionally, it is still possible
|
||||
/// to improve the quality of the BVH by rearranging the connections between nodes.
|
||||
/// - Due to the way the tree is constructed, it becomes unnecessary to use atomic_inc to get
|
||||
/// the AABB for each internal node. Rather than traveling upwards from the leaf nodes, as in the paper,
|
||||
/// each internal node checks its child nodes to get its AABB.
|
||||
class b3GpuParallelLinearBvh
|
||||
{
|
||||
cl_command_queue m_queue;
|
||||
|
||||
cl_program m_parallelLinearBvhProgram;
|
||||
|
||||
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
|
||||
cl_kernel m_constructBinaryTreeKernel;
|
||||
cl_kernel m_determineInternalNodeAabbsKernel;
|
||||
|
||||
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
|
||||
|
||||
b3FillCL m_fill;
|
||||
b3RadixSort32CL m_radixSorter;
|
||||
|
||||
//1 element per level in the tree
|
||||
b3AlignedObjectArray<int> m_numNodesPerLevelCpu; //Level 0(m_numNodesPerLevelCpu[0]) is the root, last level contains the leaf nodes
|
||||
b3AlignedObjectArray<int> m_firstIndexOffsetPerLevelCpu; //Contains the index/offset of the first node in that level
|
||||
b3OpenCLArray<int> m_numNodesPerLevelGpu;
|
||||
b3OpenCLArray<int> m_firstIndexOffsetPerLevelGpu;
|
||||
|
||||
//1 element per internal node (number_of_internal_nodes = number_of_leaves - 1); index 0 is the root node
|
||||
b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
|
||||
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child
|
||||
b3OpenCLArray<int> m_internalNodeParentNodes;
|
||||
|
||||
//1 element per leaf node
|
||||
b3OpenCLArray<int> m_leafNodeParentNodes;
|
||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index
|
||||
|
||||
public:
|
||||
b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) :
|
||||
m_queue(queue),
|
||||
m_fill(context, device, queue),
|
||||
m_radixSorter(context, device, queue),
|
||||
|
||||
m_numNodesPerLevelGpu(context, queue),
|
||||
m_firstIndexOffsetPerLevelGpu(context, queue),
|
||||
m_internalNodeAabbs(context, queue),
|
||||
m_internalNodeChildNodes(context, queue),
|
||||
m_internalNodeParentNodes(context, queue),
|
||||
m_leafNodeParentNodes(context, queue),
|
||||
m_mortonCodesAndAabbIndicies(context, queue)
|
||||
{
|
||||
const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
|
||||
|
||||
const char* kernelSource = parallelLinearBvhCL; //parallelLinearBvhCL.h
|
||||
cl_int error;
|
||||
char* additionalMacros = 0;
|
||||
m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH);
|
||||
b3Assert(m_parallelLinearBvhProgram);
|
||||
|
||||
m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_assignMortonCodesAndAabbIndiciesKernel);
|
||||
m_constructBinaryTreeKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "constructBinaryTree", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_constructBinaryTreeKernel);
|
||||
m_determineInternalNodeAabbsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "determineInternalNodeAabbs", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_determineInternalNodeAabbsKernel);
|
||||
|
||||
m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros );
|
||||
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
|
||||
}
|
||||
|
||||
virtual ~b3GpuParallelLinearBvh()
|
||||
{
|
||||
clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel);
|
||||
clReleaseKernel(m_constructBinaryTreeKernel);
|
||||
clReleaseKernel(m_determineInternalNodeAabbsKernel);
|
||||
|
||||
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
|
||||
|
||||
clReleaseProgram(m_parallelLinearBvhProgram);
|
||||
}
|
||||
|
||||
// fix: need to handle/test case with 2 nodes
|
||||
|
||||
///@param cellsize A virtual grid of size 2^10^3 is used in the process of creating the BVH
|
||||
void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, b3Scalar cellSize)
|
||||
{
|
||||
B3_PROFILE("b3ParallelLinearBvh::build()");
|
||||
|
||||
int numLeaves = worldSpaceAabbs.size(); //Number of leaves in the BVH == Number of rigid body AABBs
|
||||
int numInternalNodes = numLeaves - 1;
|
||||
|
||||
if(numLeaves < 2) return;
|
||||
|
||||
//
|
||||
{
|
||||
m_internalNodeAabbs.resize(numInternalNodes);
|
||||
m_internalNodeChildNodes.resize(numInternalNodes);
|
||||
m_internalNodeParentNodes.resize(numInternalNodes);
|
||||
|
||||
m_leafNodeParentNodes.resize(numLeaves);
|
||||
m_mortonCodesAndAabbIndicies.resize(numLeaves);
|
||||
}
|
||||
|
||||
//Determine number of levels in the binary tree( numLevels = ceil( log2(numLeaves) ) )
|
||||
//The number of levels is equivalent to the number of bits needed to uniquely identify each node(including both internal and leaf nodes)
|
||||
int numLevels = 0;
|
||||
{
|
||||
//Find the most significant bit(msb)
|
||||
int mostSignificantBit = 0;
|
||||
{
|
||||
int temp = numLeaves;
|
||||
while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)
|
||||
}
|
||||
numLevels = mostSignificantBit + 1;
|
||||
|
||||
//If the number of nodes is not a power of 2(as in, can be expressed as 2^N where N is an integer), then there is 1 additional level
|
||||
if( ~(1 << mostSignificantBit) & numLeaves ) numLevels++;
|
||||
|
||||
if(1) printf("numLeaves, numLevels, mostSignificantBit: %d, %d, %d \n", numLeaves, numLevels, mostSignificantBit);
|
||||
}
|
||||
|
||||
//Determine number of nodes per level, use prefix sum to get offsets of each level, and send to GPU
|
||||
{
|
||||
B3_PROFILE("Determine number of nodes per level");
|
||||
|
||||
m_numNodesPerLevelCpu.resize(numLevels);
|
||||
|
||||
//The last level contains the leaf nodes; number of leaves is already known
|
||||
if(numLevels - 1 >= 0) m_numNodesPerLevelCpu[numLevels - 1] = numLeaves;
|
||||
|
||||
//Calculate number of nodes in each level;
|
||||
//start from the second to last level(level right next to leaf nodes) and move towards the root(level 0)
|
||||
int hasRemainder = 0;
|
||||
for(int levelIndex = numLevels - 2; levelIndex >= 0; --levelIndex)
|
||||
{
|
||||
int numNodesPreviousLevel = m_numNodesPerLevelCpu[levelIndex + 1]; //For first iteration this == numLeaves
|
||||
|
||||
bool allNodesAllocated = ( (numNodesPreviousLevel + hasRemainder) % 2 == 0 );
|
||||
|
||||
int numNodesCurrentLevel = (allNodesAllocated) ? (numNodesPreviousLevel + hasRemainder) / 2 : numNodesPreviousLevel / 2;
|
||||
m_numNodesPerLevelCpu[levelIndex] = numNodesCurrentLevel;
|
||||
|
||||
hasRemainder = static_cast<int>(!allNodesAllocated);
|
||||
}
|
||||
|
||||
//Prefix sum to calculate the first index offset of each level
|
||||
{
|
||||
m_firstIndexOffsetPerLevelCpu = m_numNodesPerLevelCpu;
|
||||
|
||||
//Perform inclusive scan
|
||||
for(int i = 1; i < m_firstIndexOffsetPerLevelCpu.size(); ++i)
|
||||
m_firstIndexOffsetPerLevelCpu[i] += m_firstIndexOffsetPerLevelCpu[i - 1];
|
||||
|
||||
//Convert inclusive scan to exclusive scan to get the offsets
|
||||
//This is equivalent to shifting each element in m_firstIndexOffsetPerLevelCpu[] by 1 to the right,
|
||||
//and setting the first element to 0
|
||||
for(int i = 0; i < m_firstIndexOffsetPerLevelCpu.size(); ++i)
|
||||
m_firstIndexOffsetPerLevelCpu[i] -= m_numNodesPerLevelCpu[i];
|
||||
}
|
||||
|
||||
if(1)
|
||||
{
|
||||
int numInternalNodes = 0;
|
||||
for(int i = 0; i < numLevels; ++i)
|
||||
if(i < numLevels - 1) numInternalNodes += m_numNodesPerLevelCpu[i];
|
||||
printf("numInternalNodes: %d\n", numInternalNodes);
|
||||
|
||||
for(int i = 0; i < numLevels; ++i)
|
||||
printf("numNodes, offset[%d]: %d, %d \n", i, m_numNodesPerLevelCpu[i], m_firstIndexOffsetPerLevelCpu[i]);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
//Copy to GPU
|
||||
m_numNodesPerLevelGpu.copyFromHost(m_numNodesPerLevelCpu, false);
|
||||
m_firstIndexOffsetPerLevelGpu.copyFromHost(m_firstIndexOffsetPerLevelCpu, false);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Find the AABB of all input AABBs; this is used to define the size of
|
||||
//each cell in the virtual grid(2^10 cells in each dimension).
|
||||
{
|
||||
B3_PROFILE("Find AABB of merged nodes");
|
||||
|
||||
/*b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_allNodesMergedAabb.getBufferCL() ),
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_findAllNodesMergedAabb, "m_findAllNodesMergedAabb");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLeaves);
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
clFinish(m_queue);*/
|
||||
}
|
||||
|
||||
//Insert the center of the AABBs into a virtual grid,
|
||||
//then convert the discrete grid coordinates into a morton code
|
||||
//For each element in m_mortonCodesAndAabbIndicies, set
|
||||
// m_key == morton code (value to sort by)
|
||||
// m_value = AABB index
|
||||
{
|
||||
B3_PROFILE("Assign morton codes");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(cellSize);
|
||||
launcher.setConst(numLeaves);
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//
|
||||
{
|
||||
B3_PROFILE("Sort leaves by morton codes");
|
||||
|
||||
m_radixSorter.execute(m_mortonCodesAndAabbIndicies);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Optional; only element at m_internalNodeParentNodes[0], the root node, needs to be set here
|
||||
//as the parent indices of other nodes are overwritten during m_constructBinaryTreeKernel
|
||||
{
|
||||
B3_PROFILE("Reset parent node indices");
|
||||
|
||||
m_fill.execute( m_internalNodeParentNodes, B3_PLBVH_ROOT_NODE_MARKER, m_internalNodeParentNodes.size() );
|
||||
m_fill.execute( m_leafNodeParentNodes, B3_PLBVH_ROOT_NODE_MARKER, m_leafNodeParentNodes.size() );
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Construct binary tree; find the children of each internal node, and assign parent nodes
|
||||
{
|
||||
B3_PROFILE("Construct binary tree");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_firstIndexOffsetPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_numNodesPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_constructBinaryTreeKernel, "m_constructBinaryTreeKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLevels);
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
|
||||
if(1)
|
||||
{
|
||||
static b3AlignedObjectArray<b3Int2> internalNodeChildNodes;
|
||||
m_internalNodeChildNodes.copyToHost(internalNodeChildNodes, false);
|
||||
clFinish(m_queue);
|
||||
|
||||
for(int i = 0; i < 256; ++i) printf("ch[%d]: %d, %d\n", i, internalNodeChildNodes[i].x, internalNodeChildNodes[i].y);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
//For each internal node, check children to get its AABB; start from the
|
||||
//last level and move towards the root
|
||||
{
|
||||
B3_PROFILE("Set AABBs");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_firstIndexOffsetPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_numNodesPerLevelGpu.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_determineInternalNodeAabbsKernel, "m_determineInternalNodeAabbsKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLevels);
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
clFinish(m_queue);
|
||||
|
||||
if(0)
|
||||
{
|
||||
static b3AlignedObjectArray<b3SapAabb> rigidAabbs;
|
||||
worldSpaceAabbs.copyToHost(rigidAabbs, false);
|
||||
clFinish(m_queue);
|
||||
|
||||
b3SapAabb actualRootAabb;
|
||||
actualRootAabb.m_minVec = b3MakeVector3(B3_LARGE_FLOAT, B3_LARGE_FLOAT, B3_LARGE_FLOAT);
|
||||
actualRootAabb.m_maxVec = b3MakeVector3(-B3_LARGE_FLOAT, -B3_LARGE_FLOAT, -B3_LARGE_FLOAT);
|
||||
for(int i = 0; i < rigidAabbs.size(); ++i)
|
||||
{
|
||||
actualRootAabb.m_minVec.setMin(rigidAabbs[i].m_minVec);
|
||||
actualRootAabb.m_maxVec.setMax(rigidAabbs[i].m_maxVec);
|
||||
}
|
||||
printf("actualRootMin: %f, %f, %f \n", actualRootAabb.m_minVec.x, actualRootAabb.m_minVec.y, actualRootAabb.m_minVec.z);
|
||||
printf("actualRootMax: %f, %f, %f \n", actualRootAabb.m_maxVec.x, actualRootAabb.m_maxVec.y, actualRootAabb.m_maxVec.z);
|
||||
|
||||
b3SapAabb rootAabb = m_internalNodeAabbs.at(0);
|
||||
printf("rootMin: %f, %f, %f \n", rootAabb.m_minVec.x, rootAabb.m_minVec.y, rootAabb.m_minVec.z);
|
||||
printf("rootMax: %f, %f, %f \n", rootAabb.m_maxVec.x, rootAabb.m_maxVec.y, rootAabb.m_maxVec.z);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Max number of pairs is out_overlappingPairs.size()
|
||||
//If the number of overlapping pairs is < out_overlappingPairs.size(), the array is resized
|
||||
void calculateOverlappingPairs(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs,
|
||||
b3OpenCLArray<int>& out_numPairs, b3OpenCLArray<b3Int4>& out_overlappingPairs)
|
||||
{
|
||||
b3Assert( out_numPairs.size() == 1 );
|
||||
|
||||
int maxPairs = out_overlappingPairs.size();
|
||||
|
||||
int reset = 0;
|
||||
out_numPairs.copyFromHostPointer(&reset, 1);
|
||||
|
||||
{
|
||||
B3_PROFILE("PLBVH calculateOverlappingPairs");
|
||||
|
||||
int numQueryAabbs = worldSpaceAabbs.size();
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ),
|
||||
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
|
||||
b3BufferInfoCL( out_numPairs.getBufferCL() ),
|
||||
b3BufferInfoCL( out_overlappingPairs.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_plbvhCalculateOverlappingPairsKernel, "m_plbvhCalculateOverlappingPairsKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(maxPairs);
|
||||
launcher.setConst(numQueryAabbs);
|
||||
|
||||
launcher.launch1D(numQueryAabbs);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
int numPairs = -1;
|
||||
out_numPairs.copyToHostPointer(&numPairs, 1);
|
||||
if(numPairs > maxPairs)
|
||||
{
|
||||
b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
|
||||
numPairs = maxPairs;
|
||||
out_numPairs.copyFromHostPointer(&maxPairs, 1);
|
||||
}
|
||||
|
||||
out_overlappingPairs.resize(numPairs);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,98 @@
|
||||
/*
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Initial Author Jackson Lee, 2014
|
||||
|
||||
#ifndef B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H
|
||||
#define B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H
|
||||
|
||||
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
|
||||
|
||||
#include "b3GpuParallelLinearBvh.h"
|
||||
|
||||
class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface
|
||||
{
|
||||
b3GpuParallelLinearBvh m_plbvh;
|
||||
|
||||
b3OpenCLArray<b3Int4> m_overlappingPairsGpu;
|
||||
b3OpenCLArray<b3SapAabb> m_aabbsGpu;
|
||||
b3OpenCLArray<int> m_tempNumPairs;
|
||||
|
||||
b3AlignedObjectArray<b3SapAabb> m_aabbsCpu;
|
||||
|
||||
public:
|
||||
b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) :
|
||||
m_plbvh(context, device, queue),
|
||||
|
||||
m_overlappingPairsGpu(context, queue),
|
||||
m_aabbsGpu(context, queue),
|
||||
m_tempNumPairs(context, queue)
|
||||
{
|
||||
m_tempNumPairs.resize(1);
|
||||
}
|
||||
virtual ~b3GpuParallelLinearBvhBroadphase() {}
|
||||
|
||||
virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
|
||||
{
|
||||
b3SapAabb aabb;
|
||||
aabb.m_minVec = aabbMin;
|
||||
aabb.m_maxVec = aabbMax;
|
||||
aabb.m_minIndices[3] = userPtr;
|
||||
|
||||
m_aabbsCpu.push_back(aabb);
|
||||
}
|
||||
virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, short int collisionFilterGroup, short int collisionFilterMask)
|
||||
{
|
||||
b3Assert(0); //Not implemented
|
||||
}
|
||||
|
||||
virtual void calculateOverlappingPairs(int maxPairs)
|
||||
{
|
||||
//Detect overall min/max
|
||||
{
|
||||
//Not implemented
|
||||
}
|
||||
|
||||
//Reconstruct BVH
|
||||
const b3Scalar CELL_SIZE(0.1);
|
||||
m_plbvh.build(m_aabbsGpu, CELL_SIZE);
|
||||
|
||||
//
|
||||
m_overlappingPairsGpu.resize(maxPairs);
|
||||
m_plbvh.calculateOverlappingPairs(m_aabbsGpu, m_tempNumPairs, m_overlappingPairsGpu);
|
||||
}
|
||||
virtual void calculateOverlappingPairsHost(int maxPairs)
|
||||
{
|
||||
b3Assert(0); //CPU version not implemented
|
||||
}
|
||||
|
||||
//call writeAabbsToGpu after done making all changes (createProxy etc)
|
||||
virtual void writeAabbsToGpu() { m_aabbsGpu.copyFromHost(m_aabbsCpu); }
|
||||
|
||||
virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); }
|
||||
virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); }
|
||||
|
||||
virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); }
|
||||
virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() { return m_aabbsGpu; }
|
||||
|
||||
virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()
|
||||
{
|
||||
b3Assert(0); //CPU version not implemented
|
||||
return m_aabbsCpu;
|
||||
}
|
||||
|
||||
static b3GpuBroadphaseInterface* CreateFunc(cl_context context, cl_device_id device, cl_command_queue queue)
|
||||
{
|
||||
return new b3GpuParallelLinearBvhBroadphase(context, device, queue);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,399 @@
|
||||
/*
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
//Initial Author Jackson Lee, 2014
|
||||
|
||||
typedef float b3Scalar;
|
||||
typedef float4 b3Vector3;
|
||||
#define b3Max max
|
||||
#define b3Min min
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int m_key;
|
||||
unsigned int m_value;
|
||||
} SortDataCL;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
float4 m_min;
|
||||
float m_minElems[4];
|
||||
int m_minIndices[4];
|
||||
};
|
||||
union
|
||||
{
|
||||
float4 m_max;
|
||||
float m_maxElems[4];
|
||||
int m_maxIndices[4];
|
||||
};
|
||||
} b3AabbCL;
|
||||
|
||||
|
||||
unsigned int interleaveBits(unsigned int x)
|
||||
{
|
||||
//........ ........ ......12 3456789A //x
|
||||
//....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits
|
||||
|
||||
//........ ....1234 56789A12 3456789A //x |= (x << 10)
|
||||
//........ ....1111 1....... ...11111 //0x 00 0F 80 1F
|
||||
//........ ....1234 5....... ...6789A //x = ( x | (x << 10) ) & 0x000F801F;
|
||||
|
||||
//.......1 23451234 5.....67 89A6789A //x |= (x << 5)
|
||||
//.......1 1.....11 1.....11 .....111 //0x 01 83 83 07
|
||||
//.......1 2.....34 5.....67 .....89A //x = ( x | (x << 5) ) & 0x01838307;
|
||||
|
||||
//....12.1 2..34534 5..67.67 ..89A89A //x |= (x << 3)
|
||||
//....1... 1..1...1 1..1...1 ..1...11 //0x 08 91 91 23
|
||||
//....1... 2..3...4 5..6...7 ..8...9A //x = ( x | (x << 3) ) & 0x08919123;
|
||||
|
||||
//...11..2 2.33..4N 5.66..77 .88..9NA //x |= (x << 1) ( N indicates overlapping bits, first overlap is bit {4,5} second is {9,A} )
|
||||
//....1..1 ..1...1. 1..1..1. .1...1.1 //0x 09 22 92 45
|
||||
//....1..2 ..3...4. 5..6..7. .8...9.A //x = ( x | (x << 1) ) & 0x09229245;
|
||||
|
||||
//...11.22 .33..445 5.66.77. 88..99AA //x |= (x << 1)
|
||||
//....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 34 92 29
|
||||
//....1..2 ..3..4.. 5..6..7. .8..9..A //x = ( x | (x << 1) ) & 0x09349229;
|
||||
|
||||
//........ ........ ......11 11111111 //0x000003FF
|
||||
x &= 0x000003FF; //Clear all bits above bit 10
|
||||
|
||||
x = ( x | (x << 10) ) & 0x000F801F;
|
||||
x = ( x | (x << 5) ) & 0x01838307;
|
||||
x = ( x | (x << 3) ) & 0x08919123;
|
||||
x = ( x | (x << 1) ) & 0x09229245;
|
||||
x = ( x | (x << 1) ) & 0x09349229;
|
||||
|
||||
return x;
|
||||
}
|
||||
unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)
|
||||
{
|
||||
return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
|
||||
}
|
||||
|
||||
/*
|
||||
__kernel void findAllNodesMergedAabb(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* out_mergedAabb, int numAabbs)
|
||||
{
|
||||
int aabbIndex = get_global_id(0);
|
||||
if(aabbIndex >= numAabbs) return;
|
||||
|
||||
//Find the most significant bit(msb)
|
||||
int mostSignificantBit = 0;
|
||||
{
|
||||
int temp = numLeaves;
|
||||
while(temp >>= 1) mostSignificantBit++; //Start counting from 0 (0 and 1 have msb 0, 2 has msb 1)
|
||||
}
|
||||
|
||||
int numberOfAabbsAboveMsbSplit = numAabbs & ~( ~(0) << mostSignificantBit ); // verify
|
||||
int numRemainingAabbs = (1 << mostSignificantBit);
|
||||
|
||||
//Merge AABBs above most significant bit so that the number of remaining AABBs is a power of 2
|
||||
//For example, if there are 159 AABBs = 128 + 31, then merge indices [0, 30] and 128 + [0, 30]
|
||||
if(aabbIndex < numberOfAabbsAboveMsbSplit)
|
||||
{
|
||||
int otherAabbIndex = numRemainingAabbs + aabbIndex;
|
||||
|
||||
b3AabbCL aabb = worldSpaceAabbs[aabbIndex];
|
||||
b3AabbCL otherAabb = worldSpaceAabbs[otherAabbIndex];
|
||||
|
||||
b3AabbCL mergedAabb;
|
||||
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
|
||||
mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
|
||||
out_mergedAabb[aabbIndex] = mergedAabb;
|
||||
}
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
|
||||
//
|
||||
int offset = numRemainingAabbs / 2;
|
||||
while(offset >= 1)
|
||||
{
|
||||
if(aabbIndex < offset)
|
||||
{
|
||||
int otherAabbIndex = aabbIndex + offset;
|
||||
|
||||
b3AabbCL aabb = worldSpaceAabbs[aabbIndex];
|
||||
b3AabbCL otherAabb = worldSpaceAabbs[otherAabbIndex];
|
||||
|
||||
b3AabbCL mergedAabb;
|
||||
mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
|
||||
mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
|
||||
out_mergedAabb[aabbIndex] = mergedAabb;
|
||||
}
|
||||
|
||||
offset = offset / 2;
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs,
|
||||
__global SortDataCL* out_mortonCodesAndAabbIndices,
|
||||
b3Scalar cellSize, int numAabbs)
|
||||
{
|
||||
int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index
|
||||
if(leafNodeIndex >= numAabbs) return;
|
||||
|
||||
b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];
|
||||
|
||||
b3Vector3 center = (aabb.m_min + aabb.m_max) * 0.5f;
|
||||
|
||||
//Quantize into integer coordinates
|
||||
//floor() is needed to prevent the center cell, at (0,0,0) from being twice the size
|
||||
b3Vector3 gridPosition = center / cellSize;
|
||||
|
||||
int4 discretePosition;
|
||||
discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );
|
||||
discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );
|
||||
discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );
|
||||
|
||||
//Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]
|
||||
discretePosition = b3Max( -512, b3Min(discretePosition, 511) );
|
||||
discretePosition += 512;
|
||||
|
||||
//Interleave bits(assign a morton code, also known as a z-curve)
|
||||
unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);
|
||||
|
||||
//
|
||||
SortDataCL mortonCodeIndexPair;
|
||||
mortonCodeIndexPair.m_key = mortonCode;
|
||||
mortonCodeIndexPair.m_value = leafNodeIndex;
|
||||
|
||||
out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;
|
||||
}
|
||||
|
||||
#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128
|
||||
#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the (root) node has no parent
|
||||
#define B3_PLBVH_ROOT_NODE_INDEX 0
|
||||
|
||||
//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node.
|
||||
//Positive index == leaf node, while negative index == internal node (remove negative sign to get index).
|
||||
//
|
||||
//Since the root internal node is at index 0, no internal nodes should reference it as a child,
|
||||
//and so index 0 is always used to indicate a leaf node.
|
||||
int isLeafNode(int index) { return (index >= 0); }
|
||||
int getIndexWithInternalNodeMarkerRemoved(int index) { return (index >= 0) ? index : -index; }
|
||||
int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : -index; }
|
||||
|
||||
__kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,
|
||||
__global int* numNodesPerLevel,
|
||||
__global int2* out_internalNodeChildIndices,
|
||||
__global int* out_internalNodeParentNodes,
|
||||
__global int* out_leafNodeParentNodes,
|
||||
int numLevels, int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_global_id(0);
|
||||
if(internalNodeIndex >= numInternalNodes) return;
|
||||
|
||||
//Find the level that this node is in, using linear search(could replace with binary search)
|
||||
int level = 0;
|
||||
int numInternalLevels = numLevels - 1; //All levels except the last are internal nodes
|
||||
for(; level < numInternalLevels; ++level)
|
||||
{
|
||||
if( firstIndexOffsetPerLevel[level] <= internalNodeIndex && internalNodeIndex < firstIndexOffsetPerLevel[level + 1]) break;
|
||||
}
|
||||
|
||||
//Check lower levels to find child nodes
|
||||
//Left child is always in the next level, but the same does not apply to the right child
|
||||
int indexInLevel = internalNodeIndex - firstIndexOffsetPerLevel[level];
|
||||
int firstIndexInNextLevel = firstIndexOffsetPerLevel[level + 1]; //Should never be out of bounds(see for loop above)
|
||||
|
||||
int leftChildLevel = level + 1;
|
||||
int leftChildIndex = firstIndexInNextLevel + indexInLevel * 2;
|
||||
|
||||
int rightChildLevel = level + 1;
|
||||
int rightChildIndex = leftChildIndex + 1;
|
||||
|
||||
//Under certain conditions, the right child index as calculated above is invalid; need to find the correct index
|
||||
//
|
||||
//First condition: must be at least 2 levels apart from the leaf node level;
|
||||
//if the current level is right next to the leaf node level, then the right child
|
||||
//will never be invalid due to the way the nodes are allocated (also avoid a out-of-bounds memory access)
|
||||
//
|
||||
//Second condition: not enough nodes in the next level for each parent to have 2 children, so the right child is invalid
|
||||
//
|
||||
//Third condition: must be the last node in its level
|
||||
if( level < numLevels - 2
|
||||
&& numNodesPerLevel[level] * 2 > numNodesPerLevel[level + 1]
|
||||
&& indexInLevel == numNodesPerLevel[level] - 1 )
|
||||
{
|
||||
//Check lower levels until we find a node without a parent
|
||||
for(; rightChildLevel < numLevels - 1; ++rightChildLevel)
|
||||
{
|
||||
int rightChildNextLevel = rightChildLevel + 1;
|
||||
|
||||
//If this branch is taken, it means that the last node in rightChildNextLevel has no parent
|
||||
if( numNodesPerLevel[rightChildLevel] * 2 < numNodesPerLevel[rightChildNextLevel] )
|
||||
{
|
||||
//Set the node to the last node in rightChildNextLevel
|
||||
rightChildLevel = rightChildNextLevel;
|
||||
rightChildIndex = firstIndexOffsetPerLevel[rightChildNextLevel] + numNodesPerLevel[rightChildNextLevel] - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int isLeftChildLeaf = (leftChildLevel >= numLevels - 1);
|
||||
int isRightChildLeaf = (rightChildLevel >= numLevels - 1);
|
||||
|
||||
//If left/right child is a leaf node, the index needs to be corrected
|
||||
//the way the index is calculated assumes that the leaf and internal nodes are in a contiguous array,
|
||||
//with leaf nodes at the end of the array; in actuality, the leaf and internal nodes are in separate arrays
|
||||
{
|
||||
int leafNodeLevel = numLevels - 1;
|
||||
leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;
|
||||
rightChildIndex = (isLeftChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;
|
||||
}
|
||||
|
||||
//Set the negative sign bit if the node is internal
|
||||
int2 childIndices;
|
||||
childIndices.x = getIndexWithInternalNodeMarkerSet(isLeftChildLeaf, leftChildIndex);
|
||||
childIndices.y = getIndexWithInternalNodeMarkerSet(isRightChildLeaf, rightChildIndex);
|
||||
out_internalNodeChildIndices[internalNodeIndex] = childIndices;
|
||||
|
||||
//Assign parent node index to children
|
||||
__global int* out_leftChildParentNodeIndices = (isLeftChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;
|
||||
out_leftChildParentNodeIndices[leftChildIndex] = internalNodeIndex;
|
||||
|
||||
__global int* out_rightChildParentNodeIndices = (isRightChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;
|
||||
out_rightChildParentNodeIndices[rightChildIndex] = internalNodeIndex;
|
||||
}
|
||||
|
||||
__kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,
|
||||
__global int* numNodesPerLevel,
|
||||
__global int2* internalNodeChildIndices,
|
||||
__global SortDataCL* mortonCodesAndAabbIndices,
|
||||
__global b3AabbCL* leafNodeAabbs,
|
||||
__global b3AabbCL* out_internalNodeAabbs, int numLevels, int numInternalNodes)
|
||||
{
|
||||
int i = get_global_id(0);
|
||||
if(i >= numInternalNodes) return;
|
||||
|
||||
int numInternalLevels = numLevels - 1;
|
||||
|
||||
//Starting from the level next to the leaf nodes, move towards the root(level 0)
|
||||
for(int level = numInternalLevels - 1; level >= 0; --level)
|
||||
{
|
||||
int indexInLevel = i; //Index relative to firstIndexOffsetPerLevel[level]
|
||||
|
||||
int numNodesInLevel = numNodesPerLevel[level];
|
||||
if(i < numNodesInLevel)
|
||||
{
|
||||
int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];
|
||||
int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];
|
||||
|
||||
int leftChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.x);
|
||||
int rightChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.y);
|
||||
|
||||
int isLeftChildLeaf = isLeafNode(childIndicies.x);
|
||||
int isRightChildLeaf = isLeafNode(childIndicies.y);
|
||||
|
||||
int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;
|
||||
int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;
|
||||
|
||||
b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];
|
||||
b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];
|
||||
|
||||
b3AabbCL internalNodeAabb;
|
||||
internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);
|
||||
internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);
|
||||
out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;
|
||||
}
|
||||
|
||||
barrier(CLK_GLOBAL_MEM_FENCE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//From sap.cl
|
||||
#define NEW_PAIR_MARKER -1
|
||||
|
||||
bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)
|
||||
{
|
||||
bool overlap = true;
|
||||
overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
|
||||
overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
|
||||
overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
|
||||
return overlap;
|
||||
}
|
||||
//From sap.cl
|
||||
|
||||
__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
|
||||
__global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs,
|
||||
__global SortDataCL* mortonCodesAndAabbIndices,
|
||||
__global int* out_numPairs, __global int4* out_overlappingPairs,
|
||||
int maxPairs, int numQueryAabbs)
|
||||
{
|
||||
#define USE_SPATIALLY_COHERENT_INDICIES //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve
|
||||
#ifdef USE_SPATIALLY_COHERENT_INDICIES
|
||||
int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
if(queryRigidIndex >= numQueryAabbs) return;
|
||||
|
||||
queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;
|
||||
#else
|
||||
int queryRigidIndex = get_global_id(0);
|
||||
if(queryRigidIndex >= numQueryAabbs) return;
|
||||
#endif
|
||||
|
||||
b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];
|
||||
|
||||
int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
|
||||
|
||||
//Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)
|
||||
int stackSize = 2;
|
||||
stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;
|
||||
stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;
|
||||
|
||||
while(stackSize)
|
||||
{
|
||||
int internalOrLeafNodeIndex = stack[ stackSize - 1 ];
|
||||
--stackSize;
|
||||
|
||||
int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false
|
||||
int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);
|
||||
|
||||
//bvhRigidIndex is not used if internal node
|
||||
int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;
|
||||
|
||||
b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];
|
||||
if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )
|
||||
{
|
||||
if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3])
|
||||
{
|
||||
int4 pair;
|
||||
pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];
|
||||
pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];
|
||||
pair.z = NEW_PAIR_MARKER;
|
||||
pair.w = NEW_PAIR_MARKER;
|
||||
|
||||
int pairIndex = atomic_inc(out_numPairs);
|
||||
if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;
|
||||
}
|
||||
|
||||
if(!isLeaf) //Internal node
|
||||
{
|
||||
if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)
|
||||
{
|
||||
//Error
|
||||
}
|
||||
else
|
||||
{
|
||||
stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;
|
||||
stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,325 @@
|
||||
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
|
||||
static const char* parallelLinearBvhCL= \
|
||||
"/*\n"
|
||||
"This software is provided 'as-is', without any express or implied warranty.\n"
|
||||
"In no event will the authors be held liable for any damages arising from the use of this software.\n"
|
||||
"Permission is granted to anyone to use this software for any purpose,\n"
|
||||
"including commercial applications, and to alter it and redistribute it freely,\n"
|
||||
"subject to the following restrictions:\n"
|
||||
"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
|
||||
"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
|
||||
"3. This notice may not be removed or altered from any source distribution.\n"
|
||||
"*/\n"
|
||||
"//Initial Author Jackson Lee, 2014\n"
|
||||
"typedef float b3Scalar;\n"
|
||||
"typedef float4 b3Vector3;\n"
|
||||
"#define b3Max max\n"
|
||||
"#define b3Min min\n"
|
||||
"typedef struct\n"
|
||||
"{\n"
|
||||
" unsigned int m_key;\n"
|
||||
" unsigned int m_value;\n"
|
||||
"} SortDataCL;\n"
|
||||
"typedef struct \n"
|
||||
"{\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" float4 m_min;\n"
|
||||
" float m_minElems[4];\n"
|
||||
" int m_minIndices[4];\n"
|
||||
" };\n"
|
||||
" union\n"
|
||||
" {\n"
|
||||
" float4 m_max;\n"
|
||||
" float m_maxElems[4];\n"
|
||||
" int m_maxIndices[4];\n"
|
||||
" };\n"
|
||||
"} b3AabbCL;\n"
|
||||
"unsigned int interleaveBits(unsigned int x)\n"
|
||||
"{\n"
|
||||
" //........ ........ ......12 3456789A //x\n"
|
||||
" //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n"
|
||||
" \n"
|
||||
" //........ ....1234 56789A12 3456789A //x |= (x << 10)\n"
|
||||
" //........ ....1111 1....... ...11111 //0x 00 0F 80 1F\n"
|
||||
" //........ ....1234 5....... ...6789A //x = ( x | (x << 10) ) & 0x000F801F; \n"
|
||||
" \n"
|
||||
" //.......1 23451234 5.....67 89A6789A //x |= (x << 5)\n"
|
||||
" //.......1 1.....11 1.....11 .....111 //0x 01 83 83 07\n"
|
||||
" //.......1 2.....34 5.....67 .....89A //x = ( x | (x << 5) ) & 0x01838307;\n"
|
||||
" \n"
|
||||
" //....12.1 2..34534 5..67.67 ..89A89A //x |= (x << 3)\n"
|
||||
" //....1... 1..1...1 1..1...1 ..1...11 //0x 08 91 91 23\n"
|
||||
" //....1... 2..3...4 5..6...7 ..8...9A //x = ( x | (x << 3) ) & 0x08919123;\n"
|
||||
" \n"
|
||||
" //...11..2 2.33..4N 5.66..77 .88..9NA //x |= (x << 1) ( N indicates overlapping bits, first overlap is bit {4,5} second is {9,A} )\n"
|
||||
" //....1..1 ..1...1. 1..1..1. .1...1.1 //0x 09 22 92 45\n"
|
||||
" //....1..2 ..3...4. 5..6..7. .8...9.A //x = ( x | (x << 1) ) & 0x09229245;\n"
|
||||
" \n"
|
||||
" //...11.22 .33..445 5.66.77. 88..99AA //x |= (x << 1)\n"
|
||||
" //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 34 92 29\n"
|
||||
" //....1..2 ..3..4.. 5..6..7. .8..9..A //x = ( x | (x << 1) ) & 0x09349229;\n"
|
||||
" \n"
|
||||
" //........ ........ ......11 11111111 //0x000003FF\n"
|
||||
" x &= 0x000003FF; //Clear all bits above bit 10\n"
|
||||
" \n"
|
||||
" x = ( x | (x << 10) ) & 0x000F801F;\n"
|
||||
" x = ( x | (x << 5) ) & 0x01838307;\n"
|
||||
" x = ( x | (x << 3) ) & 0x08919123;\n"
|
||||
" x = ( x | (x << 1) ) & 0x09229245;\n"
|
||||
" x = ( x | (x << 1) ) & 0x09349229;\n"
|
||||
" \n"
|
||||
" return x;\n"
|
||||
"}\n"
|
||||
"unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n"
|
||||
"{\n"
|
||||
" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
|
||||
"}\n"
|
||||
"__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, \n"
|
||||
" __global SortDataCL* out_mortonCodesAndAabbIndices, \n"
|
||||
" b3Scalar cellSize, int numAabbs)\n"
|
||||
"{\n"
|
||||
" int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n"
|
||||
" if(leafNodeIndex >= numAabbs) return;\n"
|
||||
" \n"
|
||||
" b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n"
|
||||
" \n"
|
||||
" b3Vector3 center = (aabb.m_min + aabb.m_max) * 0.5f;\n"
|
||||
" \n"
|
||||
" //Quantize into integer coordinates\n"
|
||||
" //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n"
|
||||
" b3Vector3 gridPosition = center / cellSize;\n"
|
||||
" \n"
|
||||
" int4 discretePosition;\n"
|
||||
" discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n"
|
||||
" discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n"
|
||||
" discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );\n"
|
||||
" \n"
|
||||
" //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n"
|
||||
" discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n"
|
||||
" discretePosition += 512;\n"
|
||||
" \n"
|
||||
" //Interleave bits(assign a morton code, also known as a z-curve)\n"
|
||||
" unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n"
|
||||
" \n"
|
||||
" //\n"
|
||||
" SortDataCL mortonCodeIndexPair;\n"
|
||||
" mortonCodeIndexPair.m_key = mortonCode;\n"
|
||||
" mortonCodeIndexPair.m_value = leafNodeIndex;\n"
|
||||
" \n"
|
||||
" out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n"
|
||||
"}\n"
|
||||
"#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n"
|
||||
"#define B3_PLBVH_ROOT_NODE_MARKER -1 //Used to indicate that the node has no parent \n"
|
||||
"#define B3_PLBVH_ROOT_NODE_INDEX 0\n"
|
||||
"//For elements of internalNodeChildIndices(int2), the negative bit determines whether it is a leaf or internal node.\n"
|
||||
"//Positive index == leaf node, while negative index == internal node (remove negative sign to get index).\n"
|
||||
"//\n"
|
||||
"//Since the root internal node is at index 0, no internal nodes should reference it as a child,\n"
|
||||
"//and so index 0 is always used to indicate a leaf node.\n"
|
||||
"int isLeafNode(int index) { return (index >= 0); }\n"
|
||||
"int getIndexWithInternalNodeMarkerRemoved(int index) { return (index >= 0) ? index : -index; }\n"
|
||||
"int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : -index; }\n"
|
||||
"__kernel void constructBinaryTree(__global int* firstIndexOffsetPerLevel,\n"
|
||||
" __global int* numNodesPerLevel,\n"
|
||||
" __global int2* out_internalNodeChildIndices, \n"
|
||||
" __global int* out_internalNodeParentNodes, \n"
|
||||
" __global int* out_leafNodeParentNodes, \n"
|
||||
" int numLevels, int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_global_id(0);\n"
|
||||
" if(internalNodeIndex >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" //Find the level that this node is in, using linear search(could replace with binary search)\n"
|
||||
" int level = 0;\n"
|
||||
" int numInternalLevels = numLevels - 1; //All levels except the last are internal nodes\n"
|
||||
" for(; level < numInternalLevels; ++level)\n"
|
||||
" {\n"
|
||||
" if( firstIndexOffsetPerLevel[level] <= internalNodeIndex && internalNodeIndex < firstIndexOffsetPerLevel[level + 1]) break;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Check lower levels to find child nodes\n"
|
||||
" //Left child is always in the next level, but the same does not apply to the right child\n"
|
||||
" int indexInLevel = internalNodeIndex - firstIndexOffsetPerLevel[level];\n"
|
||||
" int firstIndexInNextLevel = firstIndexOffsetPerLevel[level + 1]; //Should never be out of bounds(see for loop above)\n"
|
||||
" \n"
|
||||
" int leftChildLevel = level + 1;\n"
|
||||
" int leftChildIndex = firstIndexInNextLevel + indexInLevel * 2;\n"
|
||||
" \n"
|
||||
" int rightChildLevel = level + 1;\n"
|
||||
" int rightChildIndex = leftChildIndex + 1;\n"
|
||||
" \n"
|
||||
" //Under certain conditions, the right child index as calculated above is invalid; need to find the correct index\n"
|
||||
" //\n"
|
||||
" //First condition: must be at least 2 levels apart from the leaf node level;\n"
|
||||
" //if the current level is right next to the leaf node level, then the right child\n"
|
||||
" //will never be invalid due to the way the nodes are allocated (also avoid a out-of-bounds memory access)\n"
|
||||
" //\n"
|
||||
" //Second condition: not enough nodes in the next level for each parent to have 2 children, so the right child is invalid\n"
|
||||
" //\n"
|
||||
" //Third condition: must be the last node in its level\n"
|
||||
" if( level < numLevels - 2 \n"
|
||||
" && numNodesPerLevel[level] * 2 > numNodesPerLevel[level + 1] \n"
|
||||
" && indexInLevel == numNodesPerLevel[level] - 1 )\n"
|
||||
" {\n"
|
||||
" //Check lower levels until we find a node without a parent\n"
|
||||
" for(; rightChildLevel < numLevels - 1; ++rightChildLevel)\n"
|
||||
" {\n"
|
||||
" int rightChildNextLevel = rightChildLevel + 1;\n"
|
||||
" \n"
|
||||
" //If this branch is taken, it means that the last node in rightChildNextLevel has no parent\n"
|
||||
" if( numNodesPerLevel[rightChildLevel] * 2 < numNodesPerLevel[rightChildNextLevel] )\n"
|
||||
" {\n"
|
||||
" //Set the node to the last node in rightChildNextLevel\n"
|
||||
" rightChildLevel = rightChildNextLevel;\n"
|
||||
" rightChildIndex = firstIndexOffsetPerLevel[rightChildNextLevel] + numNodesPerLevel[rightChildNextLevel] - 1;\n"
|
||||
" break;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" int isLeftChildLeaf = (leftChildLevel >= numLevels - 1);\n"
|
||||
" int isRightChildLeaf = (rightChildLevel >= numLevels - 1);\n"
|
||||
" \n"
|
||||
" //If left/right child is a leaf node, the index needs to be corrected\n"
|
||||
" //the way the index is calculated assumes that the leaf and internal nodes are in a contiguous array,\n"
|
||||
" //with leaf nodes at the end of the array; in actuality, the leaf and internal nodes are in separate arrays\n"
|
||||
" {\n"
|
||||
" int leafNodeLevel = numLevels - 1;\n"
|
||||
" leftChildIndex = (isLeftChildLeaf) ? leftChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : leftChildIndex;\n"
|
||||
" rightChildIndex = (isLeftChildLeaf) ? rightChildIndex - firstIndexOffsetPerLevel[leafNodeLevel] : rightChildIndex;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Set the negative sign bit if the node is internal\n"
|
||||
" int2 childIndices;\n"
|
||||
" childIndices.x = getIndexWithInternalNodeMarkerSet(isLeftChildLeaf, leftChildIndex);\n"
|
||||
" childIndices.y = getIndexWithInternalNodeMarkerSet(isRightChildLeaf, rightChildIndex);\n"
|
||||
" out_internalNodeChildIndices[internalNodeIndex] = childIndices;\n"
|
||||
" \n"
|
||||
" //Assign parent node index to children\n"
|
||||
" __global int* out_leftChildParentNodeIndices = (isLeftChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;\n"
|
||||
" out_leftChildParentNodeIndices[leftChildIndex] = internalNodeIndex;\n"
|
||||
" \n"
|
||||
" __global int* out_rightChildParentNodeIndices = (isRightChildLeaf) ? out_leafNodeParentNodes : out_internalNodeParentNodes;\n"
|
||||
" out_rightChildParentNodeIndices[rightChildIndex] = internalNodeIndex;\n"
|
||||
"}\n"
|
||||
"__kernel void determineInternalNodeAabbs(__global int* firstIndexOffsetPerLevel,\n"
|
||||
" __global int* numNodesPerLevel, \n"
|
||||
" __global int2* internalNodeChildIndices,\n"
|
||||
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global b3AabbCL* leafNodeAabbs, \n"
|
||||
" __global b3AabbCL* out_internalNodeAabbs, int numLevels, int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" if(i >= numInternalNodes) return;\n"
|
||||
" \n"
|
||||
" int numInternalLevels = numLevels - 1;\n"
|
||||
" \n"
|
||||
" //Starting from the level next to the leaf nodes, move towards the root(level 0)\n"
|
||||
" for(int level = numInternalLevels - 1; level >= 0; --level)\n"
|
||||
" {\n"
|
||||
" int indexInLevel = i; //Index relative to firstIndexOffsetPerLevel[level]\n"
|
||||
" \n"
|
||||
" int numNodesInLevel = numNodesPerLevel[level];\n"
|
||||
" if(i < numNodesInLevel)\n"
|
||||
" {\n"
|
||||
" int internalNodeIndexGlobal = indexInLevel + firstIndexOffsetPerLevel[level];\n"
|
||||
" int2 childIndicies = internalNodeChildIndices[internalNodeIndexGlobal];\n"
|
||||
" \n"
|
||||
" int leftChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.x);\n"
|
||||
" int rightChildIndex = getIndexWithInternalNodeMarkerRemoved(childIndicies.y);\n"
|
||||
" \n"
|
||||
" int isLeftChildLeaf = isLeafNode(childIndicies.x);\n"
|
||||
" int isRightChildLeaf = isLeafNode(childIndicies.y);\n"
|
||||
" \n"
|
||||
" int leftChildLeafIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n"
|
||||
" int rightChildLeafIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n"
|
||||
" \n"
|
||||
" b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftChildLeafIndex] : out_internalNodeAabbs[leftChildIndex];\n"
|
||||
" b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightChildLeafIndex] : out_internalNodeAabbs[rightChildIndex];\n"
|
||||
" \n"
|
||||
" b3AabbCL internalNodeAabb;\n"
|
||||
" internalNodeAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n"
|
||||
" internalNodeAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n"
|
||||
" out_internalNodeAabbs[internalNodeIndexGlobal] = internalNodeAabb;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
"//From sap.cl\n"
|
||||
"#define NEW_PAIR_MARKER -1\n"
|
||||
"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n"
|
||||
"{\n"
|
||||
" bool overlap = true;\n"
|
||||
" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
|
||||
" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
|
||||
" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
|
||||
" return overlap;\n"
|
||||
"}\n"
|
||||
"//From sap.cl\n"
|
||||
"__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n"
|
||||
" __global int2* internalNodeChildIndices, __global b3AabbCL* internalNodeAabbs,\n"
|
||||
" __global SortDataCL* mortonCodesAndAabbIndices,\n"
|
||||
" __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
|
||||
" int maxPairs, int numQueryAabbs)\n"
|
||||
"{\n"
|
||||
" int queryRigidIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
|
||||
" if(queryRigidIndex >= numQueryAabbs) return;\n"
|
||||
" \n"
|
||||
" queryRigidIndex = mortonCodesAndAabbIndices[queryRigidIndex].m_value;\n"
|
||||
" //int queryRigidIndex = get_global_id(0);\n"
|
||||
" //if(queryRigidIndex >= numQueryAabbs) return;\n"
|
||||
" \n"
|
||||
" b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n"
|
||||
" \n"
|
||||
" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
|
||||
" \n"
|
||||
" //Starting by placing only the root node index, 0, in the stack causes it to be detected as a leaf node(see isLeafNode() in loop)\n"
|
||||
" int stackSize = 2;\n"
|
||||
" stack[0] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].x;\n"
|
||||
" stack[1] = internalNodeChildIndices[B3_PLBVH_ROOT_NODE_INDEX].y;\n"
|
||||
" \n"
|
||||
" while(stackSize)\n"
|
||||
" {\n"
|
||||
" int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n"
|
||||
" --stackSize;\n"
|
||||
" \n"
|
||||
" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n"
|
||||
" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
|
||||
" \n"
|
||||
" //bvhRigidIndex is not used if internal node\n"
|
||||
" int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
|
||||
" \n"
|
||||
" b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
|
||||
" if( queryRigidIndex != bvhRigidIndex && TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n"
|
||||
" {\n"
|
||||
" if(isLeaf && rigidAabbs[queryRigidIndex].m_minIndices[3] < rigidAabbs[bvhRigidIndex].m_minIndices[3])\n"
|
||||
" {\n"
|
||||
" int4 pair;\n"
|
||||
" pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n"
|
||||
" pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n"
|
||||
" pair.z = NEW_PAIR_MARKER;\n"
|
||||
" pair.w = NEW_PAIR_MARKER;\n"
|
||||
" \n"
|
||||
" int pairIndex = atomic_inc(out_numPairs);\n"
|
||||
" if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" if(!isLeaf) //Internal node\n"
|
||||
" {\n"
|
||||
" if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n"
|
||||
" {\n"
|
||||
" //Error\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n"
|
||||
" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" }\n"
|
||||
"}\n"
|
||||
;
|
Loading…
Reference in New Issue
Block a user