Draft PLBVH construction using binary radix tree.

This commit is contained in:
Jackson Lee 2014-03-03 14:33:53 -08:00
parent 4dcd52c090
commit f19f853685
4 changed files with 564 additions and 6 deletions

View File

@ -31,6 +31,13 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
m_internalNodeChildNodes(context, queue),
m_internalNodeParentNodes(context, queue),
m_maxCommonPrefix(context, queue),
m_commonPrefixes(context, queue),
m_leftInternalNodePointers(context, queue),
m_rightInternalNodePointers(context, queue),
m_internalNodeLeftChildNodes(context, queue),
m_internalNodeRightChildNodes(context, queue),
m_leafNodeParentNodes(context, queue),
m_mortonCodesAndAabbIndicies(context, queue),
m_mergedAabb(context, queue),
@ -39,6 +46,7 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
m_largeAabbs(context, queue)
{
m_rootNodeIndex.resize(1);
m_maxCommonPrefix.resize(1);
//
const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl";
@ -61,6 +69,17 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
m_determineInternalNodeAabbsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "determineInternalNodeAabbs", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_determineInternalNodeAabbsKernel);
m_computePrefixAndInitPointersKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "computePrefixAndInitPointers", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_computePrefixAndInitPointersKernel);
m_correctDuplicatePrefixesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "correctDuplicatePrefixes", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_correctDuplicatePrefixesKernel);
m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeLeafNodes", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_buildBinaryRadixTreeLeafNodesKernel);
m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeInternalNodes", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_buildBinaryRadixTreeInternalNodesKernel);
m_convertChildNodeFormatKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "convertChildNodeFormat", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_convertChildNodeFormatKernel);
m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros );
b3Assert(m_plbvhCalculateOverlappingPairsKernel);
m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros );
@ -76,9 +95,16 @@ b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh()
clReleaseKernel(m_separateAabbsKernel);
clReleaseKernel(m_findAllNodesMergedAabbKernel);
clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel);
clReleaseKernel(m_constructBinaryTreeKernel);
clReleaseKernel(m_determineInternalNodeAabbsKernel);
clReleaseKernel(m_computePrefixAndInitPointersKernel);
clReleaseKernel(m_correctDuplicatePrefixesKernel);
clReleaseKernel(m_buildBinaryRadixTreeLeafNodesKernel);
clReleaseKernel(m_buildBinaryRadixTreeInternalNodesKernel);
clReleaseKernel(m_convertChildNodeFormatKernel);
clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel);
clReleaseKernel(m_plbvhRayTraverseKernel);
clReleaseKernel(m_plbvhLargeAabbAabbTestKernel);
@ -159,6 +185,12 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
m_internalNodeChildNodes.resize(numInternalNodes);
m_internalNodeParentNodes.resize(numInternalNodes);
m_commonPrefixes.resize(numInternalNodes);
m_leftInternalNodePointers.resize(numInternalNodes);
m_rightInternalNodePointers.resize(numInternalNodes);
m_internalNodeLeftChildNodes.resize(numInternalNodes);
m_internalNodeRightChildNodes.resize(numInternalNodes);
m_leafNodeParentNodes.resize(numLeaves);
m_mortonCodesAndAabbIndicies.resize(numLeaves);
m_mergedAabb.resize(numLeaves);
@ -166,7 +198,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
//Find the AABB of all input AABBs; this is used to define the size of
//Find the merged AABB of all small AABBs; this is used to define the size of
//each cell in the virtual grid(2^10 cells in each dimension).
{
B3_PROFILE("Find AABB of merged nodes");
@ -196,7 +228,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
//then convert the discrete grid coordinates into a morton code
//For each element in m_mortonCodesAndAabbIndicies, set
// m_key == morton code (value to sort by)
// m_value = AABB index
// m_value == small AABB index
{
B3_PROFILE("Assign morton codes");
@ -234,7 +266,8 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
}
//
constructSimpleBinaryTree();
//constructSimpleBinaryTree();
constructRadixBinaryTree();
}
void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<int>& out_numPairs, b3OpenCLArray<b3Int4>& out_overlappingPairs)
@ -393,6 +426,8 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayIn
void b3GpuParallelLinearBvh::constructSimpleBinaryTree()
{
B3_PROFILE("b3GpuParallelLinearBvh::constructSimpleBinaryTree()");
int numLeaves = m_leafNodeAabbs.size(); //Number of leaves in the BVH == Number of rigid bodies with small AABBs
int numInternalNodes = numLeaves - 1;
@ -532,9 +567,124 @@ void b3GpuParallelLinearBvh::constructSimpleBinaryTree()
}
}
void b3GpuParallelLinearBvh::constructRadixBinaryTree()
{
B3_PROFILE("b3GpuParallelLinearBvh::constructRadixBinaryTree()");
int numLeaves = m_leafNodeAabbs.size();
int numInternalNodes = numLeaves - 1;
//For each internal node, compute common prefix and set pointers to left and right internal nodes
{
B3_PROFILE("m_computePrefixAndInitPointersKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
b3BufferInfoCL( m_leftInternalNodePointers.getBufferCL() ),
b3BufferInfoCL( m_rightInternalNodePointers.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_computePrefixAndInitPointersKernel, "m_computePrefixAndInitPointersKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
//Increase the common prefixes so that there are no adjacent duplicates for each internal node
{
B3_PROFILE("m_correctDuplicatePrefixesKernel");
int reset = 0;
m_maxCommonPrefix.copyFromHostPointer(&reset, 1);
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
b3BufferInfoCL( m_maxCommonPrefix.getBufferCL() ),
};
b3LauncherCL launcher(m_queue, m_correctDuplicatePrefixesKernel, "m_correctDuplicatePrefixesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
//For each leaf node, find parent nodes and assign child node indices
{
B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numLeaves);
launcher.launch1D(numLeaves);
clFinish(m_queue);
}
//For each internal node, find parent nodes and assign child node indices
{
B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel");
int maxCommonPrefix = -1;
m_maxCommonPrefix.copyToHostPointer(&maxCommonPrefix, 1);
//-1 so that the root sets its AABB
for(int processedCommonPrefix = maxCommonPrefix; processedCommonPrefix >= -1; --processedCommonPrefix)
{
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() ),
b3BufferInfoCL( m_leftInternalNodePointers.getBufferCL() ),
b3BufferInfoCL( m_rightInternalNodePointers.getBufferCL() ),
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ),
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(processedCommonPrefix);
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
}
clFinish(m_queue);
}
{
B3_PROFILE("m_convertChildNodeFormatKernel");
b3BufferInfoCL bufferInfo[] =
{
b3BufferInfoCL( m_internalNodeLeftChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeRightChildNodes.getBufferCL() ),
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() )
};
b3LauncherCL launcher(m_queue, m_convertChildNodeFormatKernel, "m_convertChildNodeFormatKernel");
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(numInternalNodes);
launcher.launch1D(numInternalNodes);
clFinish(m_queue);
}
}

View File

@ -56,10 +56,17 @@ class b3GpuParallelLinearBvh
cl_kernel m_findAllNodesMergedAabbKernel;
cl_kernel m_assignMortonCodesAndAabbIndiciesKernel;
//Binary tree construction kernels
//Simple binary tree construction kernels
cl_kernel m_constructBinaryTreeKernel;
cl_kernel m_determineInternalNodeAabbsKernel;
//Radix binary tree construction kernels
cl_kernel m_computePrefixAndInitPointersKernel;
cl_kernel m_correctDuplicatePrefixesKernel;
cl_kernel m_buildBinaryRadixTreeLeafNodesKernel;
cl_kernel m_buildBinaryRadixTreeInternalNodesKernel;
cl_kernel m_convertChildNodeFormatKernel;
//Traversal kernels
cl_kernel m_plbvhCalculateOverlappingPairsKernel;
cl_kernel m_plbvhRayTraverseKernel;
@ -85,6 +92,14 @@ class b3GpuParallelLinearBvh
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child
b3OpenCLArray<int> m_internalNodeParentNodes;
//1 element per internal node; for radix binary tree construction
b3OpenCLArray<int> m_maxCommonPrefix;
b3OpenCLArray<int> m_commonPrefixes;
b3OpenCLArray<int> m_leftInternalNodePointers; //Linked list
b3OpenCLArray<int> m_rightInternalNodePointers; //Linked list
b3OpenCLArray<int> m_internalNodeLeftChildNodes;
b3OpenCLArray<int> m_internalNodeRightChildNodes;
//1 element per leaf node (leaf nodes only include small AABBs)
b3OpenCLArray<int> m_leafNodeParentNodes;
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key = morton code, m_value == aabb index

View File

@ -468,7 +468,7 @@ __kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,
b3Vector3 rayTo = rays[rayIndex].m_to;
b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);
b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );
//
int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
@ -567,3 +567,204 @@ __kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global
}
#define B3_PLBVH_LINKED_LIST_INVALID_NODE -1
int longestCommonPrefix(int i, int j) { return clz(i ^ j); }
__kernel void computePrefixAndInitPointers(__global SortDataCL* mortonCodesAndAabbIndices,
__global int* out_commonPrefixes,
__global int* out_leftInternalNodePointers,
__global int* out_rightInternalNodePointers,
int numInternalNodes)
{
int internalNodeIndex = get_global_id(0);
if (internalNodeIndex >= numInternalNodes) return;
//Compute common prefix
{
//Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,
//and the number of internal nodes is always numLeafNodes - 1
int leftLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex].m_key;
int rightLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex + 1].m_key;
out_commonPrefixes[internalNodeIndex] = longestCommonPrefix(leftLeafMortonCode, rightLeafMortonCode);
}
//Assign neighbor pointers of this node
{
int leftInternalIndex = internalNodeIndex - 1;
int rightInternalIndex = internalNodeIndex + 1;
out_leftInternalNodePointers[internalNodeIndex] = (leftInternalIndex >= 0) ? leftInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;
out_rightInternalNodePointers[internalNodeIndex] = (rightInternalIndex < numInternalNodes) ? rightInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;
}
}
__kernel void correctDuplicatePrefixes(__global int* commonPrefixes, __global int* out_maxCommonPrefix, int numInternalNodes)
{
int internalNodeIndex = get_global_id(0);
if (internalNodeIndex >= numInternalNodes) return;
int commonPrefix = commonPrefixes[internalNodeIndex];
//Linear search to find the size of the subtree
int firstSubTreeIndex = internalNodeIndex;
int lastSubTreeIndex = internalNodeIndex;
while(firstSubTreeIndex - 1 >= 0 && commonPrefix == commonPrefixes[firstSubTreeIndex - 1]) --firstSubTreeIndex;
while(lastSubTreeIndex + 1 < numInternalNodes && commonPrefix == commonPrefixes[lastSubTreeIndex + 1]) ++lastSubTreeIndex;
//Fix duplicate common prefixes by incrementing them so that a subtree is formed.
//Recursively divide the tree until the position of the split is this node's index.
//Every time this node is not the split node, increment the common prefix.
int isCurrentSplitNode = false;
int correctedCommonPrefix = commonPrefix;
while(!isCurrentSplitNode)
{
int numInternalNodesInSubTree = lastSubTreeIndex - firstSubTreeIndex + 1;
int splitNodeIndex = firstSubTreeIndex + numInternalNodesInSubTree / 2;
if(internalNodeIndex > splitNodeIndex) firstSubTreeIndex = splitNodeIndex + 1;
else if(internalNodeIndex < splitNodeIndex) lastSubTreeIndex = splitNodeIndex - 1;
//else if(internalNodeIndex == splitNodeIndex) break;
isCurrentSplitNode = (internalNodeIndex == splitNodeIndex);
if(!isCurrentSplitNode) correctedCommonPrefix++;
}
commonPrefixes[internalNodeIndex] = correctedCommonPrefix;
atomic_max(out_maxCommonPrefix, correctedCommonPrefix);
}
//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.
//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.
//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.
#define B3_PLBVH_INVALID_COMMON_PREFIX 128
__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __global int* out_leftChildNodes,
__global int* out_rightChildNodes, int numLeafNodes)
{
int leafNodeIndex = get_global_id(0);
if (leafNodeIndex >= numLeafNodes) return;
int numInternalNodes = numLeafNodes - 1;
int leftSplitIndex = leafNodeIndex - 1;
int rightSplitIndex = leafNodeIndex;
int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixes[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixes[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
//Parent node is the highest adjacent common prefix that is lower than the node's common prefix
//Leaf nodes are considered as having the highest common prefix
int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);
//Handle cases for the edge nodes; the first and last node
//For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX
if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;
if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;
int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;
//If the left node is the parent, then this node is its right child and vice versa
__global int* out_childNode = (isLeftHigherCommonPrefix) ? out_rightChildNodes : out_leftChildNodes;
int isLeaf = 1;
out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);
}
__kernel void buildBinaryRadixTreeInternalNodes(__global int* commonPrefixes, __global SortDataCL* mortonCodesAndAabbIndices,
__global int* leftChildNodes, __global int* rightChildNodes,
__global int* leftNeighborPointers, __global int* rightNeighborPointers,
__global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,
__global int* out_rootNodeIndex,
int processedCommonPrefix, int numInternalNodes)
{
int internalNodeIndex = get_global_id(0);
if (internalNodeIndex >= numInternalNodes) return;
int commonPrefix = commonPrefixes[internalNodeIndex];
if (commonPrefix == processedCommonPrefix)
{
//Check neighbors and compare the common prefix to select the parent node
int leftNodeIndex = leftNeighborPointers[internalNodeIndex];
int rightNodeIndex = rightNeighborPointers[internalNodeIndex];
int leftCommonPrefix = (leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[leftNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
int rightCommonPrefix = (rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[rightNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
//Parent node is the highest common prefix that is lower than the node's common prefix
//Since the nodes with lower common prefixes are removed, that condition does not have to be tested for,
//and we only need to pick the node with the higher prefix.
int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);
//
if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;
else if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;
int isRootNode = false;
if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX && rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isRootNode = true;
int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftNodeIndex : rightNodeIndex;
//If the left node is the parent, then this node is its right child and vice versa
__global int* out_childNode = (isLeftHigherCommonPrefix) ? rightChildNodes : leftChildNodes;
int isLeaf = 0;
if(!isRootNode) out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
if(isRootNode) *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
//Remove this node from the linked list,
//so that the left and right nodes point at each other instead of this node
if(leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) rightNeighborPointers[leftNodeIndex] = rightNodeIndex;
if(rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) leftNeighborPointers[rightNodeIndex] = leftNodeIndex;
//For debug
leftNeighborPointers[internalNodeIndex] = -2;
rightNeighborPointers[internalNodeIndex] = -2;
}
//Processing occurs from highest common prefix to lowest common prefix
//Nodes in the previously processed level have their children set, so we merge their child AABBs here
if (commonPrefix == processedCommonPrefix + 1)
{
int leftChildIndex = leftChildNodes[internalNodeIndex];
int rightChildIndex = rightChildNodes[internalNodeIndex];
int isLeftChildLeaf = isLeafNode(leftChildIndex);
int isRightChildLeaf = isLeafNode(rightChildIndex);
leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);
rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);
//leftRigidIndex/rightRigidIndex is not used if internal node
int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;
int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;
b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];
b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];
b3AabbCL mergedAabb;
mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);
mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);
internalNodeAabbs[internalNodeIndex] = mergedAabb;
}
}
__kernel void convertChildNodeFormat(__global int* leftChildNodes, __global int* rightChildNodes,
__global int2* out_childNodes, int numInternalNodes)
{
int internalNodeIndex = get_global_id(0);
if (internalNodeIndex >= numInternalNodes) return;
int2 childNodesIndices;
childNodesIndices.x = leftChildNodes[internalNodeIndex];
childNodesIndices.y = rightChildNodes[internalNodeIndex];
out_childNodes[internalNodeIndex] = childNodesIndices;
}

View File

@ -443,6 +443,7 @@ static const char* parallelLinearBvhCL= \
" b3Vector3 rayTo = rays[rayIndex].m_to;\n"
" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n"
" b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
" \n"
" //\n"
" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
" \n"
@ -538,4 +539,195 @@ static const char* parallelLinearBvhCL= \
" }\n"
" }\n"
"}\n"
"#define B3_PLBVH_LINKED_LIST_INVALID_NODE -1\n"
"int longestCommonPrefix(int i, int j) { return clz(i ^ j); }\n"
"__kernel void computePrefixAndInitPointers(__global SortDataCL* mortonCodesAndAabbIndices,\n"
" __global int* out_commonPrefixes,\n"
" __global int* out_leftInternalNodePointers, \n"
" __global int* out_rightInternalNodePointers,\n"
" int numInternalNodes)\n"
"{\n"
" int internalNodeIndex = get_global_id(0);\n"
" if (internalNodeIndex >= numInternalNodes) return;\n"
" \n"
" //Compute common prefix\n"
" {\n"
" //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n"
" //and the number of internal nodes is always numLeafNodes - 1\n"
" int leftLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex].m_key;\n"
" int rightLeafMortonCode = mortonCodesAndAabbIndices[internalNodeIndex + 1].m_key;\n"
" \n"
" out_commonPrefixes[internalNodeIndex] = longestCommonPrefix(leftLeafMortonCode, rightLeafMortonCode);\n"
" }\n"
" \n"
" //Assign neighbor pointers of this node\n"
" {\n"
" int leftInternalIndex = internalNodeIndex - 1;\n"
" int rightInternalIndex = internalNodeIndex + 1;\n"
" \n"
" out_leftInternalNodePointers[internalNodeIndex] = (leftInternalIndex >= 0) ? leftInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;\n"
" out_rightInternalNodePointers[internalNodeIndex] = (rightInternalIndex < numInternalNodes) ? rightInternalIndex : B3_PLBVH_LINKED_LIST_INVALID_NODE;\n"
" }\n"
"}\n"
"__kernel void correctDuplicatePrefixes(__global int* commonPrefixes, __global int* out_maxCommonPrefix, int numInternalNodes)\n"
"{\n"
" int internalNodeIndex = get_global_id(0);\n"
" if (internalNodeIndex >= numInternalNodes) return;\n"
" \n"
" int commonPrefix = commonPrefixes[internalNodeIndex];\n"
" \n"
" //Linear search to find the size of the subtree\n"
" int firstSubTreeIndex = internalNodeIndex;\n"
" int lastSubTreeIndex = internalNodeIndex;\n"
" \n"
" while(firstSubTreeIndex - 1 >= 0 && commonPrefix == commonPrefixes[firstSubTreeIndex - 1]) --firstSubTreeIndex;\n"
" while(lastSubTreeIndex + 1 < numInternalNodes && commonPrefix == commonPrefixes[lastSubTreeIndex + 1]) ++lastSubTreeIndex;\n"
" \n"
" //Fix duplicate common prefixes by incrementing them so that a subtree is formed.\n"
" //Recursively divide the tree until the position of the split is this node's index.\n"
" //Every time this node is not the split node, increment the common prefix.\n"
" int isCurrentSplitNode = false;\n"
" int correctedCommonPrefix = commonPrefix;\n"
" \n"
" while(!isCurrentSplitNode)\n"
" {\n"
" int numInternalNodesInSubTree = lastSubTreeIndex - firstSubTreeIndex + 1;\n"
" int splitNodeIndex = firstSubTreeIndex + numInternalNodesInSubTree / 2;\n"
" \n"
" if(internalNodeIndex > splitNodeIndex) firstSubTreeIndex = splitNodeIndex + 1;\n"
" else if(internalNodeIndex < splitNodeIndex) lastSubTreeIndex = splitNodeIndex - 1;\n"
" //else if(internalNodeIndex == splitNodeIndex) break;\n"
" \n"
" isCurrentSplitNode = (internalNodeIndex == splitNodeIndex);\n"
" if(!isCurrentSplitNode) correctedCommonPrefix++;\n"
" }\n"
" \n"
" commonPrefixes[internalNodeIndex] = correctedCommonPrefix;\n"
" atomic_max(out_maxCommonPrefix, correctedCommonPrefix);\n"
"}\n"
"//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
"//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
"//Duplicates common prefixes increase the highest common prefix by N, where 2^N is the number of duplicate nodes.\n"
"#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
"__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixes, __global int* out_leftChildNodes, \n"
" __global int* out_rightChildNodes, int numLeafNodes)\n"
"{\n"
" int leafNodeIndex = get_global_id(0);\n"
" if (leafNodeIndex >= numLeafNodes) return;\n"
" \n"
" int numInternalNodes = numLeafNodes - 1;\n"
" \n"
" int leftSplitIndex = leafNodeIndex - 1;\n"
" int rightSplitIndex = leafNodeIndex;\n"
" \n"
" int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixes[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
" int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixes[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
" \n"
" //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n"
" //Leaf nodes are considered as having the highest common prefix\n"
" int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n"
" \n"
" //Handle cases for the edge nodes; the first and last node\n"
" //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n"
" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n"
" if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n"
" \n"
" int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n"
" \n"
" //If the left node is the parent, then this node is its right child and vice versa\n"
" __global int* out_childNode = (isLeftHigherCommonPrefix) ? out_rightChildNodes : out_leftChildNodes;\n"
" \n"
" int isLeaf = 1;\n"
" out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n"
"}\n"
"__kernel void buildBinaryRadixTreeInternalNodes(__global int* commonPrefixes, __global SortDataCL* mortonCodesAndAabbIndices,\n"
" __global int* leftChildNodes, __global int* rightChildNodes,\n"
" __global int* leftNeighborPointers, __global int* rightNeighborPointers,\n"
" __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n"
" __global int* out_rootNodeIndex,\n"
" int processedCommonPrefix, int numInternalNodes)\n"
"{\n"
" int internalNodeIndex = get_global_id(0);\n"
" if (internalNodeIndex >= numInternalNodes) return;\n"
" \n"
" int commonPrefix = commonPrefixes[internalNodeIndex];\n"
" if (commonPrefix == processedCommonPrefix)\n"
" {\n"
" //Check neighbors and compare the common prefix to select the parent node\n"
" int leftNodeIndex = leftNeighborPointers[internalNodeIndex];\n"
" int rightNodeIndex = rightNeighborPointers[internalNodeIndex];\n"
" \n"
" int leftCommonPrefix = (leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[leftNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
" int rightCommonPrefix = (rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) ? commonPrefixes[rightNodeIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
" \n"
" //Parent node is the highest common prefix that is lower than the node's common prefix\n"
" //Since the nodes with lower common prefixes are removed, that condition does not have to be tested for,\n"
" //and we only need to pick the node with the higher prefix.\n"
" int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n"
" \n"
" //\n"
" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n"
" else if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n"
" \n"
" int isRootNode = false;\n"
" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX && rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isRootNode = true;\n"
" \n"
" int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftNodeIndex : rightNodeIndex;\n"
" \n"
" //If the left node is the parent, then this node is its right child and vice versa\n"
" __global int* out_childNode = (isLeftHigherCommonPrefix) ? rightChildNodes : leftChildNodes;\n"
" \n"
" int isLeaf = 0;\n"
" if(!isRootNode) out_childNode[parentNodeIndex] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
" \n"
" if(isRootNode) *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
" \n"
" //Remove this node from the linked list, \n"
" //so that the left and right nodes point at each other instead of this node\n"
" if(leftNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) rightNeighborPointers[leftNodeIndex] = rightNodeIndex;\n"
" if(rightNodeIndex != B3_PLBVH_LINKED_LIST_INVALID_NODE) leftNeighborPointers[rightNodeIndex] = leftNodeIndex;\n"
" \n"
" //For debug\n"
" leftNeighborPointers[internalNodeIndex] = -2;\n"
" rightNeighborPointers[internalNodeIndex] = -2;\n"
" }\n"
" \n"
" //Processing occurs from highest common prefix to lowest common prefix\n"
" //Nodes in the previously processed level have their children set, so we merge their child AABBs here\n"
" if (commonPrefix == processedCommonPrefix + 1)\n"
" {\n"
" int leftChildIndex = leftChildNodes[internalNodeIndex];\n"
" int rightChildIndex = rightChildNodes[internalNodeIndex];\n"
" \n"
" int isLeftChildLeaf = isLeafNode(leftChildIndex);\n"
" int isRightChildLeaf = isLeafNode(rightChildIndex);\n"
" \n"
" leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n"
" rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n"
" \n"
" //leftRigidIndex/rightRigidIndex is not used if internal node\n"
" int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n"
" int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n"
" \n"
" b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n"
" b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n"
" \n"
" b3AabbCL mergedAabb;\n"
" mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n"
" mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n"
" internalNodeAabbs[internalNodeIndex] = mergedAabb;\n"
" }\n"
"}\n"
"__kernel void convertChildNodeFormat(__global int* leftChildNodes, __global int* rightChildNodes, \n"
" __global int2* out_childNodes, int numInternalNodes)\n"
"{\n"
" int internalNodeIndex = get_global_id(0);\n"
" if (internalNodeIndex >= numInternalNodes) return;\n"
" \n"
" int2 childNodesIndices;\n"
" childNodesIndices.x = leftChildNodes[internalNodeIndex];\n"
" childNodesIndices.y = rightChildNodes[internalNodeIndex];\n"
" \n"
" out_childNodes[internalNodeIndex] = childNodesIndices;\n"
"}\n"
;