mirror of
https://github.com/bulletphysics/bullet3
synced 2024-12-14 13:50:04 +00:00
Clean up PLBVH construction, add comments.
This commit is contained in:
parent
038364ccdd
commit
db02ced606
@ -30,12 +30,7 @@ b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id
|
||||
|
||||
m_commonPrefixes(context, queue),
|
||||
m_commonPrefixLengths(context, queue),
|
||||
m_childNodeCount(context, queue),
|
||||
m_distanceFromRoot(context, queue),
|
||||
m_TEMP_leftLowerPrefix(context, queue),
|
||||
m_TEMP_rightLowerPrefix(context, queue),
|
||||
m_TEMP_leftSharedPrefixLength(context, queue),
|
||||
m_TEMP_rightSharedPrefixLength(context, queue),
|
||||
|
||||
m_leafNodeParentNodes(context, queue),
|
||||
m_mortonCodesAndAabbIndicies(context, queue),
|
||||
@ -164,6 +159,8 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
||||
|
||||
if(numLeaves < 2)
|
||||
{
|
||||
//Number of leaf nodes is checked in calculateOverlappingPairs() and testRaysAgainstBvhAabbs(),
|
||||
//so it does not matter if numLeaves == 0 and rootNodeIndex == -1
|
||||
int rootNodeIndex = numLeaves - 1;
|
||||
m_rootNodeIndex.copyFromHostPointer(&rootNodeIndex, 1);
|
||||
return;
|
||||
@ -178,12 +175,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
||||
|
||||
m_commonPrefixes.resize(numInternalNodes);
|
||||
m_commonPrefixLengths.resize(numInternalNodes);
|
||||
m_childNodeCount.resize(numInternalNodes);
|
||||
m_distanceFromRoot.resize(numInternalNodes);
|
||||
m_TEMP_leftLowerPrefix.resize(numInternalNodes);
|
||||
m_TEMP_rightLowerPrefix.resize(numInternalNodes);
|
||||
m_TEMP_leftSharedPrefixLength.resize(numInternalNodes);
|
||||
m_TEMP_rightSharedPrefixLength.resize(numInternalNodes);
|
||||
|
||||
m_leafNodeParentNodes.resize(numLeaves);
|
||||
m_mortonCodesAndAabbIndicies.resize(numLeaves);
|
||||
@ -191,7 +183,7 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
||||
}
|
||||
|
||||
//Find the merged AABB of all small AABBs; this is used to define the size of
|
||||
//each cell in the virtual grid(2^10 cells in each dimension).
|
||||
//each cell in the virtual grid for the next kernel(2^10 cells in each dimension).
|
||||
{
|
||||
B3_PROFILE("Find AABB of merged nodes");
|
||||
|
||||
@ -215,7 +207,6 @@ void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAab
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
|
||||
//Insert the center of the AABBs into a virtual grid,
|
||||
//then convert the discrete grid coordinates into a morton code
|
||||
//For each element in m_mortonCodesAndAabbIndicies, set
|
||||
@ -405,12 +396,6 @@ void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayIn
|
||||
|
||||
}
|
||||
|
||||
// remove
|
||||
#include <iostream>
|
||||
int isLeafNode(int index) { return (index >> 31 == 0); }
|
||||
int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }
|
||||
int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }
|
||||
|
||||
void b3GpuParallelLinearBvh::constructRadixBinaryTree()
|
||||
{
|
||||
B3_PROFILE("b3GpuParallelLinearBvh::constructRadixBinaryTree()");
|
||||
@ -418,7 +403,9 @@ void b3GpuParallelLinearBvh::constructRadixBinaryTree()
|
||||
int numLeaves = m_leafNodeAabbs.size();
|
||||
int numInternalNodes = numLeaves - 1;
|
||||
|
||||
//For each internal node, compute common prefix and set pointers to left and right internal nodes
|
||||
//Each internal node is placed in between 2 leaf nodes.
|
||||
//By using this arrangement and computing the common prefix between
|
||||
//these 2 adjacent leaf nodes, it is possible to quickly construct a binary radix tree.
|
||||
{
|
||||
B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel");
|
||||
|
||||
@ -437,185 +424,107 @@ void b3GpuParallelLinearBvh::constructRadixBinaryTree()
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//For each leaf node, select its parent node by
|
||||
//comparing the 2 nearest internal nodes and assign child node indices
|
||||
{
|
||||
static b3AlignedObjectArray<int> prefixLengths;
|
||||
m_commonPrefixLengths.copyToHost(prefixLengths);
|
||||
clFinish(m_queue);
|
||||
B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
|
||||
for(int i = 1; i < prefixLengths.size(); ++i)
|
||||
if( prefixLengths[i - 1] == prefixLengths[i] )
|
||||
for(;;) printf("duplicate prefix[%d]: %d\n", i, prefixLengths[i]);
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLeaves);
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//For each internal node, perform 2 binary searches among the other internal nodes
|
||||
//to its left and right to find its potential parent nodes and assign child node indices
|
||||
{
|
||||
//For each leaf node, find parent nodes and assign child node indices
|
||||
B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() )
|
||||
};
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() )
|
||||
};
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numLeaves);
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
launcher.launch1D(numLeaves);
|
||||
//Find the number of nodes seperating each internal node and the root node
|
||||
//so that the AABBs can be set using the next kernel.
|
||||
//Also determine the maximum number of nodes separating an internal node and the root node.
|
||||
{
|
||||
B3_PROFILE("m_findDistanceFromRootKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_maxDistanceFromRoot.getBufferCL() ),
|
||||
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Starting from the internal nodes nearest to the leaf nodes, recursively move up
|
||||
//the tree towards the root to set the AABBs of each internal node; each internal node
|
||||
//checks its children and merges their AABBs
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel");
|
||||
|
||||
int maxDistanceFromRoot = -1;
|
||||
{
|
||||
B3_PROFILE("copy maxDistanceFromRoot to CPU");
|
||||
m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//For each internal node, find parent nodes and assign child node indices
|
||||
for(int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot)
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_commonPrefixes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ),
|
||||
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_leftLowerPrefix.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_rightLowerPrefix.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_leftSharedPrefixLength.getBufferCL() ),
|
||||
b3BufferInfoCL( m_TEMP_rightSharedPrefixLength.getBufferCL() )
|
||||
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel");
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(maxDistanceFromRoot);
|
||||
launcher.setConst(distanceFromRoot);
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
//It may seem inefficent to launch a thread for each internal node when a
|
||||
//much smaller number of nodes is actually processed, but this is actually
|
||||
//faster than determining the exact nodes that are ready to merge their child AABBs.
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
if(0)
|
||||
{
|
||||
static b3AlignedObjectArray<b3SortData> mortonCodesAndAabbIndices;
|
||||
|
||||
static b3AlignedObjectArray<b3Int2> child;
|
||||
static b3AlignedObjectArray<b3Int64> commonPrefixes;
|
||||
static b3AlignedObjectArray<int> commonPrefixLengths;
|
||||
static b3AlignedObjectArray<int> tempLeftLowerPrefixIndex;
|
||||
static b3AlignedObjectArray<int> tempRightLowerPrefixIndex;
|
||||
static b3AlignedObjectArray<int> tempLeftLowerPrefixSPL;
|
||||
static b3AlignedObjectArray<int> tempRightLowerPrefixSPL;
|
||||
static b3AlignedObjectArray<int> internalParent;
|
||||
|
||||
m_mortonCodesAndAabbIndicies.copyToHost(mortonCodesAndAabbIndices);
|
||||
|
||||
m_internalNodeChildNodes.copyToHost(child);
|
||||
m_commonPrefixes.copyToHost(commonPrefixes);
|
||||
m_commonPrefixLengths.copyToHost(commonPrefixLengths);
|
||||
m_TEMP_leftLowerPrefix.copyToHost(tempLeftLowerPrefixIndex);
|
||||
m_TEMP_rightLowerPrefix.copyToHost(tempRightLowerPrefixIndex);
|
||||
m_TEMP_leftSharedPrefixLength.copyToHost(tempLeftLowerPrefixSPL);
|
||||
m_TEMP_rightSharedPrefixLength.copyToHost(tempRightLowerPrefixSPL);
|
||||
m_internalNodeParentNodes.copyToHost(internalParent);
|
||||
|
||||
int rootNode = -1;
|
||||
m_rootNodeIndex.copyToHostPointer(&rootNode, 1);
|
||||
clFinish(m_queue);
|
||||
|
||||
printf( "rootNode: %d\n", getIndexWithInternalNodeMarkerRemoved(rootNode) );
|
||||
|
||||
for(int i = 0; i < numInternalNodes; ++i)
|
||||
{
|
||||
b3Int2 childNodes = child[i];
|
||||
|
||||
printf("childNodes[%d]:", i);
|
||||
printf( " %d", getIndexWithInternalNodeMarkerRemoved(childNodes.x) );
|
||||
if( !isLeafNode(childNodes.x) ) printf("i");
|
||||
printf( ", %d", getIndexWithInternalNodeMarkerRemoved(childNodes.y) );
|
||||
if( !isLeafNode(childNodes.y) ) printf("i");
|
||||
printf(" (lr: %d, %d)", tempLeftLowerPrefixIndex[i], tempRightLowerPrefixIndex[i]);
|
||||
printf(" (spl: %d, %d)", tempLeftLowerPrefixSPL[i], tempRightLowerPrefixSPL[i]);
|
||||
printf(" (prefix: %d)", commonPrefixLengths[i]);
|
||||
printf(" (par: %d)", internalParent[i]);
|
||||
printf("\n");
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
for(int i = 0; i < numInternalNodes; ++i)
|
||||
{
|
||||
int hi = static_cast<int>(commonPrefixes[i] >> 32);
|
||||
int lo = static_cast<int>(commonPrefixes[i]);
|
||||
|
||||
printf("commonPrefix[%d]: %x, %d, len %d \n", i, hi, lo, commonPrefixLengths[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
for(int i = 0; i < numLeaves; ++i)
|
||||
{
|
||||
printf("z-curve[%d]: %x \n", i, mortonCodesAndAabbIndices[i].m_key);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
|
||||
std::cout << std::endl;
|
||||
for(;;);
|
||||
}
|
||||
|
||||
//Find the number of nodes seperating each internal node and the root node
|
||||
//so that the AABBs can be set using the next kernel
|
||||
{
|
||||
B3_PROFILE("m_findDistanceFromRootKernel");
|
||||
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_maxDistanceFromRoot.getBufferCL() ),
|
||||
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
//Starting from the nodes nearest to the leaf nodes, recursively move up
|
||||
//the tree to set the AABBs of each internal node; each internal node
|
||||
//checks its children and merges their AABBs
|
||||
{
|
||||
B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel");
|
||||
|
||||
int maxDistanceFromRoot = -1;
|
||||
{
|
||||
B3_PROFILE("copy maxDistanceFromRoot to CPU");
|
||||
m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1);
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
for(int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot)
|
||||
{
|
||||
b3BufferInfoCL bufferInfo[] =
|
||||
{
|
||||
b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ),
|
||||
b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ),
|
||||
b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ),
|
||||
b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() )
|
||||
};
|
||||
|
||||
b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel");
|
||||
launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
|
||||
launcher.setConst(maxDistanceFromRoot);
|
||||
launcher.setConst(distanceFromRoot);
|
||||
launcher.setConst(numInternalNodes);
|
||||
|
||||
launcher.launch1D(numInternalNodes);
|
||||
}
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
|
||||
clFinish(m_queue);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,28 +70,23 @@ class b3GpuParallelLinearBvh
|
||||
b3RadixSort32CL m_radixSorter;
|
||||
|
||||
//1 element
|
||||
b3OpenCLArray<int> m_rootNodeIndex;
|
||||
b3OpenCLArray<int> m_maxDistanceFromRoot;
|
||||
b3OpenCLArray<int> m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node
|
||||
b3OpenCLArray<int> m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node
|
||||
|
||||
//1 element per internal node (number_of_internal_nodes == number_of_leaves - 1)
|
||||
b3OpenCLArray<b3SapAabb> m_internalNodeAabbs;
|
||||
b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index
|
||||
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child
|
||||
b3OpenCLArray<int> m_internalNodeParentNodes;
|
||||
b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node
|
||||
b3OpenCLArray<int> m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal
|
||||
|
||||
//1 element per internal node; for binary radix tree construction
|
||||
b3OpenCLArray<b3Int64> m_commonPrefixes;
|
||||
b3OpenCLArray<int> m_commonPrefixLengths;
|
||||
b3OpenCLArray<int> m_childNodeCount;
|
||||
b3OpenCLArray<int> m_distanceFromRoot;
|
||||
b3OpenCLArray<int> m_TEMP_leftLowerPrefix;
|
||||
b3OpenCLArray<int> m_TEMP_rightLowerPrefix;
|
||||
b3OpenCLArray<int> m_TEMP_leftSharedPrefixLength;
|
||||
b3OpenCLArray<int> m_TEMP_rightSharedPrefixLength;
|
||||
b3OpenCLArray<int> m_distanceFromRoot; //Number of internal nodes between this node and the root
|
||||
|
||||
//1 element per leaf node (leaf nodes only include small AABBs)
|
||||
b3OpenCLArray<int> m_leafNodeParentNodes;
|
||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index
|
||||
b3OpenCLArray<int> m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal
|
||||
b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs
|
||||
b3OpenCLArray<b3SapAabb> m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes
|
||||
b3OpenCLArray<b3SapAabb> m_leafNodeAabbs; //Contains only small AABBs
|
||||
|
||||
|
@ -448,6 +448,8 @@ b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j)
|
||||
|
||||
return sharedBits & bitmask;
|
||||
}
|
||||
|
||||
//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths
|
||||
int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)
|
||||
{
|
||||
return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );
|
||||
@ -521,8 +523,6 @@ __kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, _
|
||||
__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,
|
||||
__global int2* out_childNodes,
|
||||
__global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,
|
||||
__global int* TEMP_out_leftLowerPrefix, __global int* TEMP_out_rightLowerPrefix,
|
||||
__global int* TEMP_spl_left, __global int* TEMP_spl_right,
|
||||
int numInternalNodes)
|
||||
{
|
||||
int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
|
||||
@ -536,6 +536,7 @@ __kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes
|
||||
int leftIndex = -1;
|
||||
int rightIndex = -1;
|
||||
|
||||
//Find nearest element to left with a lower common prefix
|
||||
for(int i = internalNodeIndex - 1; i >= 0; --i)
|
||||
{
|
||||
int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
|
||||
@ -546,6 +547,7 @@ __kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes
|
||||
}
|
||||
}
|
||||
|
||||
//Find nearest element to right with a lower common prefix
|
||||
for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)
|
||||
{
|
||||
int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
|
||||
@ -645,11 +647,6 @@ __kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes
|
||||
}
|
||||
#endif
|
||||
|
||||
TEMP_out_leftLowerPrefix[internalNodeIndex] = leftIndex;
|
||||
TEMP_out_rightLowerPrefix[internalNodeIndex] = rightIndex;
|
||||
TEMP_spl_left[internalNodeIndex] = (leftIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[leftIndex], commonPrefixLengths[leftIndex]) : -1;
|
||||
TEMP_spl_right[internalNodeIndex] = (rightIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[rightIndex], commonPrefixLengths[rightIndex]) : -1;
|
||||
|
||||
//Select parent
|
||||
{
|
||||
int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
|
||||
|
@ -418,6 +418,7 @@ static const char* parallelLinearBvhCL= \
|
||||
" \n"
|
||||
" return sharedBits & bitmask;\n"
|
||||
"}\n"
|
||||
"//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n"
|
||||
"int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n"
|
||||
"{\n"
|
||||
" return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n"
|
||||
@ -487,8 +488,6 @@ static const char* parallelLinearBvhCL= \
|
||||
"__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n"
|
||||
" __global int2* out_childNodes,\n"
|
||||
" __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n"
|
||||
" __global int* TEMP_out_leftLowerPrefix, __global int* TEMP_out_rightLowerPrefix,\n"
|
||||
" __global int* TEMP_spl_left, __global int* TEMP_spl_right,\n"
|
||||
" int numInternalNodes)\n"
|
||||
"{\n"
|
||||
" int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
|
||||
@ -502,6 +501,7 @@ static const char* parallelLinearBvhCL= \
|
||||
" int leftIndex = -1;\n"
|
||||
" int rightIndex = -1;\n"
|
||||
" \n"
|
||||
" //Find nearest element to left with a lower common prefix\n"
|
||||
" for(int i = internalNodeIndex - 1; i >= 0; --i)\n"
|
||||
" {\n"
|
||||
" int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
|
||||
@ -512,6 +512,7 @@ static const char* parallelLinearBvhCL= \
|
||||
" }\n"
|
||||
" }\n"
|
||||
" \n"
|
||||
" //Find nearest element to right with a lower common prefix\n"
|
||||
" for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n"
|
||||
" {\n"
|
||||
" int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
|
||||
@ -610,11 +611,6 @@ static const char* parallelLinearBvhCL= \
|
||||
" }\n"
|
||||
"#endif\n"
|
||||
" \n"
|
||||
" TEMP_out_leftLowerPrefix[internalNodeIndex] = leftIndex;\n"
|
||||
" TEMP_out_rightLowerPrefix[internalNodeIndex] = rightIndex;\n"
|
||||
" TEMP_spl_left[internalNodeIndex] = (leftIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[leftIndex], commonPrefixLengths[leftIndex]) : -1;\n"
|
||||
" TEMP_spl_right[internalNodeIndex] = (rightIndex != -1) ? getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[rightIndex], commonPrefixLengths[rightIndex]) : -1;\n"
|
||||
" \n"
|
||||
" //Select parent\n"
|
||||
" {\n"
|
||||
" int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
|
||||
|
Loading…
Reference in New Issue
Block a user