From 26dfaa441ee30923aa140baf2b9bf8a5918dc2b2 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Tue, 19 Nov 2013 13:42:53 -0800 Subject: [PATCH] add some command-line options --use_jacobi --allow_opencl_cpu b3LauncherCL constructor takes string, to make it easier to determine failing OpenCL kernel b3SetCustomErrorMessageFunc, printf error and exit(0) --- Demos3/GpuDemos/GpuDemo.cpp | 13 +- Demos3/GpuDemos/ParticleDemo.cpp | 8 +- Demos3/GpuDemos/broadphase/PairBench.cpp | 8 +- Demos3/GpuDemos/main_opengl3core.cpp | 39 ++- .../GpuDemos/rigidbody/GpuRigidBodyDemo.cpp | 2 +- Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp | 2 +- Demos3/GpuGuiInitialize/main.cpp | 5 +- .../b3GpuGridBroadphase.cpp | 14 +- .../b3GpuSapBroadphase.cpp | 16 +- .../b3ConvexHullContact.cpp | 38 +-- .../ParallelPrimitives/b3BoundSearchCL.cpp | 6 +- .../ParallelPrimitives/b3FillCL.cpp | 8 +- .../ParallelPrimitives/b3LauncherCL.cpp | 287 ++++++++++++++++++ .../ParallelPrimitives/b3LauncherCL.h | 277 +---------------- .../ParallelPrimitives/b3PrefixScanCL.cpp | 6 +- .../b3PrefixScanFloat4CL.cpp | 6 +- .../ParallelPrimitives/b3RadixSort32CL.cpp | 12 +- src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp | 2 +- .../RigidBody/b3GpuJacobiContactSolver.cpp | 25 +- .../RigidBody/b3GpuPgsConstraintSolver.cpp | 14 +- .../RigidBody/b3GpuPgsContactSolver.cpp | 80 ++--- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 54 ++-- src/Bullet3OpenCL/RigidBody/b3Solver.cpp | 12 +- test/OpenCL/KernelLaunch/main.cpp | 2 +- test/OpenCL/KernelLaunch/premake4.lua | 1 + test/OpenCL/ParallelPrimitives/premake4.lua | 1 + test/OpenCL/RadixSortBenchmark/premake4.lua | 1 + 27 files changed, 491 insertions(+), 448 deletions(-) create mode 100644 src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp diff --git a/Demos3/GpuDemos/GpuDemo.cpp b/Demos3/GpuDemos/GpuDemo.cpp index d1c626d6b..aaa6c86b1 100644 --- a/Demos3/GpuDemos/GpuDemo.cpp +++ b/Demos3/GpuDemos/GpuDemo.cpp @@ -5,6 +5,8 @@ #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" +bool gAllowCpuOpenCL = false; + GpuDemo::GpuDemo() :m_clData(0) { @@ -32,6 +34,8 @@ void GpuDemo::exitCL() } + + void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex) { void* glCtx=0; @@ -40,12 +44,11 @@ void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex) int ciErrNum = 0; - //#ifdef CL_PLATFORM_INTEL - //cl_device_type deviceType = CL_DEVICE_TYPE_ALL; - //#else + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; - //#endif - + if (gAllowCpuOpenCL) + deviceType = CL_DEVICE_TYPE_ALL; + // if (useInterop) diff --git a/Demos3/GpuDemos/ParticleDemo.cpp b/Demos3/GpuDemos/ParticleDemo.cpp index 1a10d13f2..e68cfca65 100644 --- a/Demos3/GpuDemos/ParticleDemo.cpp +++ b/Demos3/GpuDemos/ParticleDemo.cpp @@ -363,7 +363,7 @@ void ParticleDemo::clientMoveAndDisplay() b3BufferInfoCL( m_data->m_clPositionBuffer) }; - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel ); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel,"m_updatePositionsKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numParticles); @@ -382,7 +382,7 @@ void ParticleDemo::clientMoveAndDisplay() b3BufferInfoCL( m_data->m_simParamGPU->getBufferCL(),true) }; - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel2 ); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel2 ,"m_updatePositionsKernel2"); launcher.setConst( numParticles); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); @@ -401,7 +401,7 @@ void ParticleDemo::clientMoveAndDisplay() b3BufferInfoCL( m_data->m_broadphaseGPU->getAabbBufferWS()), }; - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbsKernel ); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbsKernel,"m_updateAabbsKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( m_data->m_simParamCPU[0].m_particleRad); launcher.setConst( numParticles); @@ -428,7 +428,7 @@ void ParticleDemo::clientMoveAndDisplay() b3BufferInfoCL( m_data->m_broadphaseGPU->getOverlappingPairBuffer(),true), }; - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_collideParticlesKernel); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_collideParticlesKernel,"m_collideParticlesKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numPairsGPU); launcher.launch1D( numPairsGPU); diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp index 2fe4ab5ad..abc5edc94 100644 --- a/Demos3/GpuDemos/broadphase/PairBench.cpp +++ b/Demos3/GpuDemos/broadphase/PairBench.cpp @@ -431,7 +431,7 @@ void PairBench::clientMoveAndDisplay() if (1) { - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel,"m_sineWaveKernel"); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() ); launcher.setConst( numObjects); @@ -441,7 +441,7 @@ void PairBench::clientMoveAndDisplay() else { - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel,"m_moveObjectsKernel"); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setConst( numObjects); launcher.launch1D( numObjects); @@ -455,7 +455,7 @@ void PairBench::clientMoveAndDisplay() if (updateOnGpu) { B3_PROFILE("updateOnGpu"); - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple,"m_updateAabbSimple"); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setConst( numObjects); launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS()); @@ -542,7 +542,7 @@ void PairBench::clientMoveAndDisplay() int numPairs = m_data->m_broadphaseGPU->getNumOverlap(); cl_mem pairBuf = m_data->m_broadphaseGPU->getOverlappingPairBuffer(); - b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_colorPairsKernel); + b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_colorPairsKernel,"m_colorPairsKernel"); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setConst( numObjects); launcher.setBuffer( pairBuf); diff --git a/Demos3/GpuDemos/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp index ba515258a..99f31e6d3 100644 --- a/Demos3/GpuDemos/main_opengl3core.cpp +++ b/Demos3/GpuDemos/main_opengl3core.cpp @@ -57,14 +57,23 @@ extern char OpenSansData[]; extern char* gPairBenchFileName; extern float shadowMapWidth; extern float shadowMapHeight; +extern bool gDebugLauncherCL; +extern bool gAllowCpuOpenCL; extern bool gDebugForceLoadingFromSource; extern bool gDebugSkipLoadingBinary; extern bool useShadowMap; extern float shadowMapWorldSize; -extern bool useJacobi; +extern bool gUseJacobi; extern bool useUniformGrid; +extern bool gUseDbvt; +extern bool gDumpContactStats; +extern bool gCalcWorldSpaceAabbOnCpu; +extern bool gUseCalculateOverlappingPairsHost; +extern bool gIntegrateOnCpu; +extern bool gConvertConstraintOnCpu; + static void MyResizeCallback( float width, float height) { g_OpenGLWidth = width; @@ -91,7 +100,7 @@ enum }; b3AlignedObjectArray demoNames; -int selectedDemo = 0; +int selectedDemo = 1; GpuDemo::CreateFunc* allDemos[]= { //ConcaveCompound2Scene::MyCreateFunc, @@ -554,6 +563,11 @@ void writeTextureToPng(int textureWidth, int textureHeight, const char* fileName #include "Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h" #include "Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h" +void MyErrorFunc(const char* msg) +{ + printf("Error: %s\n",msg); + exit(0); +} int main(int argc, char* argv[]) { @@ -566,7 +580,8 @@ int main(int argc, char* argv[]) int sz6 = sizeof(b3Transform); //b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache"); - + b3SetCustomErrorMessageFunc(MyErrorFunc); + b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); @@ -608,9 +623,8 @@ int main(int argc, char* argv[]) } args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName); - useJacobi = args.CheckCmdLineFlag("use_jacobi"); - useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid"); + gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch"); dump_timings=args.CheckCmdLineFlag("dump_timings"); ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl"); @@ -622,6 +636,21 @@ int main(int argc, char* argv[]) ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes"); args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex); args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex); + gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu"); + + + gUseJacobi = args.CheckCmdLineFlag("use_jacobi"); + gUseDbvt = args.CheckCmdLineFlag("use_dbvt"); + gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats"); + gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu"); + gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu"); + gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu"); + gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu"); + useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid"); + + + + args.GetCmdLineArgument("x_dim", ci.arraySizeX); args.GetCmdLineArgument("y_dim", ci.arraySizeY); args.GetCmdLineArgument("z_dim", ci.arraySizeZ); diff --git a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index 1bce4f6f1..6bd0bb5c5 100644 --- a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -251,7 +251,7 @@ void GpuRigidBodyDemo::clientMoveAndDisplay() B3_PROFILE("cl2gl_convert"); int ciErrNum = 0; cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); - b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); + b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel,"m_copyTransformsToVBOKernel"); launch.setBuffer(bodies); launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); launch.setConst(numObjects); diff --git a/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp b/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp index a553c2e43..fb4208ef3 100644 --- a/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp +++ b/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp @@ -255,7 +255,7 @@ void GpuSoftBodyDemo::clientMoveAndDisplay() B3_PROFILE("cl2gl_convert"); int ciErrNum = 0; cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); - b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); + b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel,"m_copyTransformsToVBOKernel"); launch.setBuffer(bodies); launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); launch.setConst(numObjects); diff --git a/Demos3/GpuGuiInitialize/main.cpp b/Demos3/GpuGuiInitialize/main.cpp index 0aac120fa..1283ffe57 100644 --- a/Demos3/GpuGuiInitialize/main.cpp +++ b/Demos3/GpuGuiInitialize/main.cpp @@ -163,7 +163,8 @@ public: cl_device_type deviceType = CL_DEVICE_TYPE_ALL; cl_int errNum; cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&errNum); - + if (context) + { Gwen::UnicodeString strIn = Gwen::Utility::StringToUnicode(platformInfo.m_platformName); Gwen::UnicodeString txt = Gwen::Utility::Format( L"Platform %d (",i)+strIn + Gwen::Utility::Format(L")"); @@ -305,7 +306,7 @@ public: } } } - + } /* diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp index 294452514..43b17bad6 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp @@ -165,7 +165,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()), }; - b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); + b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numSmallAabbs ); int num = numSmallAabbs; @@ -185,7 +185,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()), }; - b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); + b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numLargeAabbs ); int num = numLargeAabbs; @@ -212,7 +212,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) b3BufferInfoCL( m_smallAabbsGPU.getBufferCL() ), b3BufferInfoCL( m_gpuPairs.getBufferCL() ), b3BufferInfoCL(pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel); + b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numLargeAabbs ); launcher.setConst( numSmallAabbs); @@ -239,7 +239,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) m_hashGpu.resize(numSmallAabbs); { B3_PROFILE("kCalcHashAABB"); - b3LauncherCL launch(m_queue,kCalcHashAABB); + b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB"); launch.setConst(numSmallAabbs); launch.setBuffer(m_smallAabbsGPU.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL()); @@ -256,7 +256,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) { B3_PROFILE("kClearCellStart"); - b3LauncherCL launch(m_queue,kClearCellStart); + b3LauncherCL launch(m_queue,kClearCellStart,"kClearCellStart"); launch.setConst(numCells); launch.setBuffer(m_cellStartGpu.getBufferCL()); launch.launch1D(numCells); @@ -268,7 +268,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) { B3_PROFILE("kFindCellStart"); - b3LauncherCL launch(m_queue,kFindCellStart); + b3LauncherCL launch(m_queue,kFindCellStart,"kFindCellStart"); launch.setConst(numSmallAabbs); launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(m_cellStartGpu.getBufferCL()); @@ -303,7 +303,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) pairStartCurGpu.copyFromHost(pairStartCpu); - b3LauncherCL launch(m_queue,kFindOverlappingPairs); + b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs"); launch.setConst(numSmallAabbs); launch.setBuffer(m_smallAabbsGPU.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL()); diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index 233da4dd1..5ba4035ff 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -461,7 +461,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() { B3_PROFILE("launch1D"); - b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel); + b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel,"m_computePairsIncremental3dSapKernel"); launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL()); launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL()); launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL()); @@ -1040,7 +1040,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()), }; - b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); + b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numSmallAabbs ); int num = numSmallAabbs; @@ -1063,7 +1063,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) m_sum2.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow? } - b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ); + b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ,"m_prepareSumVarianceKernel"); launcher.setBuffer(m_smallAabbsGPU.getBufferCL()); launcher.setBuffer(m_sum.getBufferCL()); launcher.setBuffer(m_sum2.getBufferCL()); @@ -1117,7 +1117,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()), }; - b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); + b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numLargeAabbs ); int num = numLargeAabbs; @@ -1140,7 +1140,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) { B3_PROFILE("flipFloatKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_flipFloatKernel ); + b3LauncherCL launcher(m_queue, m_flipFloatKernel ,"m_flipFloatKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numSmallAabbs ); launcher.setConst( axis ); @@ -1162,7 +1162,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) { B3_PROFILE("scatterKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_scatterKernel ); + b3LauncherCL launcher(m_queue, m_scatterKernel ,"m_scatterKernel "); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numSmallAabbs); int num = numSmallAabbs; @@ -1184,7 +1184,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) { B3_PROFILE("sap2Kernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel); + b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numLargeAabbs ); launcher.setConst( numSmallAabbs); @@ -1205,7 +1205,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) { B3_PROFILE("sapKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sapKernel); + b3LauncherCL launcher(m_queue, m_sapKernel,"m_sapKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numSmallAabbs ); launcher.setConst( axis ); diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index d927588c4..0968a45f5 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -2898,11 +2898,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - //int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, - hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, - oldHostContacts); + int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, + // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, + // oldHostContacts); if (contactIndex>=0) @@ -2954,7 +2954,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_primitiveContactsKernel); + b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); launcher.setConst(maxContactCapacity); @@ -3020,7 +3020,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel); + b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); @@ -3043,7 +3043,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* numConcavePairs = m_numConcavePairsOut.at(0); - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel); + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); launcher.setBuffer( pairs->getBufferCL()); launcher.setBuffer( bodyBuf->getBufferCL()); launcher.setBuffer( gpuCollidables.getBufferCL()); @@ -3088,7 +3088,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel); + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numConcavePairs ); @@ -3132,7 +3132,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL(bvhInfo->getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel); + b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); launcher.setConst( compoundPairCapacity); @@ -3263,7 +3263,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel); + b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numCompoundPairs ); launcher.setConst(maxContactCapacity); @@ -3302,7 +3302,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel); + b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numCompoundPairs ); @@ -3348,7 +3348,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel); + b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numConcavePairs ); @@ -3406,7 +3406,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel); + b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numConcavePairs ); int num = numConcavePairs; @@ -3474,7 +3474,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_findClippingFacesKernel); + b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( vertexFaceCapacity); launcher.setConst( nPairs ); @@ -3509,7 +3509,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_clipFacesAndContactReductionKernel); + b3LauncherCL launcher(m_queue, m_clipFacesAndContactReductionKernel,"m_clipFacesAndContactReductionKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(vertexFaceCapacity); @@ -3552,7 +3552,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_newContactReductionKernel); + b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(vertexFaceCapacity); launcher.setConst( nPairs ); @@ -3588,7 +3588,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_clipHullHullKernel); + b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); launcher.setConst(maxContactCapacity); @@ -3625,7 +3625,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; - b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel); + b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nCompoundsPairs ); launcher.setConst(maxContactCapacity); diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp index cd48cc990..a4980f71e 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp @@ -87,7 +87,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray& src, int nSrc, b3OpenCL { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) }; - b3LauncherCL launcher( m_queue, m_lowerSortDataKernel ); + b3LauncherCL launcher( m_queue, m_lowerSortDataKernel,"m_lowerSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nSrc ); launcher.setConst( nDst ); @@ -98,7 +98,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray& src, int nSrc, b3OpenCL { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; - b3LauncherCL launcher(m_queue, m_upperSortDataKernel ); + b3LauncherCL launcher(m_queue, m_upperSortDataKernel,"m_upperSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nSrc ); launcher.setConst( nDst ); @@ -122,7 +122,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray& src, int nSrc, b3OpenCL { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_subtractKernel ); + b3LauncherCL launcher( m_queue, m_subtractKernel ,"m_subtractKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nSrc ); launcher.setConst( nDst ); diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp index 3379aa0af..f05c2648f 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp @@ -47,7 +47,7 @@ void b3FillCL::execute(b3OpenCLArray& src, const float value, int n, int b3Assert( n>0 ); { - b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel ); + b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel,"m_fillFloatKernel" ); launcher.setBuffer( src.getBufferCL()); launcher.setConst( n ); launcher.setConst( value ); @@ -63,7 +63,7 @@ void b3FillCL::execute(b3OpenCLArray& src, const int value, int n, int offs { - b3LauncherCL launcher( m_commandQueue, m_fillIntKernel ); + b3LauncherCL launcher( m_commandQueue, m_fillIntKernel ,"m_fillIntKernel"); launcher.setBuffer(src.getBufferCL()); launcher.setConst( n); launcher.setConst( value); @@ -80,7 +80,7 @@ void b3FillCL::execute(b3OpenCLArray& src, const unsigned int valu { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel ); + b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel,"m_fillUnsignedIntKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( n ); launcher.setConst(value); @@ -114,7 +114,7 @@ void b3FillCL::execute(b3OpenCLArray &src, const b3Int2 &value, int n, i { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) }; - b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2); + b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2,"m_fillKernelInt2"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(n); launcher.setConst(value); diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp new file mode 100644 index 000000000..ae53d25f7 --- /dev/null +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp @@ -0,0 +1,287 @@ +#include "b3LauncherCL.h" + +bool gDebugLauncherCL = false; + +b3LauncherCL::b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name) +:m_commandQueue(queue), +m_kernel(kernel), +m_idx(0), +m_enableSerialization(false), +m_name(name) +{ + if (gDebugLauncherCL) + { + static int counter = 0; + printf("[%d] Prepare to launch OpenCL kernel %s\n", counter++, name); + } + + m_serializationSizeInBytes = sizeof(int); +} + +b3LauncherCL::~b3LauncherCL() + { + for (int i=0;igetBufferCL()); + } + if (gDebugLauncherCL) + { + static int counter = 0; + printf("[%d] Finished launching OpenCL kernel %s [%d]\n", counter++,m_name); + } + } + +void b3LauncherCL::setBuffer( cl_mem clBuffer) +{ + if (m_enableSerialization) + { + b3KernelArgData kernelArg; + kernelArg.m_argIndex = m_idx; + kernelArg.m_isBuffer = 1; + kernelArg.m_clBuffer = clBuffer; + + cl_mem_info param_name = CL_MEM_SIZE; + size_t param_value; + size_t sizeInBytes = sizeof(size_t); + size_t actualSizeInBytes; + cl_int err; + err = clGetMemObjectInfo ( kernelArg.m_clBuffer, + param_name, + sizeInBytes, + ¶m_value, + &actualSizeInBytes); + + b3Assert( err == CL_SUCCESS ); + kernelArg.m_argSizeInBytes = param_value; + + m_kernelArguments.push_back(kernelArg); + m_serializationSizeInBytes+= sizeof(b3KernelArgData); + m_serializationSizeInBytes+=param_value; + } + cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer); + b3Assert( status == CL_SUCCESS ); +} + + +void b3LauncherCL::setBuffers( b3BufferInfoCL* buffInfo, int n ) +{ + for(int i=0; im_isBuffer) + { + b3OpenCLArray* clData = new b3OpenCLArray(ctx,m_commandQueue, arg->m_argSizeInBytes); + clData->resize(arg->m_argSizeInBytes); + + clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes); + + arg->m_clBuffer = clData->getBufferCL(); + + m_arrays.push_back(clData); + + cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer); + b3Assert( status == CL_SUCCESS ); + index+=arg->m_argSizeInBytes; + } else + { + cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData); + b3Assert( status == CL_SUCCESS ); + } + m_kernelArguments.push_back(*arg); + } +m_serializationSizeInBytes = index; + return index; +} + +int b3LauncherCL::validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx) + { + int index=0; + + int numArguments = *(int*) &goldBuffer[index]; + index+=sizeof(int); + + if (numArguments != m_kernelArguments.size()) + { + printf("failed validation: expected %d arguments, found %d\n",numArguments, m_kernelArguments.size()); + return -1; + } + + for (int ii=0;iim_argSizeInBytes) + { + printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n",ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes); + return -2; + } + + { + int expected = argGold->m_isBuffer; + int found = m_kernelArguments[ii].m_isBuffer; + + if (expected != found) + { + printf("failed validation: argument %d isBuffer expected: %d, found %d\n",ii,expected, found); + return -3; + } + } + index+=sizeof(b3KernelArgData); + + if (argGold->m_isBuffer) + { + + unsigned char* memBuf= (unsigned char*) malloc(m_kernelArguments[ii].m_argSizeInBytes); + unsigned char* goldBuf = &goldBuffer[index]; + for (int j=0;jm_argSizeInBytes; + } else + { + + //compare content + for (int b=0;bm_argData[b]; + int found =m_kernelArguments[ii].m_argData[b]; + if (expected != found) + { + printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n", + ii, b, expected, found); + return -5; + } + } + + } + } + return index; + +} + +int b3LauncherCL::serializeArguments(unsigned char* destBuffer, int destBufferCapacity) +{ +//initialize to known values +for (int i=0;i=m_serializationSizeInBytes); + + //todo: use the b3Serializer for this to allow for 32/64bit, endianness etc + int numArguments = m_kernelArguments.size(); + int curBufferSize = 0; + int* dest = (int*)&destBuffer[curBufferSize]; + *dest = numArguments; + curBufferSize += sizeof(int); + + + + for (int i=0;im_kernelArguments.size();i++) + { + b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize]; + *arg = m_kernelArguments[i]; + curBufferSize+=sizeof(b3KernelArgData); + if (arg->m_isBuffer==1) + { + //copy the OpenCL buffer content + cl_int status = 0; + status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes, + &destBuffer[curBufferSize], 0,0,0 ); + b3Assert( status==CL_SUCCESS ); + clFinish(m_commandQueue); + curBufferSize+=arg->m_argSizeInBytes; + } + + } + return curBufferSize; +} + +void b3LauncherCL::serializeToFile(const char* fileName, int numWorkItems) +{ + int num = numWorkItems; + int buffSize = getSerializationBufferSize(); + unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; + for (int i=0;i* > m_arrays; - b3LauncherCL(cl_command_queue queue, cl_kernel kernel) - :m_commandQueue(queue), - m_kernel(kernel), - m_idx(0), - m_enableSerialization(false) - { - m_serializationSizeInBytes = sizeof(int); - } + b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name); - virtual ~b3LauncherCL() - { - for (int i=0;igetBufferCL()); - } - } + virtual ~b3LauncherCL(); + + void setBuffer( cl_mem clBuffer); - inline void setBuffer( cl_mem clBuffer) - { - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 1; - kernelArg.m_clBuffer = clBuffer; - - cl_mem_info param_name = CL_MEM_SIZE; - size_t param_value; - size_t sizeInBytes = sizeof(size_t); - size_t actualSizeInBytes; - cl_int err; - err = clGetMemObjectInfo ( kernelArg.m_clBuffer, - param_name, - sizeInBytes, - ¶m_value, - &actualSizeInBytes); - - b3Assert( err == CL_SUCCESS ); - kernelArg.m_argSizeInBytes = param_value; - - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes+= sizeof(b3KernelArgData); - m_serializationSizeInBytes+=param_value; - } - cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer); - b3Assert( status == CL_SUCCESS ); - } - - - inline void setBuffers( b3BufferInfoCL* buffInfo, int n ) - { - for(int i=0; im_isBuffer) - { - b3OpenCLArray* clData = new b3OpenCLArray(ctx,m_commandQueue, arg->m_argSizeInBytes); - clData->resize(arg->m_argSizeInBytes); - - clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes); - - arg->m_clBuffer = clData->getBufferCL(); - - m_arrays.push_back(clData); - - cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer); - b3Assert( status == CL_SUCCESS ); - index+=arg->m_argSizeInBytes; - } else - { - cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData); - b3Assert( status == CL_SUCCESS ); - } - m_kernelArguments.push_back(*arg); - } - m_serializationSizeInBytes = index; - return index; - } - - inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx) - { - int index=0; - - int numArguments = *(int*) &goldBuffer[index]; - index+=sizeof(int); - - if (numArguments != m_kernelArguments.size()) - { - printf("failed validation: expected %d arguments, found %d\n",numArguments, m_kernelArguments.size()); - return -1; - } - - for (int ii=0;iim_argSizeInBytes) - { - printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n",ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes); - return -2; - } - - { - int expected = argGold->m_isBuffer; - int found = m_kernelArguments[ii].m_isBuffer; - - if (expected != found) - { - printf("failed validation: argument %d isBuffer expected: %d, found %d\n",ii,expected, found); - return -3; - } - } - index+=sizeof(b3KernelArgData); - - if (argGold->m_isBuffer) - { - - unsigned char* memBuf= (unsigned char*) malloc(m_kernelArguments[ii].m_argSizeInBytes); - unsigned char* goldBuf = &goldBuffer[index]; - for (int j=0;jm_argSizeInBytes; - } else - { - - //compare content - for (int b=0;bm_argData[b]; - int found =m_kernelArguments[ii].m_argData[b]; - if (expected != found) - { - printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n", - ii, b, expected, found); - return -5; - } - } - - } - } - return index; - - } - - inline int serializeArguments(unsigned char* destBuffer, int destBufferCapacity) - { - //initialize to known values - for (int i=0;i=m_serializationSizeInBytes); - - //todo: use the b3Serializer for this to allow for 32/64bit, endianness etc - int numArguments = m_kernelArguments.size(); - int curBufferSize = 0; - int* dest = (int*)&destBuffer[curBufferSize]; - *dest = numArguments; - curBufferSize += sizeof(int); - - - - for (int i=0;im_kernelArguments.size();i++) - { - b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize]; - *arg = m_kernelArguments[i]; - curBufferSize+=sizeof(b3KernelArgData); - if (arg->m_isBuffer==1) - { - //copy the OpenCL buffer content - cl_int status = 0; - status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes, - &destBuffer[curBufferSize], 0,0,0 ); - b3Assert( status==CL_SUCCESS ); - clFinish(m_commandQueue); - curBufferSize+=arg->m_argSizeInBytes; - } - - } - return curBufferSize; - } - - void serializeToFile(const char* fileName, int numWorkItems) - { - int num = numWorkItems; - int buffSize = getSerializationBufferSize(); - unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; - for (int i=0;i inline void setConst( const T& consts ) diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp index 6b64ad336..42cd19774 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp @@ -63,7 +63,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray& src, b3OpenCLArraygetBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_localScanKernel ); + b3LauncherCL launcher( m_commandQueue, m_localScanKernel,"m_localScanKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( constBuffer ); launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE ); @@ -72,7 +72,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray& src, b3OpenCLArraygetBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ); + b3LauncherCL launcher( m_commandQueue, m_blockSumKernel,"m_blockSumKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( constBuffer ); launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE ); @@ -82,7 +82,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray& src, b3OpenCLArray 1 ) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_propagationKernel ); + b3LauncherCL launcher( m_commandQueue, m_propagationKernel,"m_propagationKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( constBuffer ); launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE ); diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp index 8512b7fa6..80560d793 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp @@ -63,7 +63,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray& src, b3OpenCLArray< { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_localScanKernel ); + b3LauncherCL launcher( m_commandQueue, m_localScanKernel ,"m_localScanKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( constBuffer ); launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE ); @@ -72,7 +72,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray& src, b3OpenCLArray< { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ); + b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ,"m_blockSumKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( constBuffer ); launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE ); @@ -82,7 +82,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray& src, b3OpenCLArray< if( numBlocks > 1 ) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_propagationKernel ); + b3LauncherCL launcher( m_commandQueue, m_propagationKernel ,"m_propagationKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( constBuffer ); launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE ); diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp index 4f722edc0..f11ae4bcd 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp @@ -294,7 +294,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keyValuesInOut, int sor if (src->size()) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel); + b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel,"m_streamCountSortDataKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); @@ -328,7 +328,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keyValuesInOut, int sor if (fastScan) {// prefix scan group histogram b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel ); + b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( 128, 128 ); @@ -362,7 +362,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keyValuesInOut, int sor if (src->size()) {// local sort and distribute b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )}; - b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel ); + b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel,"m_sortAndScatterSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); @@ -641,7 +641,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keysInOut, int sortBi if (src->size()) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher(m_commandQueue, m_streamCountKernel); + b3LauncherCL launcher(m_commandQueue, m_streamCountKernel,"m_streamCountKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); @@ -662,7 +662,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keysInOut, int sortBi if (fastScan) {// prefix scan group histogram b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel ); + b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( 128, 128 ); @@ -676,7 +676,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keysInOut, int sortBi if (src->size()) {// local sort and distribute b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )}; - b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel ); + b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel ,"m_sortAndScatterKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp index 6edc6dbaa..c1b865bf8 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -230,7 +230,7 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3Align { B3_PROFILE("raycast launch1D"); - b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel); + b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); int numRays = rays.size(); launcher.setConst(numRays); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp index 3fe85b187..74fca91b5 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp @@ -790,7 +790,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m { B3_PROFILE("m_countBodiesKernel"); - b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); + b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel,"m_countBodiesKernel"); launcher.setBuffer(contactBuf);//manifoldPtr->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); @@ -815,7 +815,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m { B3_PROFILE("contactToConstraintSplitKernel"); - b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); + b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel,"m_contactToConstraintSplitKernel"); launcher.setBuffer(contactBuf); launcher.setBuffer(bodyBuf); launcher.setBuffer(inertiaBuf); @@ -840,11 +840,12 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m { B3_PROFILE("m_clearVelocitiesKernel"); - b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); + b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel,"m_clearVelocitiesKernel"); launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); launch.setConst(totalNumSplitBodies); launch.launch1D(totalNumSplitBodies); + clFinish(m_queue); } @@ -854,7 +855,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m { { B3_PROFILE("m_solveContactKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel ); + b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" ); launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); launcher.setBuffer(bodyBuf); launcher.setBuffer(inertiaBuf); @@ -869,14 +870,14 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m launcher.setConst(numManifolds); launcher.launch1D(numManifolds); - + clFinish(m_queue); } { B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); + b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel"); launcher.setBuffer(bodyBuf); launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); @@ -884,13 +885,13 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); launcher.setConst(numBodies); launcher.launch1D(numBodies); - + clFinish(m_queue); } { B3_PROFILE("m_solveFrictionKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); + b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel"); launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); launcher.setBuffer(bodyBuf); launcher.setBuffer(inertiaBuf); @@ -905,13 +906,13 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m launcher.setConst(numManifolds); launcher.launch1D(numManifolds); - + clFinish(m_queue); } { B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); + b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel"); launcher.setBuffer(bodyBuf); launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); @@ -919,7 +920,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); launcher.setConst(numBodies); launcher.launch1D(numBodies); - + clFinish(m_queue); } @@ -929,7 +930,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m { B3_PROFILE("update body velocities"); - b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel); + b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel,"m_updateBodyVelocitiesKernel"); launcher.setBuffer(bodyBuf); launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp index 95ce234cb..e0bcdf52c 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp @@ -231,7 +231,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue,m_gpuData->m_initSolverBodiesKernel); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initSolverBodiesKernel,"m_initSolverBodiesKernel"); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setConst(numBodies); @@ -280,7 +280,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue,m_gpuData->m_getInfo1Kernel); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo1Kernel,"m_getInfo1Kernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setConst(numConstraints); @@ -300,7 +300,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue,m_gpuData->m_initBatchConstraintsKernel); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initBatchConstraintsKernel,"m_initBatchConstraintsKernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); @@ -348,7 +348,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArraym_queue,m_gpuData->m_getInfo2Kernel); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo2Kernel,"m_getInfo2Kernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); @@ -759,7 +759,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArr int numConstraintsInBatch*/ - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels,"m_solveJointConstraintRowsKernels"); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); @@ -1040,7 +1040,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArraym_queue,m_gpuData->m_breakViolatedConstraintsKernel); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_breakViolatedConstraintsKernel,"m_breakViolatedConstraintsKernel"); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); @@ -1090,7 +1090,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArraym_queue,m_gpuData->m_writeBackVelocitiesKernel); + b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_writeBackVelocitiesKernel,"m_writeBackVelocitiesKernel"); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setConst(numBodies); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp index cb4e4ee3e..6a32273be 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp @@ -1,32 +1,16 @@ -//#define USE_CPU -#ifdef USE_CPU - bool b3GpuBatchContacts = false; - bool b3GpuSolveConstraint = false; - bool gpuRadixSort=false; - bool gpuSetSortData = false; - bool optionalSortContactsDeterminism = true; - bool gpuSortContactsDeterminism = false; - bool useCpuCopyConstraints = true; +bool gCpuBatchContacts = false; +bool gCpuSolveConstraint = false; +bool gCpuRadixSort=false; +bool gCpuSetSortData = false; +bool gCpuSortContactsDeterminism = false; +bool gUseCpuCopyConstraints = false; +bool gUseScanHost = false; +bool gReorderContactsOnCpu = false; - bool useScanHost = true; - bool reorderContactsOnCpu = true; +bool optionalSortContactsDeterminism = true; -#else - bool b3GpuBatchContacts = true; - bool b3GpuSolveConstraint = true; - bool gpuRadixSort=true; - bool gpuSetSortData = true; - - bool optionalSortContactsDeterminism = true; - bool gpuSortContactsDeterminism = true; - bool useCpuCopyConstraints = false; - - bool useScanHost = false; - bool reorderContactsOnCpu = false; - -#endif #include "b3GpuPgsContactSolver.h" #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" @@ -336,7 +320,7 @@ void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArraym_queue, m_data->m_solveContactKernel ); + b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" ); #if 1 b3BufferInfoCL bInfo[] = { @@ -441,7 +425,7 @@ void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArraym_queue, m_data->m_solveFrictionKernel ); + b3LauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata.x ); launcher.setConst( cdata.y ); @@ -598,7 +582,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (optionalSortContactsDeterminism) { - if (gpuSortContactsDeterminism) + if (!gCpuSortContactsDeterminism) { B3_PROFILE("GPU Sort contact constraints (determinism)"); @@ -608,7 +592,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0); { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel); + b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); @@ -616,7 +600,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel); + b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); @@ -624,7 +608,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel); + b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); @@ -634,7 +618,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel); + b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); @@ -651,7 +635,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel); + b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); @@ -755,7 +739,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem b3OpenCLArray* offsetsNative = m_data->m_solverGPU->m_offsets; - if (gpuSetSortData) + if (!gCpuSetSortData) { // 2. set cell idx B3_PROFILE("GPU set cell idx"); struct CB @@ -779,7 +763,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel ); + b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata.m_nContacts ); launcher.setConst( cdata.m_scale ); @@ -812,7 +796,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem - if (gpuRadixSort) + if (!gCpuRadixSort) { // 3. sort by cell idx B3_PROFILE("gpuRadixSort"); //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; @@ -836,7 +820,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } - if (useScanHost) + if (gUseScanHost) { // 4. find entries B3_PROFILE("cpuBoundSearch"); @@ -879,7 +863,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (nContacts) { // 5. sort constraints by cellIdx - if (reorderContactsOnCpu) + if (gReorderContactsOnCpu) { B3_PROFILE("cpu m_reorderContactKernel"); b3AlignedObjectArray sortDataHost; @@ -918,7 +902,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel); + b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); @@ -944,7 +928,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (nContacts) { - if (useCpuCopyConstraints) + if (gUseCpuCopyConstraints) { for (int i=0;im_pBufContactOutGPU->getBufferCL() ) }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel ); + b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); @@ -975,7 +959,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem bool compareGPU = false; if (nContacts) { - if (b3GpuBatchContacts) + if (!gCpuBatchContacts) { B3_PROFILE("gpu batchContacts"); maxNumBatches = 150;//250; @@ -984,10 +968,12 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } else { B3_PROFILE("cpu batchContacts"); - b3AlignedObjectArray cpuContacts; + static b3AlignedObjectArray cpuContacts; b3OpenCLArray* contactsIn = m_data->m_solverGPU->m_contactBuffer2; - contactsIn->copyToHost(cpuContacts); - + { + B3_PROFILE("copyToHost"); + contactsIn->copyToHost(cpuContacts); + } b3OpenCLArray* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray* offsetsNative = m_data->m_solverGPU->m_offsets; @@ -1025,7 +1011,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem } } - clFinish(m_data->m_queue); + //clFinish(m_data->m_queue); } { B3_PROFILE("m_contactBuffer->copyFromHost"); @@ -1063,7 +1049,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem int numIter = 4; m_data->m_solverGPU->m_nIterations = numIter;//10 - if (b3GpuSolveConstraint) + if (!gCpuSolveConstraint) { B3_PROFILE("GPU solveContactConstraint"); diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index 145fbf22a..a9a79a9c5 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -33,27 +33,15 @@ subject to the following restrictions: #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" +bool useBullet2CpuSolver = true; + //choice of contact solver -bool useJacobi = false; - -//#define USE_CPU -#ifdef USE_CPU - bool useDbvt = true; - bool useBullet2CpuSolver = true; - bool dumpContactStats = false; - bool calcWorldSpaceAabbOnCpu = true; - bool useCalculateOverlappingPairsHost = true; - bool integrateOnCpu = true; - -#else - bool useDbvt = false; - bool useBullet2CpuSolver = true; - bool dumpContactStats = false; - bool calcWorldSpaceAabbOnCpu = false;//true; - bool useCalculateOverlappingPairsHost = false; - bool integrateOnCpu = false; - -#endif +bool gUseJacobi = false; +bool gUseDbvt = false; +bool gDumpContactStats = false; +bool gCalcWorldSpaceAabbOnCpu = false; +bool gUseCalculateOverlappingPairsHost = false; +bool gIntegrateOnCpu = false; #define TEST_OTHER_GPU_SOLVER 1 #ifdef TEST_OTHER_GPU_SOLVER @@ -241,7 +229,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) //compute overlapping pairs { - if (useDbvt) + if (gUseDbvt) { { B3_PROFILE("setAabb"); @@ -261,7 +249,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs(); } else { - if (useCalculateOverlappingPairsHost) + if (gUseCalculateOverlappingPairsHost) { m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs); } else @@ -284,7 +272,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) { cl_mem pairs =0; cl_mem aabbsWS =0; - if (useDbvt) + if (gUseDbvt) { B3_PROFILE("m_overlappingPairsGPU->copyFromHost"); m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); @@ -300,13 +288,13 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies); numContacts = m_data->m_narrowphase->getNumContactsGpu(); - if (useDbvt) + if (gUseDbvt) { ///store the cached information (contact locations in the 'z' component) B3_PROFILE("m_overlappingPairsGPU->copyToHost"); m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); } - if (dumpContactStats && numContacts) + if (gDumpContactStats && numContacts) { m_data->m_narrowphase->getContactsGpu(); @@ -369,7 +357,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) #ifdef TEST_OTHER_GPU_SOLVER - if (useJacobi) + if (gUseJacobi) { bool useGpu = true; if (useGpu) @@ -453,7 +441,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep) int numBodies = m_data->m_narrowphase->getNumRigidBodies(); float angularDamp = 0.99f; - if (integrateOnCpu) + if (gIntegrateOnCpu) { if(numBodies) { @@ -470,7 +458,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep) } } else { - b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel); + b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel,"m_integrateTransformsKernel"); launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); launcher.setConst(numBodies); @@ -492,12 +480,12 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull() if (!numBodies) return; - if (calcWorldSpaceAabbOnCpu) + if (gCalcWorldSpaceAabbOnCpu) { if (numBodies) { - if (useDbvt) + if (gUseDbvt) { m_data->m_allAabbsCPU.resize(numBodies); m_data->m_narrowphase->readbackAllBodiesToCpu(); @@ -521,7 +509,7 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull() } else { //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB) - b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel); + b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel,"m_updateAabbsKernel"); launcher.setConst(numBodies); cl_mem bodies = m_data->m_narrowphase->getBodiesGpu(); launcher.setBuffer(bodies); @@ -531,7 +519,7 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull() launcher.setBuffer(localAabbs); cl_mem worldAabbs =0; - if (useDbvt) + if (gUseDbvt) { worldAabbs = m_data->m_allAabbsGPU->getBufferCL(); } else @@ -624,7 +612,7 @@ int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po if (bodyIndex>=0) { - if (useDbvt) + if (gUseDbvt) { m_data->m_broadphaseDbvt->createProxy(aabbMin,aabbMax,bodyIndex,0,1,1); b3SapAabb aabb; diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index 1df89cfea..1d313aded 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -18,7 +18,7 @@ subject to the following restrictions: ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments bool useNewBatchingKernel = true; -bool convertConstraintOnCpu = false; +bool gConvertConstraintOnCpu = false; #define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" #define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" @@ -824,7 +824,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray* body cdata.z = ib; - b3LauncherCL launcher( m_queue, m_solveContactKernel ); + b3LauncherCL launcher( m_queue, m_solveContactKernel ,"m_solveContactKernel"); #if 1 b3BufferInfoCL bInfo[] = { @@ -929,7 +929,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray* body ,b3BufferInfoCL(&gpuDebugInfo) #endif //DEBUG_ME }; - b3LauncherCL launcher( m_queue, m_solveFrictionKernel ); + b3LauncherCL launcher( m_queue, m_solveFrictionKernel,"m_solveFrictionKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata.x ); launcher.setConst( cdata.y ); @@ -979,7 +979,7 @@ void b3Solver::convertToConstraints( const b3OpenCLArray* bodyBuf cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; - if (convertConstraintOnCpu) + if (gConvertConstraintOnCpu) { b3AlignedObjectArray gBodies; bodyBuf->copyToHost(gBodies); @@ -1031,7 +1031,7 @@ void b3Solver::convertToConstraints( const b3OpenCLArray* bodyBuf b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()), b3BufferInfoCL( contactCOut->getBufferCL() )}; - b3LauncherCL launcher( m_queue, m_contactToConstraintKernel ); + b3LauncherCL launcher( m_queue, m_contactToConstraintKernel,"m_contactToConstraintKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata ); @@ -1169,7 +1169,7 @@ void b3Solver::batchContacts( b3OpenCLArray* contacts, int nContact //b3LauncherCL launcher( m_queue, m_batchingKernel); cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel; - b3LauncherCL launcher( m_queue, k); + b3LauncherCL launcher( m_queue, k,"*batchingKernel"); if (!useNewBatchingKernel ) { launcher.setBuffer( contacts->getBufferCL() ); diff --git a/test/OpenCL/KernelLaunch/main.cpp b/test/OpenCL/KernelLaunch/main.cpp index fe44a4d49..1d471dcaf 100644 --- a/test/OpenCL/KernelLaunch/main.cpp +++ b/test/OpenCL/KernelLaunch/main.cpp @@ -186,7 +186,7 @@ int main(int argc, char* argv[]) atomicCounter.push_back(0); deviceElements.resize(numWorkItems); - b3LauncherCL run(queue,testKernel); + b3LauncherCL run(queue,testKernel,"testKernel"); run.setBuffer(deviceElements.getBufferCL()); run.setBuffer(deviceContacts.getBufferCL()); run.setBuffer(atomicCounter.getBufferCL()); diff --git a/test/OpenCL/KernelLaunch/premake4.lua b/test/OpenCL/KernelLaunch/premake4.lua index dda79ace5..78882af96 100644 --- a/test/OpenCL/KernelLaunch/premake4.lua +++ b/test/OpenCL/KernelLaunch/premake4.lua @@ -22,6 +22,7 @@ function createProject(vendor) "../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h", "../../../src/Bullet3Common/b3Logging.cpp", + "../../../src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp" } end diff --git a/test/OpenCL/ParallelPrimitives/premake4.lua b/test/OpenCL/ParallelPrimitives/premake4.lua index 78f9634ae..680c9e363 100644 --- a/test/OpenCL/ParallelPrimitives/premake4.lua +++ b/test/OpenCL/ParallelPrimitives/premake4.lua @@ -27,6 +27,7 @@ function createProject(vendor) "../../../src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h", + "../../../src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.h", "../../../src/Bullet3Common/b3AlignedObjectArray.h", diff --git a/test/OpenCL/RadixSortBenchmark/premake4.lua b/test/OpenCL/RadixSortBenchmark/premake4.lua index 2b9e600be..a7da0b753 100644 --- a/test/OpenCL/RadixSortBenchmark/premake4.lua +++ b/test/OpenCL/RadixSortBenchmark/premake4.lua @@ -24,6 +24,7 @@ function createProject(vendor) "../../../src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp", + "../../../src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.h", "../../../src/Bullet3Common/b3AlignedObjectArray.h",