From 596cc951636e6672416a5a5037a75c834326ff35 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 19 Jun 2013 14:35:16 -0700 Subject: [PATCH 01/11] move files to correct location (case sensitive issue) w --- {demos3 => Demos3}/BasicGpuDemo/b3GpuDynamicsWorld.cpp | 0 {demos3 => Demos3}/BasicGpuDemo/b3GpuDynamicsWorld.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {demos3 => Demos3}/BasicGpuDemo/b3GpuDynamicsWorld.cpp (100%) rename {demos3 => Demos3}/BasicGpuDemo/b3GpuDynamicsWorld.h (100%) diff --git a/demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp b/Demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp similarity index 100% rename from demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp rename to Demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp diff --git a/demos3/BasicGpuDemo/b3GpuDynamicsWorld.h b/Demos3/BasicGpuDemo/b3GpuDynamicsWorld.h similarity index 100% rename from demos3/BasicGpuDemo/b3GpuDynamicsWorld.h rename to Demos3/BasicGpuDemo/b3GpuDynamicsWorld.h From 7561e6a4f9ea69018bba48ddb99c8d0f9ff5d18f Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 19 Jun 2013 14:54:28 -0700 Subject: [PATCH 02/11] move build to build3 to avoid naming conflict with Bullet 2.x fix build error in BasicGpuDemo Thanks to joen66 for the report here: https://github.com/erwincoumans/bullet3/issues/5 --- Demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp | 3 ++- {build => build3}/bin2cpp.bat | 0 {build => build3}/bin2cpp.lua | 0 {build => build3}/findDirectX11.lua | 0 {build => build3}/findOpenCL.lua | 0 {build => build3}/findOpenGLGlewGlut.lua | 0 {build => build3}/premake4.exe | Bin {build => build3}/premake4.lua | 0 {build => build3}/premake4_linux | Bin {build => build3}/premake4_linux64 | Bin {build => build3}/premake4_osx | Bin {build => build3}/stringify.bat | 0 {build => build3}/stringifyKernel.lua | 0 {build => build3}/stringify_linux.sh | 0 {build => build3}/stringify_osx.sh | 0 {build => build3}/vs2010.bat | 0 {build => build3}/vs2010_bullet2gpu.bat | 0 {build => build3}/xcode.command 
| 0 18 files changed, 2 insertions(+), 1 deletion(-) rename {build => build3}/bin2cpp.bat (100%) rename {build => build3}/bin2cpp.lua (100%) rename {build => build3}/findDirectX11.lua (100%) rename {build => build3}/findOpenCL.lua (100%) rename {build => build3}/findOpenGLGlewGlut.lua (100%) rename {build => build3}/premake4.exe (100%) rename {build => build3}/premake4.lua (100%) rename {build => build3}/premake4_linux (100%) rename {build => build3}/premake4_linux64 (100%) rename {build => build3}/premake4_osx (100%) rename {build => build3}/stringify.bat (100%) rename {build => build3}/stringifyKernel.lua (100%) rename {build => build3}/stringify_linux.sh (100%) rename {build => build3}/stringify_osx.sh (100%) rename {build => build3}/vs2010.bat (100%) rename {build => build3}/vs2010_bullet2gpu.bat (100%) rename {build => build3}/xcode.command (100%) diff --git a/Demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp b/Demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp index 34779a429..3beb3bc4f 100644 --- a/Demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp +++ b/Demos3/BasicGpuDemo/b3GpuDynamicsWorld.cpp @@ -145,7 +145,8 @@ int b3GpuDynamicsWorld::stepSimulation( btScalar timeStepUnused, int maxSubStep if (body) { b3Vector3 pos = (const b3Vector3&)m_collisionObjects[i]->getWorldTransform().getOrigin(); - b3Quaternion orn = (const b3Quaternion&)m_collisionObjects[i]->getWorldTransform().getRotation(); + btQuaternion orn2 = m_collisionObjects[i]->getWorldTransform().getRotation(); + b3Quaternion orn(orn2[0],orn2[1],orn2[2],orn2[3]); body->integrateVelocities(fixedTimeStep); m_np->setObjectTransformCpu(&pos[0],&orn[0],i); b3Vector3 linVel = (const b3Vector3&)body->getLinearVelocity(); diff --git a/build/bin2cpp.bat b/build3/bin2cpp.bat similarity index 100% rename from build/bin2cpp.bat rename to build3/bin2cpp.bat diff --git a/build/bin2cpp.lua b/build3/bin2cpp.lua similarity index 100% rename from build/bin2cpp.lua rename to build3/bin2cpp.lua diff --git a/build/findDirectX11.lua 
b/build3/findDirectX11.lua similarity index 100% rename from build/findDirectX11.lua rename to build3/findDirectX11.lua diff --git a/build/findOpenCL.lua b/build3/findOpenCL.lua similarity index 100% rename from build/findOpenCL.lua rename to build3/findOpenCL.lua diff --git a/build/findOpenGLGlewGlut.lua b/build3/findOpenGLGlewGlut.lua similarity index 100% rename from build/findOpenGLGlewGlut.lua rename to build3/findOpenGLGlewGlut.lua diff --git a/build/premake4.exe b/build3/premake4.exe similarity index 100% rename from build/premake4.exe rename to build3/premake4.exe diff --git a/build/premake4.lua b/build3/premake4.lua similarity index 100% rename from build/premake4.lua rename to build3/premake4.lua diff --git a/build/premake4_linux b/build3/premake4_linux similarity index 100% rename from build/premake4_linux rename to build3/premake4_linux diff --git a/build/premake4_linux64 b/build3/premake4_linux64 similarity index 100% rename from build/premake4_linux64 rename to build3/premake4_linux64 diff --git a/build/premake4_osx b/build3/premake4_osx similarity index 100% rename from build/premake4_osx rename to build3/premake4_osx diff --git a/build/stringify.bat b/build3/stringify.bat similarity index 100% rename from build/stringify.bat rename to build3/stringify.bat diff --git a/build/stringifyKernel.lua b/build3/stringifyKernel.lua similarity index 100% rename from build/stringifyKernel.lua rename to build3/stringifyKernel.lua diff --git a/build/stringify_linux.sh b/build3/stringify_linux.sh similarity index 100% rename from build/stringify_linux.sh rename to build3/stringify_linux.sh diff --git a/build/stringify_osx.sh b/build3/stringify_osx.sh similarity index 100% rename from build/stringify_osx.sh rename to build3/stringify_osx.sh diff --git a/build/vs2010.bat b/build3/vs2010.bat similarity index 100% rename from build/vs2010.bat rename to build3/vs2010.bat diff --git a/build/vs2010_bullet2gpu.bat b/build3/vs2010_bullet2gpu.bat similarity index 100% 
rename from build/vs2010_bullet2gpu.bat rename to build3/vs2010_bullet2gpu.bat diff --git a/build/xcode.command b/build3/xcode.command similarity index 100% rename from build/xcode.command rename to build3/xcode.command From 221a8cf46350fabb18f935f4711b21c0da71793e Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 19 Jun 2013 15:03:29 -0700 Subject: [PATCH 03/11] move binary to bin folder --- Demos3/premake4.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Demos3/premake4.lua b/Demos3/premake4.lua index d87300b6b..38cde6f93 100644 --- a/Demos3/premake4.lua +++ b/Demos3/premake4.lua @@ -56,7 +56,7 @@ function createGpuDemos( demos, incdirs, linknames, vendor) initOpenCL(vendor) kind "ConsoleApp" - targetdir ".." + targetdir "../bin" links {"Bullet3OpenCL_" .. vendor } From fa4394c378a0a30b4ceba1f26a8e7747d8a0128c Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Wed, 19 Jun 2013 23:14:02 -0700 Subject: [PATCH 04/11] remove bullet2.patch, the file is not necessary anymore. You can use Bullet 3.x standalone, or you can use Bullet3+Bullet2 by checking out the source code of both repositories at the same location, and then use premake --bullet2gpu gmake (or vs2010, xcode4) to generate the combined project, with App_BasicGpuDemo_* test app. 
--- bullet2.patch | 486 -------------------------------------------------- 1 file changed, 486 deletions(-) delete mode 100644 bullet2.patch diff --git a/bullet2.patch b/bullet2.patch deleted file mode 100644 index 64c1e3ec7..000000000 --- a/bullet2.patch +++ /dev/null @@ -1,486 +0,0 @@ -Index: build/findOpenCL.lua -=================================================================== ---- build/findOpenCL.lua (revision 2640) -+++ build/findOpenCL.lua (working copy) -@@ -1,5 +1,14 @@ -- -- todo: add Apple OpenCL environment vars - -+ -+ function findOpenCL_Apple() -+ if os.is("macosx") then -+ return true -+ else -+ return false -+ end -+ end -+ -+ - function findOpenCL_AMD() - local amdopenclpath = os.getenv("AMDAPPSDKROOT") - if (amdopenclpath) then -@@ -17,13 +26,33 @@ - end - - function findOpenCL_Intel() -- local intelopenclpath = os.getenv("INTELOCLSDKROOT") -- if (intelopenclpath) then -+ if os.is("Windows") then -+ local intelopenclpath = os.getenv("INTELOCLSDKROOT") -+ if (intelopenclpath) then - return true -+ end - end -+ if os.is("Linux") then -+ local intelsdk = io.open("/usr/include/CL/opencl.h","r") -+ if (intelsdk) then -+ return true; -+ end -+ end - return false - end -- -+ -+ function initOpenCL_Apple() -+ configuration{} -+ includedirs { -+ "/System/Library/Frameworks/OpenCL.framework" -+ } -+ libdirs "/System/Library/Frameworks/OpenCL.framework" -+ links -+ { -+ "OpenCL.framework" -+ } -+ end -+ - function initOpenCL_AMD() - configuration {} - local amdopenclpath = os.getenv("AMDAPPSDKROOT") -@@ -65,6 +94,7 @@ - - function initOpenCL_Intel() - configuration {} -+ if os.is("Windows") then - local intelopenclpath = os.getenv("INTELOCLSDKROOT") - if (intelopenclpath) then - defines { "ADL_ENABLE_CL" , "CL_PLATFORM_INTEL"} -@@ -79,6 +109,43 @@ - links {"OpenCL"} - return true - end -+ end -+ if os.is("Linux") then -+ defines { "ADL_ENABLE_CL" , "CL_PLATFORM_INTEL"} -+ configuration {} -+ links {"OpenCL"} -+ end - return false - end -- -\ No newline 
at end of file -+ -+ function findOpenCL (vendor ) -+ if vendor=="AMD" then -+ return findOpenCL_AMD() -+ end -+ if vendor=="NVIDIA" then -+ return findOpenCL_NVIDIA() -+ end -+ if vendor=="Intel" then -+ return findOpenCL_Intel() -+ end -+ if vendor=="Apple" then -+ return findOpenCL_Apple() -+ end -+ return false -+ end -+ -+ function initOpenCL ( vendor ) -+ if vendor=="AMD" then -+ initOpenCL_AMD() -+ end -+ if vendor=="NVIDIA" then -+ return initOpenCL_NVIDIA() -+ end -+ if vendor=="Intel" then -+ initOpenCL_Intel() -+ end -+ if vendor=="Apple" then -+ return initOpenCL_Apple() -+ end -+ end -+ -Index: build/premake4.lua -=================================================================== ---- build/premake4.lua (revision 2640) -+++ build/premake4.lua (working copy) -@@ -1,102 +1,47 @@ ----add the 0 so the solution comes first in the directory (when sorted on name) ----print "uncomment this hello premake4 world for debugging the script" - --solution "0BulletSolution" -+ solution "0MySolution" - -- newoption { -- trigger = "ios", -- description = "Enable iOS target (requires xcode4)" -- } -- -- newoption { -- trigger = "without-demos", -- description = "Disable demos and extras" -- } -+ -- Multithreaded compiling -+ if _ACTION == "vs2010" or _ACTION=="vs2008" then -+ buildoptions { "/MP" } -+ end -+ -+ act = "" -+ -+ if _ACTION then -+ act = _ACTION -+ end - -- newoption { -- trigger = "with-double-precision", -- description = "Enable double precision build" -- } - -- -- newoption { -- trigger = "with-nacl", -- description = "Enable Native Client build" -- } -+ newoption -+ { -+ trigger = "ios", -+ description = "Enable iOS target (requires xcode4)" -+ } -+ -+ newoption -+ { -+ trigger = "bullet2gpu", -+ description = "Enable Bullet 2.x GPU using b3GpuDynamicsWorld bridge to Bullet 3.x" -+ } - -- newoption { -- trigger = "with-dx11", -- description = "Enable DirectX11 build" -- } -- -- newoption { -- trigger = "with-opencl", -- description = "Enable OpenCL 
builds (various SDKs)" -- } -- -- newoption { -- trigger = "with-opencl-amd", -- description = "Enable OpenCL builds (AMD SDK)" -- } -- -- newoption { -- trigger = "with-opencl-intel", -- description = "Enable OpenCL builds (Intel SDK)" -- } -- newoption { -- trigger = "with-opencl-nvidia", -- description = "Enable OpenCL builds (NVIDIA SDK)" -- } -- -- - configurations {"Release", "Debug"} - configuration "Release" -- flags { "Optimize", "EnableSSE", "StaticRuntime", "NoMinimalRebuild", "FloatFast"} -+ flags { "Optimize", "EnableSSE","StaticRuntime", "NoMinimalRebuild", "FloatFast"} - configuration "Debug" -+ defines {"_DEBUG=1"} - flags { "Symbols", "StaticRuntime" , "NoMinimalRebuild", "NoEditAndContinue" ,"FloatFast"} - -- platforms {"x32", "x64"} -- --platforms {"x32"} -- -- configuration {"Windows"} -- defines { "_CRT_SECURE_NO_WARNINGS","_CRT_SECURE_NO_DEPRECATE"} -- -- configuration{} -- -- postfix=""; -- -- if _OPTIONS["with-double-precision"] then -- defines {"BT_USE_DOUBLE_PRECISION"} -- end -- -- if _ACTION == "xcode4" then -- if _OPTIONS["ios"] then -- postfix = "ios"; -- xcodebuildsettings -- { -- 'INFOPLIST_FILE = "../../Test/Info.plist"', -- 'CODE_SIGN_IDENTITY = "iPhone Developer"', -- "SDKROOT = iphoneos", -- 'ARCHS = "armv7"', -- 'TARGETED_DEVICE_FAMILY = "1,2"', -- 'VALID_ARCHS = "armv7"', -- } -- else -- xcodebuildsettings -- { -- 'ARCHS = "$(ARCHS_STANDARD_32_BIT) $(ARCHS_STANDARD_64_BIT)"', -- 'VALID_ARCHS = "x86_64 i386"', -- } -+ if os.is("Linux") then -+ if os.is64bit() then -+ platforms {"x64"} -+ else -+ platforms {"x32"} - end - else -- -+ platforms {"x32", "x64"} - end - -- act = "" -- -- if _ACTION then -- act = _ACTION -- end - configuration {"x32"} - targetsuffix ("_" .. 
act) - configuration "x64" -@@ -110,89 +55,91 @@ - - configuration{} - -+ postfix="" - -+ if _ACTION == "xcode4" then -+ if _OPTIONS["ios"] then -+ postfix = "ios"; -+ xcodebuildsettings -+ { -+ 'CODE_SIGN_IDENTITY = "iPhone Developer"', -+ "SDKROOT = iphoneos", -+ 'ARCHS = "armv7"', -+ 'TARGETED_DEVICE_FAMILY = "1,2"', -+ 'VALID_ARCHS = "armv7"', -+ } -+ else -+ xcodebuildsettings -+ { -+ 'ARCHS = "$(ARCHS_STANDARD_32_BIT) $(ARCHS_STANDARD_64_BIT)"', -+ 'VALID_ARCHS = "x86_64 i386"', -+ } -+ end -+ end - --if not _OPTIONS["with-nacl"] then -- -- flags { "NoRTTI"} -+ -+ flags { "NoRTTI", "NoExceptions"} -+ defines { "_HAS_EXCEPTIONS=0" } - targetdir "../bin" -+ location("./" .. act .. postfix) - -- -- Disable exception handling on MSVC 2008 and higher. MSVC 2005 without service pack has some linker issue (ConvexDecompositionDemo uses STL through HACD library) -- if _ACTION == "vs2010" or _ACTION=="vs2008" then -- flags { "NoExceptions"} -- defines { "_HAS_EXCEPTIONS=0" } -- end -+ -+ projectRootDir = os.getcwd() .. "/../" -+ print("Project root directroy: " .. projectRootDir); - -- -- Multithreaded compiling -- if _ACTION == "vs2010" then -- buildoptions { "/MP" } -- end -- -- --else -- targetdir "../bin_html" --end -- -- - dofile ("findOpenCL.lua") - dofile ("findDirectX11.lua") -+ dofile ("findOpenGLGlewGlut.lua") - - language "C++" - -- location("./" .. act .. 
postfix) - -+ -+ if not _OPTIONS["ios"] then -+-- include "../demo/gpudemo" -+-- include "../btgui/MidiTest" -+-- include "../opencl/vector_add_simplified" -+-- include "../opencl/vector_add" -+ include "../btgui/Gwen" -+ include "../btgui/GwenOpenGLTest" -+ -+ include "../test/OpenCL/BasicInitialize" -+-- include "../test/OpenCL/BroadphaseCollision" -+-- include "../test/OpenCL/NarrowphaseCollision" -+ include "../test/OpenCL/ParallelPrimitives" -+ include "../test/OpenCL/RadixSortBenchmark" -+ include "../test/OpenCL/BitonicSort" -+ -+ include "../src/Bullet3Dynamics" -+ include "../src/Bullet3Common" -+ include "../src/Bullet3Geometry" -+ include "../src/Bullet3Collision" -+ include "../src/Bullet3Serialize/Bullet2FileLoader" - -- if _OPTIONS["with-dx11"] then -- include "../Demos/DX11ClothDemo" -- include "../src/BulletMultiThreaded/GpuSoftBodySolvers/DX11" -- end -+ include "../src/Bullet3OpenCL" -+ include "../Demos3/GpuDemos" -+ -+-- include "../demo/gpu_initialize" -+-- include "../opencl/lds_bank_conflict" -+-- include "../opencl/reduce" -+-- include "../btgui/OpenGLTrueTypeFont" -+-- include "../btgui/OpenGLWindow" -+-- include "../demo/ObjLoader" - ----choose any OpenCL sdk that is installed on the system -- if _OPTIONS["with-opencl"] then -- include "../Demos/OpenCLClothDemo/AMD" -- include "../src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD" -- include "../Demos/OpenCLClothDemo/NVidia" -- include "../src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia" -- include "../Demos/OpenCLClothDemo/Intel" -- include "../src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel" -- end -+ -+-- include "../test/b3DynamicBvhBroadphase" -+ - ----choose a particular OpenCL sdk, this is useful for distributing project files that just work for one OpenCL SDK -- if _OPTIONS["with-opencl-amd"] then -- include "../Demos/OpenCLClothDemo/AMD" -- include "../Demos/OpenGL" -- include "../Demos/SoftDemo/AMD" -- include 
"../src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD" -- end -- -- if _OPTIONS["with-opencl-intel"] then -- include "../Demos/OpenCLClothDemo/Intel" -- include "../src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel" -- end -+ - -- if _OPTIONS["with-opencl-nvidia"] then -- include "../Demos/OpenCLClothDemo/NVidia" -- include "../src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia" -+ if _OPTIONS["bullet2gpu"] then -+ include "../src/LinearMath" -+ include "../src/BulletCollision" -+ include "../src/BulletDynamics" -+ include "../src/BulletSoftBody" -+ include "../Demos/HelloWorld" -+ -+ include "../Demos3" - end - -- if not _OPTIONS["without-demos"] then -- if not _OPTIONS["ios"] then -- include "../Demos" - end -- include "../Extras" -- end -- -- -- if _OPTIONS["with-nacl"] then -- include "../Demos/NativeClient" -- else -- include "../src/LinearMath" -- include "../src/BulletCollision" -- include "../src/BulletDynamics" -- include "../src/BulletSoftBody" -- end -- -- include "../Test" -- include "../Demos/HelloWorld" -- include "../Demos/Benchmarks" -- -Index: build/premake4_linux -=================================================================== -Cannot display: file marked as a binary type. -svn:mime-type = application/octet-stream -Index: build/premake4_osx -=================================================================== -Cannot display: file marked as a binary type. 
-svn:mime-type = application/octet-stream -Index: build/vs2010.bat -=================================================================== ---- build/vs2010.bat (revision 2640) -+++ build/vs2010.bat (working copy) -@@ -1,4 +1,6 @@ - -+rem premake4 --with-pe vs2010 - premake4 vs2010 - -+mkdir vs2010\cache - pause -\ No newline at end of file -Index: Demos/OpenGL/DemoApplication.cpp -=================================================================== ---- Demos/OpenGL/DemoApplication.cpp (revision 2640) -+++ Demos/OpenGL/DemoApplication.cpp (working copy) -@@ -1182,8 +1182,8 @@ - } - } - -- btVector3 aabbMin,aabbMax; -- m_dynamicsWorld->getBroadphase()->getBroadphaseAabb(aabbMin,aabbMax); -+ btVector3 aabbMin(0,0,0),aabbMax(0,0,0); -+ //m_dynamicsWorld->getBroadphase()->getBroadphaseAabb(aabbMin,aabbMax); - - aabbMin-=btVector3(BT_LARGE_FLOAT,BT_LARGE_FLOAT,BT_LARGE_FLOAT); - aabbMax+=btVector3(BT_LARGE_FLOAT,BT_LARGE_FLOAT,BT_LARGE_FLOAT); -Index: Demos/OpenGL/DemoApplication.h -=================================================================== ---- Demos/OpenGL/DemoApplication.h (revision 2640) -+++ Demos/OpenGL/DemoApplication.h (working copy) -@@ -149,6 +149,11 @@ - { - m_azi = azi; - } -+ -+ void setEle(float ele) -+ { -+ m_ele = ele; -+ } - - void setCameraUp(const btVector3& camUp) - { -Index: src/BulletCollision/CollisionDispatch/btCollisionWorld.cpp -=================================================================== ---- src/BulletCollision/CollisionDispatch/btCollisionWorld.cpp (revision 2640) -+++ src/BulletCollision/CollisionDispatch/btCollisionWorld.cpp (working copy) -@@ -73,7 +73,7 @@ - m_debugDrawer(0), - m_forceUpdateAllAabbs(true) - { -- m_stackAlloc = collisionConfiguration->getStackAllocator(); -+ m_stackAlloc = 0;//collisionConfiguration->getStackAllocator(); - m_dispatchInfo.m_stackAllocator = m_stackAlloc; - } - -Index: src/BulletDynamics/Dynamics/btDynamicsWorld.h -=================================================================== ---- 
src/BulletDynamics/Dynamics/btDynamicsWorld.h (revision 2640) -+++ src/BulletDynamics/Dynamics/btDynamicsWorld.h (working copy) -@@ -33,7 +33,8 @@ - BT_SIMPLE_DYNAMICS_WORLD=1, - BT_DISCRETE_DYNAMICS_WORLD=2, - BT_CONTINUOUS_DYNAMICS_WORLD=3, -- BT_SOFT_RIGID_DYNAMICS_WORLD=4 -+ BT_SOFT_RIGID_DYNAMICS_WORLD=4, -+ BT_GPU_DYNAMICS_WORLD=5 - }; - - ///The btDynamicsWorld is the interface class for several dynamics implementation, basic, discrete, parallel, and continuous etc. From 7d79555f504159f2bfd5302d2b25b0a88ee4abaa Mon Sep 17 00:00:00 2001 From: Erwin Coumans Date: Wed, 19 Jun 2013 23:18:17 -0700 Subject: [PATCH 05/11] only disable fastscan in radix sort for __APPLE__, this should improve Linux performance (previously it went through the slower path under Linux) --- .../ParallelPrimitives/b3RadixSort32CL.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp index a72e7a183..4f722edc0 100644 --- a/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp @@ -319,10 +319,10 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keyValuesInOut, int sor //fast prefix scan is not working properly on Mac OSX yet -#ifdef _WIN32 - bool fastScan=!m_deviceCPU;//only use fast scan on GPU -#else +#ifdef __APPLE__ bool fastScan=false; +#else + bool fastScan=!m_deviceCPU;//only use fast scan on GPU #endif if (fastScan) @@ -653,11 +653,10 @@ void b3RadixSort32CL::execute(b3OpenCLArray& keysInOut, int sortBi //fast prefix scan is not working properly on Mac OSX yet -#ifdef _WIN32 - bool fastScan=!m_deviceCPU; - +#ifdef __APPLE__ + bool fastScan=false; #else - bool fastScan=false; + bool fastScan=!m_deviceCPU; #endif if (fastScan) From 6acf4d03a780937ae91ee38bddcc82efe311fa22 Mon Sep 17 00:00:00 2001 From: erwincoumans Date: Wed, 19 Jun 2013 17:29:31 -0700 Subject: [PATCH 06/11] 
add Raycast kernel to stringify_linux.sh --- build3/stringify_linux.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build3/stringify_linux.sh b/build3/stringify_linux.sh index e4ff43bd6..95f56424e 100755 --- a/build3/stringify_linux.sh +++ b/build3/stringify_linux.sh @@ -24,3 +24,5 @@ ./premake4_linux --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody//kernels/solveContact.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solveContact.h" --stringname="solveContactCL" stringify ./premake4_linux --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/RigidBody//kernels/solveFriction.cl" --headerfile="../src/Bullet3OpenCL/RigidBody//kernels/solveFriction.h" --stringname="solveFrictionCL" stringify +./premake4_linux --file=stringifyKernel.lua --kernelfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" --headerfile="../src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" --stringname="rayCastKernelCL" stringify + From a69ba48de47892b6aab3e072804f5b9519bad352 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Wed, 19 Jun 2013 22:08:03 -0700 Subject: [PATCH 07/11] Move b3Quickprof.* from Bullet 3.x src folder to btgui/Timing The Bullet 3.x B3_PROFILE can be customized using b3SetCustomEnterProfileZoneFunc/b3SetCustomLeaveProfileZoneFunc defined in Bullet3Common/b3Logging, so you can hook Bullet 3.x up to your profiler of choice. The Demos3/BasicGpuDemo will show the Bullet 3.x timings inside the Bullet 2.x btQuickprof profiler. 
--- Demos3/BasicGpuDemo/BasicGpuDemo.cpp | 4 + Demos3/GpuDemos/ParticleDemo.cpp | 1 - Demos3/GpuDemos/broadphase/PairBench.cpp | 5 +- Demos3/GpuDemos/main_opengl3core.cpp | 10 +- Demos3/GpuDemos/premake4.lua | 4 + .../rigidbody/BulletDataExtractor.cpp | 1 - Demos3/GpuDemos/rigidbody/ConcaveScene.cpp | 1 - .../GpuDemos/rigidbody/GpuCompoundScene.cpp | 1 - Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp | 1 - .../GpuDemos/rigidbody/GpuRigidBodyDemo.cpp | 1 - Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp | 1 - Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp | 1 - Demos3/GpuGuiInitialize/main.cpp | 3 +- Demos3/GpuGuiInitialize/premake4.lua | 10 +- btgui/GwenOpenGLTest/premake4.lua | 6 +- btgui/OpenGLTrueTypeFont/premake4.lua | 6 +- btgui/OpenGLWindow/GLInstancingRenderer.cpp | 1 - btgui/OpenGLWindow/premake4.lua | 6 +- .../BroadPhaseCollision/b3DynamicBvh.cpp | 2 +- .../b3DynamicBvhBroadphase.h | 2 +- src/Bullet3Common/b3Logging.cpp | 31 + src/Bullet3Common/b3Logging.h | 68 +- src/Bullet3Common/b3Quickprof.cpp | 643 ------------------ src/Bullet3Common/b3Quickprof.h | 218 ------ .../ConstraintSolver/b3PgsJacobiSolver.cpp | 2 +- .../b3GpuSapBroadphase.cpp | 2 +- .../b3ConvexHullContact.cpp | 2 +- src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp | 2 +- .../RigidBody/b3GpuBatchingPgsSolver.cpp | 2 +- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 1 - src/Bullet3OpenCL/RigidBody/b3Solver.cpp | 1 - test/OpenCL/BitonicSort/main.cpp | 2 +- test/OpenCL/BitonicSort/premake4.lua | 4 +- test/OpenCL/RadixSortBenchmark/main.cpp | 2 +- test/OpenCL/RadixSortBenchmark/premake4.lua | 6 +- 35 files changed, 138 insertions(+), 915 deletions(-) delete mode 100644 src/Bullet3Common/b3Quickprof.cpp delete mode 100644 src/Bullet3Common/b3Quickprof.h diff --git a/Demos3/BasicGpuDemo/BasicGpuDemo.cpp b/Demos3/BasicGpuDemo/BasicGpuDemo.cpp index 1022513e1..080755df4 100644 --- a/Demos3/BasicGpuDemo/BasicGpuDemo.cpp +++ b/Demos3/BasicGpuDemo/BasicGpuDemo.cpp @@ -182,6 +182,10 @@ BasicGpuDemo::~BasicGpuDemo() 
void BasicGpuDemo::initPhysics() { + //use the Bullet 2.x btQuickprof for profiling of Bullet 3.x + b3SetCustomEnterProfileZoneFunc(CProfileManager::Start_Profile); + b3SetCustomLeaveProfileZoneFunc(CProfileManager::Stop_Profile); + setTexturing(true); setShadows(false);//too slow with many objects diff --git a/Demos3/GpuDemos/ParticleDemo.cpp b/Demos3/GpuDemos/ParticleDemo.cpp index cf8d95908..bbaaae7c7 100644 --- a/Demos3/GpuDemos/ParticleDemo.cpp +++ b/Demos3/GpuDemos/ParticleDemo.cpp @@ -19,7 +19,6 @@ static char* particleKernelsString = #include "GpuDemoInternalData.h" -#include "Bullet3Common/b3Quickprof.h" //1000000 particles //#define NUM_PARTICLES_X 100 diff --git a/Demos3/GpuDemos/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp index f3d2077e6..2cae268e9 100644 --- a/Demos3/GpuDemos/broadphase/PairBench.cpp +++ b/Demos3/GpuDemos/broadphase/PairBench.cpp @@ -1,5 +1,4 @@ #include "PairBench.h" -#include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" @@ -10,6 +9,7 @@ #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "../../../btgui/Timing/b3Quickprof.h" static b3KeyboardCallback oldCallback = 0; extern bool gReset; @@ -172,7 +172,10 @@ void PairBench::initPhysics(const ConstructionInfo& ci) m_instancingRenderer = ci.m_instancingRenderer; +#ifndef B3_NO_PROFILE b3ProfileManager::CleanupMemory(); +#endif //B3_NO_PROFILE + int strideInBytes = 9*sizeof(float); int numVertices = sizeof(cube_vertices)/strideInBytes; int numIndices = sizeof(cube_vertices)/sizeof(int); diff --git a/Demos3/GpuDemos/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp index c6745cbe4..336fb8611 100644 --- a/Demos3/GpuDemos/main_opengl3core.cpp +++ b/Demos3/GpuDemos/main_opengl3core.cpp @@ -21,7 +21,6 @@ #include 
"OpenGLWindow/GLPrimitiveRenderer.h" #include "OpenGLWindow/GLInstancingRenderer.h" //#include "OpenGL3CoreRenderer.h" -#include "Bullet3Common/b3Quickprof.h" //#include "b3GpuDynamicsWorld.h" #include #include @@ -37,6 +36,9 @@ #include "rigidbody/GpuSphereScene.h" #include "rigidbody/Bullet2FileDemo.h" #include "softbody/GpuSoftBodyDemo.h" +#include "../btgui/Timing/b3Quickprof.h" + + //#include "BroadphaseBenchmark.h" @@ -412,6 +414,10 @@ int main(int argc, char* argv[]) { //b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache"); + b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); + b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); + + b3SetCustomPrintfFunc(myprintf); b3Vector3 test(1,2,3); test.x = 1; @@ -456,7 +462,7 @@ int main(int argc, char* argv[]) - #ifndef B3_NO_PROFILE +#ifndef B3_NO_PROFILE b3ProfileManager::Reset(); #endif //B3_NO_PROFILE diff --git a/Demos3/GpuDemos/premake4.lua b/Demos3/GpuDemos/premake4.lua index 75e5f26c6..5d3076077 100644 --- a/Demos3/GpuDemos/premake4.lua +++ b/Demos3/GpuDemos/premake4.lua @@ -63,6 +63,10 @@ function createProject(vendor) "../../btgui/FontFiles/OpenSans.cpp", "../../btgui/stb_image/stb_image.cpp", "../../btgui/stb_image/stb_image.h", + "../../btgui/Timing/b3Quickprof.cpp", + "../../btgui/Timing/b3Quickprof.h", + "../../btgui/Timing/b3Clock.cpp", + "../../btgui/Timing/b3Clock.h", } if os.is("Windows") then diff --git a/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp b/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp index 4dfbd362d..b187cf20d 100644 --- a/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp +++ b/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp @@ -19,7 +19,6 @@ extern bool enableExperimentalCpuConcaveCollision; #include "OpenGLWindow/GLInstancingRenderer.h" -//#include "LinearMath/b3Quickprof.h" #include "Bullet3Common/b3Quaternion.h" #include "Bullet3Common/b3Matrix3x3.h" #include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.h" diff --git 
a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp index 092e6f260..abe0d8654 100644 --- a/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp +++ b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp @@ -1,6 +1,5 @@ #include "ConcaveScene.h" #include "GpuRigidBodyDemo.h" -#include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" diff --git a/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp b/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp index 0a2fa4543..f9977811d 100644 --- a/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp @@ -1,6 +1,5 @@ #include "GpuCompoundScene.h" #include "GpuRigidBodyDemo.h" -#include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" diff --git a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp index 7e08e1c99..d531d4f92 100644 --- a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp @@ -1,6 +1,5 @@ #include "GpuConvexScene.h" #include "GpuRigidBodyDemo.h" -#include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" diff --git a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index 3f8cc9a63..a46a01648 100644 --- a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -1,5 +1,4 @@ #include "GpuRigidBodyDemo.h" -#include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" diff --git a/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp b/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp index e6d9cf616..d4daa77ce 100644 --- a/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp +++ 
b/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp @@ -1,6 +1,5 @@ #include "GpuSphereScene.h" #include "GpuRigidBodyDemo.h" -#include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" diff --git a/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp b/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp index 3c566a50e..67304eaee 100644 --- a/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp +++ b/Demos3/GpuDemos/softbody/GpuSoftBodyDemo.cpp @@ -1,5 +1,4 @@ #include "GpuSoftBodyDemo.h" -#include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" diff --git a/Demos3/GpuGuiInitialize/main.cpp b/Demos3/GpuGuiInitialize/main.cpp index 6692c93e1..0aac120fa 100644 --- a/Demos3/GpuGuiInitialize/main.cpp +++ b/Demos3/GpuGuiInitialize/main.cpp @@ -11,7 +11,6 @@ #include "OpenGLWindow/GLPrimitiveRenderer.h" #include "OpenGLWindow/GLInstancingRenderer.h" //#include "OpenGL3CoreRenderer.h" -#include "Bullet3Common/b3Quickprof.h" //#include "b3GpuDynamicsWorld.h" #include #include @@ -19,7 +18,7 @@ #include "OpenGLTrueTypeFont/opengl_fontstashcallbacks.h" #include "OpenGLWindow/GwenOpenGL3CoreRenderer.h" - +#include "../btgui/Timing/b3Quickprof.h" #include "Gwen/Gwen.h" #include "Gwen/Controls/Button.h" diff --git a/Demos3/GpuGuiInitialize/premake4.lua b/Demos3/GpuGuiInitialize/premake4.lua index 47915e1e7..e20ea04d7 100644 --- a/Demos3/GpuGuiInitialize/premake4.lua +++ b/Demos3/GpuGuiInitialize/premake4.lua @@ -49,10 +49,12 @@ function createProject(vendor) "../../src/Bullet3Geometry/b3ConvexHullComputer.cpp", "../../src/Bullet3Geometry/b3ConvexHullComputer.h", "../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../src/Bullet3Common/b3Quickprof.cpp", - "../../src/Bullet3Common/b3Quickprof.h", - "../../src/Bullet3Common/b3Logging.cpp", - "../../src/Bullet3Common/b3Logging.h", + 
"../../src/Bullet3Common/b3logging.cpp", + "../../src/Bullet3Common/b3logging.h", + "../../btgui/Timing/b3Quickprof.cpp", + "../../btgui/Timing/b3Quickprof.h", + "../../btgui/Timing/b3Clock.cpp", + "../../btgui/Timing/b3Clock.h", } diff --git a/btgui/GwenOpenGLTest/premake4.lua b/btgui/GwenOpenGLTest/premake4.lua index 484be8ba9..8146c80b7 100644 --- a/btgui/GwenOpenGLTest/premake4.lua +++ b/btgui/GwenOpenGLTest/premake4.lua @@ -42,8 +42,10 @@ "../../src/Bullet3Common/b3Logging.h", "../../src/Bullet3Common/b3Logging.cpp", "../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../src/Bullet3Common/b3Quickprof.cpp", - "../../src/Bullet3Common/b3Quickprof.h", + "../../btgui/Timing/b3Quickprof.cpp", + "../../btgui/Timing/b3Quickprof.h", + "../../btgui/Timing/b3Clock.cpp", + "../../btgui/Timing/b3Clock.h", "**.cpp", "**.h", } diff --git a/btgui/OpenGLTrueTypeFont/premake4.lua b/btgui/OpenGLTrueTypeFont/premake4.lua index 024def253..c941569c4 100644 --- a/btgui/OpenGLTrueTypeFont/premake4.lua +++ b/btgui/OpenGLTrueTypeFont/premake4.lua @@ -24,8 +24,10 @@ "../OpenGLWindow/LoadShader.cpp", "../OpenGLWindow/LoadShader.h", "../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../src/Bullet3Common/b3Quickprof.cpp", - "../../src/Bullet3Common/b3Quickprof.h" , + "../Timing/b3Quickprof.cpp", + "../Timing/b3Quickprof.h" , + "../Timing/b3Clock.cpp", + "../Timing/b3Clock.h" , "fontstash.cpp", "fontstash.h", "opengl_fontstashcallbacks.cpp", diff --git a/btgui/OpenGLWindow/GLInstancingRenderer.cpp b/btgui/OpenGLWindow/GLInstancingRenderer.cpp index 597d1ee0a..c69ef242e 100644 --- a/btgui/OpenGLWindow/GLInstancingRenderer.cpp +++ b/btgui/OpenGLWindow/GLInstancingRenderer.cpp @@ -29,7 +29,6 @@ subject to the following restrictions: #include #include "Bullet3Common/b3Vector3.h" #include "Bullet3Common/b3Quaternion.h" -#include "Bullet3Common/b3Quickprof.h" #include "Bullet3Common/b3Matrix3x3.h" #include "LoadShader.h" diff --git a/btgui/OpenGLWindow/premake4.lua 
b/btgui/OpenGLWindow/premake4.lua index de648d5d6..b50702eda 100644 --- a/btgui/OpenGLWindow/premake4.lua +++ b/btgui/OpenGLWindow/premake4.lua @@ -44,8 +44,10 @@ "../../src/Bullet3Geometry/b3ConvexHullComputer.cpp", "../../src/Bullet3Geometry/b3ConvexHullComputer.h", "../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../src/Bullet3Common/b3Quickprof.cpp", - "../../src/Bullet3Common/b3Quickprof.h" + "../Timing/b3Quickprof.cpp", + "../Timing/b3Quickprof.h", + "../Timing/b3Clock.cpp", + "../Timing/b3Clock.h", } if os.is("Windows") then diff --git a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp index 2f755abba..1b13be75e 100644 --- a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp +++ b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvh.cpp @@ -651,7 +651,7 @@ void b3DynamicBvh::extractLeaves(const b3DbvtNode* node,b3AlignedObjectArray #include -#include "LinearMath/b3QuickProf.h" + /* q6600,2.4ghz diff --git a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h index 7685290ed..c06612bf6 100644 --- a/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h +++ b/src/Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h @@ -36,7 +36,7 @@ subject to the following restrictions: #if B3_DBVT_BP_PROFILE #define B3_DBVT_BP_PROFILING_RATE 256 -#include "LinearMath/b3Quickprof.h" + #endif diff --git a/src/Bullet3Common/b3Logging.cpp b/src/Bullet3Common/b3Logging.cpp index 01965a0a2..5c647d313 100644 --- a/src/Bullet3Common/b3Logging.cpp +++ b/src/Bullet3Common/b3Logging.cpp @@ -82,6 +82,37 @@ void b3OutputErrorMessageVarArgsInternal(const char *str, ...) 
va_end(argList); } + + +void b3EnterProfileZoneDefault(const char* name) +{ +} +void b3LeaveProfileZoneDefault() +{ +} +static b3EnterProfileZoneFunc* b3s_enterFunc = b3EnterProfileZoneDefault; +static b3LeaveProfileZoneFunc* b3s_leaveFunc = b3LeaveProfileZoneDefault; +void b3EnterProfileZone(const char* name) +{ + (b3s_enterFunc)(name); +} +void b3LeaveProfileZone() +{ + (b3s_leaveFunc)(); +} + +void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc) +{ + b3s_enterFunc = enterFunc; +} +void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc) +{ + b3s_leaveFunc = leaveFunc; +} + + + + #ifndef _WIN32 #undef vsprintf_s #endif diff --git a/src/Bullet3Common/b3Logging.h b/src/Bullet3Common/b3Logging.h index a366c58c6..8d8ad4dcb 100644 --- a/src/Bullet3Common/b3Logging.h +++ b/src/Bullet3Common/b3Logging.h @@ -2,21 +2,6 @@ #ifndef B3_LOGGING_H #define B3_LOGGING_H - -typedef void (b3PrintfFunc)(const char* msg); -typedef void (b3WarningMessageFunc)(const char* msg); -typedef void (b3ErrorMessageFunc)(const char* msg); - -///The developer can route b3Printf output using their own implementation -void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc); -void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc); -void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc); - -///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version) -void b3OutputPrintfVarArgsInternal(const char *str, ...); -void b3OutputWarningMessageVarArgsInternal(const char *str, ...); -void b3OutputErrorMessageVarArgsInternal(const char *str, ...); - ///We add the do/while so that the statement "if (condition) b3Printf("test"); else {...}" would fail ///You can also customize the message by uncommenting out a different line below #define b3Printf(...) 
b3OutputPrintfVarArgsInternal(__VA_ARGS__) @@ -28,4 +13,57 @@ void b3OutputErrorMessageVarArgsInternal(const char *str, ...); #define b3Warning(...) do {b3OutputWarningMessageVarArgsInternal("b3Warning[%s,%d]:\n",__FILE__,__LINE__);b3OutputWarningMessageVarArgsInternal(__VA_ARGS__); }while(0) #define b3Error(...) do {b3OutputErrorMessageVarArgsInternal("b3Error[%s,%d]:\n",__FILE__,__LINE__);b3OutputErrorMessageVarArgsInternal(__VA_ARGS__); } while(0) + +#ifndef B3_NO_PROFILE + +void b3EnterProfileZone(const char* name); +void b3LeaveProfileZone(); + +class b3ProfileZone +{ +public: + b3ProfileZone(const char* name) + { + b3EnterProfileZone( name ); + } + + ~b3ProfileZone() + { + b3LeaveProfileZone(); + } +}; + +#define B3_PROFILE( name ) b3ProfileZone __profile( name ) + +#else //B3_NO_PROFILE + +#define B3_PROFILE( name ) +#define b3StartProfile(a) +#define b3StopProfile + +#endif //#ifndef B3_NO_PROFILE + + +typedef void (b3PrintfFunc)(const char* msg); +typedef void (b3WarningMessageFunc)(const char* msg); +typedef void (b3ErrorMessageFunc)(const char* msg); +typedef void (b3EnterProfileZoneFunc)(const char* msg); +typedef void (b3LeaveProfileZoneFunc)(); + +///The developer can route b3Printf output using their own implementation +void b3SetCustomPrintfFunc(b3PrintfFunc* printfFunc); +void b3SetCustomWarningMessageFunc(b3WarningMessageFunc* warningMsgFunc); +void b3SetCustomErrorMessageFunc(b3ErrorMessageFunc* errorMsgFunc); + +///Set custom profile zone functions (zones can be nested) +void b3SetCustomEnterProfileZoneFunc(b3EnterProfileZoneFunc* enterFunc); +void b3SetCustomLeaveProfileZoneFunc(b3LeaveProfileZoneFunc* leaveFunc); + +///Don't use those internal functions directly, use the b3Printf or b3SetCustomPrintfFunc instead (or warning/error version) +void b3OutputPrintfVarArgsInternal(const char *str, ...); +void b3OutputWarningMessageVarArgsInternal(const char *str, ...); +void b3OutputErrorMessageVarArgsInternal(const char *str, ...); + + + 
#endif//B3_LOGGING_H \ No newline at end of file diff --git a/src/Bullet3Common/b3Quickprof.cpp b/src/Bullet3Common/b3Quickprof.cpp deleted file mode 100644 index 466e4536c..000000000 --- a/src/Bullet3Common/b3Quickprof.cpp +++ /dev/null @@ -1,643 +0,0 @@ -/* -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* - -*************************************************************************************************** -** -** profile.cpp -** -** Real-Time Hierarchical Profiling for Game Programming Gems 3 -** -** by Greg Hjelstrom & Byon Garrabrant -** -***************************************************************************************************/ - -// Credits: The Clock class was inspired by the Timer classes in -// Ogre (www.ogre3d.org). 
- -#include "b3Quickprof.h" -#include "b3MinMax.h" - -#ifndef B3_NO_PROFILE - - -static b3Clock b3s_profileClock; - - -#ifdef __CELLOS_LV2__ -#include -#include -#include -#endif - -#if defined (SUNOS) || defined (__SUNOS__) -#include -#endif - -#if defined(WIN32) || defined(_WIN32) - -#define B3_USE_WINDOWS_TIMERS -#define WIN32_LEAN_AND_MEAN -#define NOWINRES -#define NOMCX -#define NOIME - -#ifdef _XBOX - #include -#else //_XBOX - #include -#endif //_XBOX - -#include - - -#else //_WIN32 -#include -#endif //_WIN32 - - - -struct b3ClockData -{ - -#ifdef B3_USE_WINDOWS_TIMERS - LARGE_INTEGER mClockFrequency; - DWORD mStartTick; - LONGLONG mPrevElapsedTime; - LARGE_INTEGER mStartTime; -#else -#ifdef __CELLOS_LV2__ - uint64_t mStartTime; -#else - struct timeval mStartTime; -#endif -#endif //__CELLOS_LV2__ - -}; - -///The b3Clock is a portable basic clock that measures accurate time in seconds, use for profiling. -b3Clock::b3Clock() -{ - m_data = new b3ClockData; -#ifdef B3_USE_WINDOWS_TIMERS - QueryPerformanceFrequency(&m_data->mClockFrequency); -#endif - reset(); -} - -b3Clock::~b3Clock() -{ - delete m_data; -} - -b3Clock::b3Clock(const b3Clock& other) -{ - m_data = new b3ClockData; - *m_data = *other.m_data; -} - -b3Clock& b3Clock::operator=(const b3Clock& other) -{ - *m_data = *other.m_data; - return *this; -} - - - /// Resets the initial reference time. -void b3Clock::reset() -{ -#ifdef B3_USE_WINDOWS_TIMERS - QueryPerformanceCounter(&m_data->mStartTime); - m_data->mStartTick = GetTickCount(); - m_data->mPrevElapsedTime = 0; -#else -#ifdef __CELLOS_LV2__ - - typedef uint64_t ClockSize; - ClockSize newTime; - //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); - SYS_TIMEBASE_GET( newTime ); - m_data->mStartTime = newTime; -#else - gettimeofday(&m_data->mStartTime, 0); -#endif -#endif -} - -/// Returns the time in ms since the last call to reset or since -/// the b3Clock was created. 
-unsigned long int b3Clock::getTimeMilliseconds() -{ -#ifdef B3_USE_WINDOWS_TIMERS - LARGE_INTEGER currentTime; - QueryPerformanceCounter(¤tTime); - LONGLONG elapsedTime = currentTime.QuadPart - - m_data->mStartTime.QuadPart; - // Compute the number of millisecond ticks elapsed. - unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / - m_data->mClockFrequency.QuadPart); - // Check for unexpected leaps in the Win32 performance counter. - // (This is caused by unexpected data across the PCI to ISA - // bridge, aka south bridge. See Microsoft KB274323.) - unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick; - signed long msecOff = (signed long)(msecTicks - elapsedTicks); - if (msecOff < -100 || msecOff > 100) - { - // Adjust the starting time forwards. - LONGLONG msecAdjustment = b3Min(msecOff * - m_data->mClockFrequency.QuadPart / 1000, elapsedTime - - m_data->mPrevElapsedTime); - m_data->mStartTime.QuadPart += msecAdjustment; - elapsedTime -= msecAdjustment; - - // Recompute the number of millisecond ticks elapsed. - msecTicks = (unsigned long)(1000 * elapsedTime / - m_data->mClockFrequency.QuadPart); - } - - // Store the current elapsed time for adjustments next time. - m_data->mPrevElapsedTime = elapsedTime; - - return msecTicks; -#else - -#ifdef __CELLOS_LV2__ - uint64_t freq=sys_time_get_timebase_frequency(); - double dFreq=((double) freq) / 1000.0; - typedef uint64_t ClockSize; - ClockSize newTime; - SYS_TIMEBASE_GET( newTime ); - //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); - - return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq); -#else - - struct timeval currentTime; - gettimeofday(¤tTime, 0); - return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 + - (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000; -#endif //__CELLOS_LV2__ -#endif -} - - /// Returns the time in us since the last call to reset or since - /// the Clock was created. 
-unsigned long int b3Clock::getTimeMicroseconds() -{ -#ifdef B3_USE_WINDOWS_TIMERS - LARGE_INTEGER currentTime; - QueryPerformanceCounter(¤tTime); - LONGLONG elapsedTime = currentTime.QuadPart - - m_data->mStartTime.QuadPart; - - // Compute the number of millisecond ticks elapsed. - unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / - m_data->mClockFrequency.QuadPart); - - // Check for unexpected leaps in the Win32 performance counter. - // (This is caused by unexpected data across the PCI to ISA - // bridge, aka south bridge. See Microsoft KB274323.) - unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick; - signed long msecOff = (signed long)(msecTicks - elapsedTicks); - if (msecOff < -100 || msecOff > 100) - { - // Adjust the starting time forwards. - LONGLONG msecAdjustment = b3Min(msecOff * - m_data->mClockFrequency.QuadPart / 1000, elapsedTime - - m_data->mPrevElapsedTime); - m_data->mStartTime.QuadPart += msecAdjustment; - elapsedTime -= msecAdjustment; - } - - // Store the current elapsed time for adjustments next time. - m_data->mPrevElapsedTime = elapsedTime; - - // Convert to microseconds. 
- unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / - m_data->mClockFrequency.QuadPart); - - return usecTicks; -#else - -#ifdef __CELLOS_LV2__ - uint64_t freq=sys_time_get_timebase_frequency(); - double dFreq=((double) freq)/ 1000000.0; - typedef uint64_t ClockSize; - ClockSize newTime; - //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); - SYS_TIMEBASE_GET( newTime ); - - return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq); -#else - - struct timeval currentTime; - gettimeofday(¤tTime, 0); - return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + - (currentTime.tv_usec - m_data->mStartTime.tv_usec); -#endif//__CELLOS_LV2__ -#endif -} - - - - - -inline void b3Profile_Get_Ticks(unsigned long int * ticks) -{ - *ticks = b3s_profileClock.getTimeMicroseconds(); -} - -inline float b3Profile_Get_Tick_Rate(void) -{ -// return 1000000.f; - return 1000.f; - -} - - - -/*************************************************************************************************** -** -** b3ProfileNode -** -***************************************************************************************************/ - -/*********************************************************************************************** - * INPUT: * - * name - pointer to a static string which is the name of this profile node * - * parent - parent pointer * - * * - * WARNINGS: * - * The name is assumed to be a static pointer, only the pointer is stored and compared for * - * efficiency reasons. 
* - *=============================================================================================*/ -b3ProfileNode::b3ProfileNode( const char * name, b3ProfileNode * parent ) : - Name( name ), - TotalCalls( 0 ), - TotalTime( 0 ), - StartTime( 0 ), - RecursionCounter( 0 ), - Parent( parent ), - Child( NULL ), - Sibling( NULL ), - m_userPtr(0) -{ - Reset(); -} - - -void b3ProfileNode::CleanupMemory() -{ - delete ( Child); - Child = NULL; - delete ( Sibling); - Sibling = NULL; -} - -b3ProfileNode::~b3ProfileNode( void ) -{ - delete ( Child); - delete ( Sibling); -} - - -/*********************************************************************************************** - * INPUT: * - * name - static string pointer to the name of the node we are searching for * - * * - * WARNINGS: * - * All profile names are assumed to be static strings so this function uses pointer compares * - * to find the named node. * - *=============================================================================================*/ -b3ProfileNode * b3ProfileNode::Get_Sub_Node( const char * name ) -{ - // Try to find this sub node - b3ProfileNode * child = Child; - while ( child ) { - if ( child->Name == name ) { - return child; - } - child = child->Sibling; - } - - // We didn't find it, so add it - - b3ProfileNode * node = new b3ProfileNode( name, this ); - node->Sibling = Child; - Child = node; - return node; -} - - -void b3ProfileNode::Reset( void ) -{ - TotalCalls = 0; - TotalTime = 0.0f; - - - if ( Child ) { - Child->Reset(); - } - if ( Sibling ) { - Sibling->Reset(); - } -} - - -void b3ProfileNode::Call( void ) -{ - TotalCalls++; - if (RecursionCounter++ == 0) { - b3Profile_Get_Ticks(&StartTime); - } -} - - -bool b3ProfileNode::Return( void ) -{ - if ( --RecursionCounter == 0 && TotalCalls != 0 ) { - unsigned long int time; - b3Profile_Get_Ticks(&time); - time-=StartTime; - TotalTime += (float)time / b3Profile_Get_Tick_Rate(); - } - return ( RecursionCounter == 0 ); -} - - 
-/*************************************************************************************************** -** -** b3ProfileIterator -** -***************************************************************************************************/ -b3ProfileIterator::b3ProfileIterator( b3ProfileNode * start ) -{ - CurrentParent = start; - CurrentChild = CurrentParent->Get_Child(); -} - - -void b3ProfileIterator::First(void) -{ - CurrentChild = CurrentParent->Get_Child(); -} - - -void b3ProfileIterator::Next(void) -{ - CurrentChild = CurrentChild->Get_Sibling(); -} - - -bool b3ProfileIterator::Is_Done(void) -{ - return CurrentChild == NULL; -} - - -void b3ProfileIterator::Enter_Child( int index ) -{ - CurrentChild = CurrentParent->Get_Child(); - while ( (CurrentChild != NULL) && (index != 0) ) { - index--; - CurrentChild = CurrentChild->Get_Sibling(); - } - - if ( CurrentChild != NULL ) { - CurrentParent = CurrentChild; - CurrentChild = CurrentParent->Get_Child(); - } -} - - -void b3ProfileIterator::Enter_Parent( void ) -{ - if ( CurrentParent->Get_Parent() != NULL ) { - CurrentParent = CurrentParent->Get_Parent(); - } - CurrentChild = CurrentParent->Get_Child(); -} - - -/*************************************************************************************************** -** -** b3ProfileManager -** -***************************************************************************************************/ - -b3ProfileNode b3ProfileManager::Root( "Root", NULL ); -b3ProfileNode * b3ProfileManager::CurrentNode = &b3ProfileManager::Root; -int b3ProfileManager::FrameCounter = 0; -unsigned long int b3ProfileManager::ResetTime = 0; - - -/*********************************************************************************************** - * b3ProfileManager::Start_Profile -- Begin a named profile * - * * - * Steps one level deeper into the tree, if a child already exists with the specified name * - * then it accumulates the profiling; otherwise a new child node is added to the profile tree. 
* - * * - * INPUT: * - * name - name of this profiling record * - * * - * WARNINGS: * - * The string used is assumed to be a static string; pointer compares are used throughout * - * the profiling code for efficiency. * - *=============================================================================================*/ -void b3ProfileManager::Start_Profile( const char * name ) -{ - if (name != CurrentNode->Get_Name()) { - CurrentNode = CurrentNode->Get_Sub_Node( name ); - } - - CurrentNode->Call(); -} - - -/*********************************************************************************************** - * b3ProfileManager::Stop_Profile -- Stop timing and record the results. * - *=============================================================================================*/ -void b3ProfileManager::Stop_Profile( void ) -{ - // Return will indicate whether we should back up to our parent (we may - // be profiling a recursive function) - if (CurrentNode->Return()) { - CurrentNode = CurrentNode->Get_Parent(); - } -} - - -/*********************************************************************************************** - * b3ProfileManager::Reset -- Reset the contents of the profiling system * - * * - * This resets everything except for the tree structure. All of the timing data is reset. 
* - *=============================================================================================*/ -void b3ProfileManager::Reset( void ) -{ - b3s_profileClock.reset(); - Root.Reset(); - Root.Call(); - FrameCounter = 0; - b3Profile_Get_Ticks(&ResetTime); -} - - -/*********************************************************************************************** - * b3ProfileManager::Increment_Frame_Counter -- Increment the frame counter * - *=============================================================================================*/ -void b3ProfileManager::Increment_Frame_Counter( void ) -{ - FrameCounter++; -} - - -/*********************************************************************************************** - * b3ProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset * - *=============================================================================================*/ -float b3ProfileManager::Get_Time_Since_Reset( void ) -{ - unsigned long int time; - b3Profile_Get_Ticks(&time); - time -= ResetTime; - return (float)time / b3Profile_Get_Tick_Rate(); -} - -#include - -void b3ProfileManager::dumpRecursive(b3ProfileIterator* profileIterator, int spacing) -{ - profileIterator->First(); - if (profileIterator->Is_Done()) - return; - - float accumulated_time=0,parent_time = profileIterator->Is_Root() ? b3ProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time(); - int i; - int frames_since_reset = b3ProfileManager::Get_Frame_Count_Since_Reset(); - for (i=0;iGet_Current_Parent_Name(), parent_time ); - float totalTime = 0.f; - - - int numChildren = 0; - - for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next()) - { - numChildren++; - float current_total_time = profileIterator->Get_Current_Total_Time(); - accumulated_time += current_total_time; - float fraction = parent_time > B3_EPSILON ? 
(current_total_time / parent_time) * 100 : 0.f; - { - int i; for (i=0;iGet_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls()); - totalTime += current_total_time; - //recurse into children - } - - if (parent_time < accumulated_time) - { - b3Printf("what's wrong\n"); - } - for (i=0;i B3_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time); - - for (i=0;iEnter_Child(i); - dumpRecursive(profileIterator,spacing+3); - profileIterator->Enter_Parent(); - } -} - - - - -void b3ProfileManager::dumpAll() -{ - b3ProfileIterator* profileIterator = 0; - profileIterator = b3ProfileManager::Get_Iterator(); - - dumpRecursive(profileIterator,0); - - b3ProfileManager::Release_Iterator(profileIterator); -} - - -void b3ProfileManager::dumpRecursive(FILE* f, b3ProfileIterator* profileIterator, int spacing) -{ - profileIterator->First(); - if (profileIterator->Is_Done()) - return; - - float accumulated_time=0,parent_time = profileIterator->Is_Root() ? b3ProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time(); - int i; - int frames_since_reset = b3ProfileManager::Get_Frame_Count_Since_Reset(); - for (i=0;iGet_Current_Parent_Name(), parent_time ); - float totalTime = 0.f; - - - int numChildren = 0; - - for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next()) - { - numChildren++; - float current_total_time = profileIterator->Get_Current_Total_Time(); - accumulated_time += current_total_time; - float fraction = parent_time > B3_EPSILON ? (current_total_time / parent_time) * 100 : 0.f; - { - int i; for (i=0;iGet_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls()); - totalTime += current_total_time; - //recurse into children - } - - if (parent_time < accumulated_time) - { - fprintf(f,"what's wrong\n"); - } - for (i=0;i B3_EPSILON ? 
((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time); - - for (i=0;iEnter_Child(i); - dumpRecursive(f,profileIterator,spacing+3); - profileIterator->Enter_Parent(); - } -} - - - - -void b3ProfileManager::dumpAll(FILE* f) -{ - b3ProfileIterator* profileIterator = 0; - profileIterator = b3ProfileManager::Get_Iterator(); - - dumpRecursive(f, profileIterator,0); - - b3ProfileManager::Release_Iterator(profileIterator); -} - - - -#endif //B3_NO_PROFILE diff --git a/src/Bullet3Common/b3Quickprof.h b/src/Bullet3Common/b3Quickprof.h deleted file mode 100644 index 62d0d1f71..000000000 --- a/src/Bullet3Common/b3Quickprof.h +++ /dev/null @@ -1,218 +0,0 @@ -/* -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/*************************************************************************************************** -** -** Real-Time Hierarchical Profiling for Game Programming Gems 3 -** -** by Greg Hjelstrom & Byon Garrabrant -** -***************************************************************************************************/ - -// Credits: The Clock class was inspired by the Timer classes in -// Ogre (www.ogre3d.org). 
- - - -#ifndef B3_QUICK_PROF_H -#define B3_QUICK_PROF_H - -//To disable built-in profiling, please comment out next line -//#define B3_NO_PROFILE 1 -#ifndef B3_NO_PROFILE -#include //@todo remove this, backwards compatibility -#include "b3Scalar.h" -#include "b3AlignedAllocator.h" -#include - - - - - -#define B3_USE_CLOCK 1 - -#ifdef B3_USE_CLOCK - -///The b3Clock is a portable basic clock that measures accurate time in seconds, use for profiling. -class b3Clock -{ -public: - b3Clock(); - - b3Clock(const b3Clock& other); - b3Clock& operator=(const b3Clock& other); - - ~b3Clock(); - - /// Resets the initial reference time. - void reset(); - - /// Returns the time in ms since the last call to reset or since - /// the b3Clock was created. - unsigned long int getTimeMilliseconds(); - - /// Returns the time in us since the last call to reset or since - /// the Clock was created. - unsigned long int getTimeMicroseconds(); -private: - struct b3ClockData* m_data; -}; - -#endif //B3_USE_CLOCK - - - - -///A node in the Profile Hierarchy Tree -class b3ProfileNode { - -public: - b3ProfileNode( const char * name, b3ProfileNode * parent ); - ~b3ProfileNode( void ); - - b3ProfileNode * Get_Sub_Node( const char * name ); - - b3ProfileNode * Get_Parent( void ) { return Parent; } - b3ProfileNode * Get_Sibling( void ) { return Sibling; } - b3ProfileNode * Get_Child( void ) { return Child; } - - void CleanupMemory(); - void Reset( void ); - void Call( void ); - bool Return( void ); - - const char * Get_Name( void ) { return Name; } - int Get_Total_Calls( void ) { return TotalCalls; } - float Get_Total_Time( void ) { return TotalTime; } - void* GetUserPointer() const {return m_userPtr;} - void SetUserPointer(void* ptr) { m_userPtr = ptr;} -protected: - - const char * Name; - int TotalCalls; - float TotalTime; - unsigned long int StartTime; - int RecursionCounter; - - b3ProfileNode * Parent; - b3ProfileNode * Child; - b3ProfileNode * Sibling; - void* m_userPtr; -}; - -///An iterator to 
navigate through the tree -class b3ProfileIterator -{ -public: - // Access all the children of the current parent - void First(void); - void Next(void); - bool Is_Done(void); - bool Is_Root(void) { return (CurrentParent->Get_Parent() == 0); } - - void Enter_Child( int index ); // Make the given child the new parent - void Enter_Largest_Child( void ); // Make the largest child the new parent - void Enter_Parent( void ); // Make the current parent's parent the new parent - - // Access the current child - const char * Get_Current_Name( void ) { return CurrentChild->Get_Name(); } - int Get_Current_Total_Calls( void ) { return CurrentChild->Get_Total_Calls(); } - float Get_Current_Total_Time( void ) { return CurrentChild->Get_Total_Time(); } - - void* Get_Current_UserPointer( void ) { return CurrentChild->GetUserPointer(); } - void Set_Current_UserPointer(void* ptr) {CurrentChild->SetUserPointer(ptr);} - // Access the current parent - const char * Get_Current_Parent_Name( void ) { return CurrentParent->Get_Name(); } - int Get_Current_Parent_Total_Calls( void ) { return CurrentParent->Get_Total_Calls(); } - float Get_Current_Parent_Total_Time( void ) { return CurrentParent->Get_Total_Time(); } - - - -protected: - - b3ProfileNode * CurrentParent; - b3ProfileNode * CurrentChild; - - - b3ProfileIterator( b3ProfileNode * start ); - friend class b3ProfileManager; -}; - - -///The Manager for the Profile system -class b3ProfileManager { -public: - static void Start_Profile( const char * name ); - static void Stop_Profile( void ); - - static void CleanupMemory(void) - { - Root.CleanupMemory(); - } - - static void Reset( void ); - static void Increment_Frame_Counter( void ); - static int Get_Frame_Count_Since_Reset( void ) { return FrameCounter; } - static float Get_Time_Since_Reset( void ); - - static b3ProfileIterator * Get_Iterator( void ) - { - - return new b3ProfileIterator( &Root ); - } - static void Release_Iterator( b3ProfileIterator * iterator ) { delete ( iterator); } - 
- static void dumpRecursive(b3ProfileIterator* profileIterator, int spacing); - static void dumpAll(); - - static void dumpRecursive(FILE* f, b3ProfileIterator* profileIterator, int spacing); - static void dumpAll(FILE* f); - -private: - static b3ProfileNode Root; - static b3ProfileNode * CurrentNode; - static int FrameCounter; - static unsigned long int ResetTime; -}; - - -///ProfileSampleClass is a simple way to profile a function's scope -///Use the B3_PROFILE macro at the start of scope to time -class b3ProfileSample { -public: - b3ProfileSample( const char * name ) - { - b3ProfileManager::Start_Profile( name ); - } - - ~b3ProfileSample( void ) - { - b3ProfileManager::Stop_Profile(); - } -}; - - -#define B3_PROFILE( name ) b3ProfileSample __profile( name ) - -#else - -#define B3_PROFILE( name ) - -#endif //#ifndef B3_NO_PROFILE - - - -#endif //B3_QUICK_PROF_H - - diff --git a/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp b/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp index 1712ac221..162c9b2c9 100644 --- a/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp +++ b/src/Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.cpp @@ -25,7 +25,7 @@ subject to the following restrictions: #include "b3TypedConstraint.h" #include #include "Bullet3Common/b3StackAlloc.h" -#include "Bullet3Common/b3Quickprof.h" + //#include "b3SolverBody.h" //#include "b3SolverConstraint.h" #include "Bullet3Common/b3AlignedObjectArray.h" diff --git a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp index d6cb05890..22756c685 100644 --- a/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ b/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -2,7 +2,7 @@ #include "b3GpuSapBroadphase.h" #include "Bullet3Common/b3Vector3.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Common/b3Quickprof.h" + #include 
"Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "kernels/sapKernels.h" #include "kernels/sapFastKernels.h" diff --git a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index 1acffbfc5..fca41e6c9 100644 --- a/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -28,7 +28,7 @@ int b3g_actualSATPairTests=0; typedef b3AlignedObjectArray b3VertexArray; -#include "Bullet3Common/b3Quickprof.h" + #include //for FLT_MAX #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp index df6606273..db0d18bcb 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -2,7 +2,7 @@ #include "b3GpuRaycast.h" #include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" -#include "Bullet3Common/b3Quickprof.h" + #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index 7ce6b8f85..9fdb4c3a6 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -5,7 +5,7 @@ bool b3GpuSolveConstraint = true; #include "b3GpuBatchingPgsSolver.h" #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3Common/b3Quickprof.h" + #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" #include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" #include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp 
b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index e00f058a3..ecb566e6b 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -31,7 +31,6 @@ bool dumpContactStats = false; #include "b3GpuBatchingPgsSolver.h" #include "b3Solver.h" -#include "Bullet3Common/b3Quickprof.h" #include "b3Config.h" #include "Bullet3OpenCL/Raycast/b3GpuRaycast.h" diff --git a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp index 89672641b..b5b8680ef 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -37,7 +37,6 @@ bool useNewBatchingKernel = true; #include "kernels/batchingKernelsNew.h" -#include "Bullet3Common/b3Quickprof.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" #include "Bullet3Common/b3Vector3.h" diff --git a/test/OpenCL/BitonicSort/main.cpp b/test/OpenCL/BitonicSort/main.cpp index 4f9a4d2c7..dc304209d 100644 --- a/test/OpenCL/BitonicSort/main.cpp +++ b/test/OpenCL/BitonicSort/main.cpp @@ -18,8 +18,8 @@ subject to the following restrictions: #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" #include "Bullet3Common/b3Int2.h" -#include "Bullet3Common/b3Quickprof.h" +#include "../btgui/Timing/b3Clock.h" #include "b3BitonicSort.h" #include diff --git a/test/OpenCL/BitonicSort/premake4.lua b/test/OpenCL/BitonicSort/premake4.lua index cae959c2c..c45828a61 100644 --- a/test/OpenCL/BitonicSort/premake4.lua +++ b/test/OpenCL/BitonicSort/premake4.lua @@ -21,12 +21,12 @@ function createProject(vendor) "b3BitonicSort.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.h", - "../../../src/Bullet3Common/b3Quickprof.cpp", - "../../../src/Bullet3Common/b3Quickprof.h", "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp", "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h", 
"../../../src/Bullet3Common/b3Logging.cpp", "../../../src/Bullet3Common/b3Logging.h", + "../../../btgui/Timing/b3Clock.cpp", + "../../../btgui/Timing/b3Clock.h", } end diff --git a/test/OpenCL/RadixSortBenchmark/main.cpp b/test/OpenCL/RadixSortBenchmark/main.cpp index 9e10cd7ab..7de5856ac 100644 --- a/test/OpenCL/RadixSortBenchmark/main.cpp +++ b/test/OpenCL/RadixSortBenchmark/main.cpp @@ -66,7 +66,7 @@ #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3Common/b3Quickprof.h" +#include "../btgui/Timing/b3Clock.h" cl_context g_cxMainContext; cl_device_id g_device; diff --git a/test/OpenCL/RadixSortBenchmark/premake4.lua b/test/OpenCL/RadixSortBenchmark/premake4.lua index 2f413ff0f..2b9e600be 100644 --- a/test/OpenCL/RadixSortBenchmark/premake4.lua +++ b/test/OpenCL/RadixSortBenchmark/premake4.lua @@ -27,10 +27,12 @@ function createProject(vendor) "../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.h", "../../../src/Bullet3Common/b3AlignedObjectArray.h", - "../../../src/Bullet3Common/b3Quickprof.cpp", - "../../../src/Bullet3Common/b3Quickprof.h", "../../../src/Bullet3Common/b3Logging.cpp", "../../../src/Bullet3Common/b3Logging.h", + "../../../btgui/Timing/b3Quickprof.cpp", + "../../../btgui/Timing/b3Quickprof.h", + "../../../btgui/Timing/b3Clock.cpp", + "../../../btgui/Timing/b3Clock.h", } From 531af1b394b432ec0b5d4cc910a50879cd3751c0 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Thu, 20 Jun 2013 00:29:09 -0700 Subject: [PATCH 08/11] add ray-convex CPU implementation, make CPU raytest default for now tweak/increase the b3Config values again, so it works better on the Macbook Retina GPU. 
--- Demos3/BasicGpuDemo/BasicGpuDemo.cpp | 5 +- Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp | 2 +- .../GpuDemos/rigidbody/GpuRigidBodyDemo.cpp | 2 +- src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp | 83 ++++++++++++++++- src/Bullet3OpenCL/Raycast/b3GpuRaycast.h | 7 +- src/Bullet3OpenCL/RigidBody/b3Config.h | 4 +- .../RigidBody/b3GpuNarrowPhase.cpp | 68 +------------- .../RigidBody/b3GpuNarrowPhase.h | 5 + .../RigidBody/b3GpuNarrowPhaseInternalData.h | 93 +++++++++++++++++++ .../RigidBody/b3GpuRigidBodyPipeline.cpp | 5 +- 10 files changed, 194 insertions(+), 80 deletions(-) create mode 100644 src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h diff --git a/Demos3/BasicGpuDemo/BasicGpuDemo.cpp b/Demos3/BasicGpuDemo/BasicGpuDemo.cpp index 080755df4..a41019458 100644 --- a/Demos3/BasicGpuDemo/BasicGpuDemo.cpp +++ b/Demos3/BasicGpuDemo/BasicGpuDemo.cpp @@ -288,9 +288,8 @@ void BasicGpuDemo::initPhysics() //create a few dynamic rigidbodies // Re-using the same collision is better for memory usage and performance - //btBoxShape* colShape = new btBoxShape(btVector3(SCALING*1,SCALING*1,SCALING*1)); - - btCollisionShape* colShape = new btSphereShape(btScalar(SCALING*1.f)); + btBoxShape* colShape = new btBoxShape(btVector3(SCALING*1,SCALING*1,SCALING*1)); + //btCollisionShape* colShape = new btSphereShape(btScalar(SCALING*1.f)); m_collisionShapes.push_back(colShape); /// Create Dynamic Objects diff --git a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp index d531d4f92..83f9c4d79 100644 --- a/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp @@ -458,7 +458,7 @@ void GpuRaytraceScene::renderScene() //m_raycaster->castRaysHost(rays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu()); - m_raycaster->castRays(rays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), 
m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu()); + m_raycaster->castRays(rays, hits, this->m_data->m_np->getNumRigidBodies(), m_data->m_np->getBodiesCpu(), m_data->m_np->getNumCollidablesGpu(), m_data->m_np->getCollidablesCpu(), m_data->m_np->getInternalData()); diff --git a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index a46a01648..68e4be1c6 100644 --- a/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -109,7 +109,7 @@ void GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci) b3Config config; config.m_maxConvexBodies = b3Max(config.m_maxConvexBodies,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ+10); config.m_maxConvexShapes = config.m_maxConvexBodies; - config.m_maxBroadphasePairs = 8*config.m_maxConvexBodies; + config.m_maxBroadphasePairs = 12*config.m_maxConvexBodies; config.m_maxContactCapacity = config.m_maxBroadphasePairs; diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp index db0d18bcb..92dc65ba6 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -2,6 +2,8 @@ #include "b3GpuRaycast.h" #include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" + #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" @@ -73,9 +75,57 @@ bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vect return false; } +bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronCL& poly, + const struct b3GpuNarrowPhaseInternalData* narrowphaseData, float& hitFraction, b3Vector3& hitNormal) +{ + float exitFraction = hitFraction; + float enterFraction = -0.1f; + b3Vector3 curHitNormal(0,0,0); + for (int 
i=0;im_convexFaces[poly.m_faceOffset+i]; + float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w; + float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w; + if (fromPlaneDist<0.f) + { + if (toPlaneDist >= 0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if (exitFraction>fraction) + { + exitFraction = fraction; + } + } + } else + { + if (toPlaneDist<0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if (enterFraction <= fraction) + { + enterFraction = fraction; + curHitNormal = face.m_plane; + curHitNormal.w = 0.f; + } + } else + { + return false; + } + } + if (exitFraction <= enterFraction) + return false; + } + + if (enterFraction < 0.f) + return false; + + hitFraction = enterFraction; + hitNormal = curHitNormal; + return true; +} void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, - int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables) + int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); @@ -88,6 +138,7 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray& rays, b3A float hitFraction = hitResults[r].m_hitFraction; int hitBodyIndex= -1; + b3Vector3 hitNormal; for (int b=0;b& rays, b3A if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction)) { hitBodyIndex = b; + b3Vector3 hitPoint; + hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); + hitNormal = (hitPoint-bodies[b].m_pos).normalize(); } } + case SHAPE_CONVEX_HULL: + { + b3Transform convexWorldTransform; + convexWorldTransform.setIdentity(); + convexWorldTransform.setOrigin(bodies[b].m_pos); + convexWorldTransform.setRotation(bodies[b].m_quat); + b3Transform convexWorld2Local = 
convexWorldTransform.inverse(); + + b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); + b3Vector3 rayToLocal = convexWorld2Local(rayTo); + + + int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; + const b3ConvexPolyhedronCL& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; + if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData, hitFraction, hitNormal)) + { + hitBodyIndex = b; + } + + + break; + } default: { static bool once=true; @@ -122,7 +198,7 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray& rays, b3A hitResults[r].m_hitFraction = hitFraction; hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); - hitResults[r].m_hitNormal = (hitResults[r].m_hitPoint-bodies[hitBodyIndex].m_pos).normalize(); + hitResults[r].m_hitNormal = hitNormal; hitResults[r].m_hitResult0 = hitBodyIndex; } @@ -130,8 +206,9 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray& rays, b3A } void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, - int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables) + int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { + B3_PROFILE("castRaysGPU"); b3OpenCLArray gpuRays(m_data->m_context,m_data->m_q); diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h index 2e3a7431d..66f2ce639 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h @@ -18,10 +18,13 @@ public: virtual ~b3GpuRaycast(); void castRaysHost(const b3AlignedObjectArray& raysIn, b3AlignedObjectArray& hitResults, - int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables); + int numBodies, const struct b3RigidBodyCL* bodies, int numCollidables, const struct 
b3Collidable* collidables, + const struct b3GpuNarrowPhaseInternalData* narrowphaseData); void castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults, - int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables); + int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, + const struct b3GpuNarrowPhaseInternalData* narrowphaseData + ); /* const b3OpenCLArray* bodyBuf, b3OpenCLArray* contactOut, int& nContacts, diff --git a/src/Bullet3OpenCL/RigidBody/b3Config.h b/src/Bullet3OpenCL/RigidBody/b3Config.h index 1d46f8f17..e8b94cfec 100644 --- a/src/Bullet3OpenCL/RigidBody/b3Config.h +++ b/src/Bullet3OpenCL/RigidBody/b3Config.h @@ -19,7 +19,7 @@ struct b3Config int m_maxTriConvexPairCapacity; b3Config() - :m_maxConvexBodies(128*1024), + :m_maxConvexBodies(32*1024), m_maxVerticesPerFace(64), m_maxFacesPerShape(12), m_maxConvexVertices(8192), @@ -29,7 +29,7 @@ struct b3Config m_maxTriConvexPairCapacity(256*1024) { m_maxConvexShapes = m_maxConvexBodies; - m_maxBroadphasePairs = 8*m_maxConvexBodies; + m_maxBroadphasePairs = 12*m_maxConvexBodies; m_maxContactCapacity = m_maxBroadphasePairs; } }; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp index 79920337e..5132c4d99 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp @@ -12,73 +12,7 @@ #include "Bullet3Geometry/b3AabbUtil.h" #include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" -struct b3GpuNarrowPhaseInternalData -{ - b3AlignedObjectArray* m_convexData; - - b3AlignedObjectArray m_convexPolyhedra; - b3AlignedObjectArray m_uniqueEdges; - b3AlignedObjectArray m_convexVertices; - b3AlignedObjectArray m_convexIndices; - - b3OpenCLArray* m_convexPolyhedraGPU; - b3OpenCLArray* m_uniqueEdgesGPU; - b3OpenCLArray* m_convexVerticesGPU; - b3OpenCLArray* m_convexIndicesGPU; - - 
b3OpenCLArray* m_worldVertsB1GPU; - b3OpenCLArray* m_clippingFacesOutGPU; - b3OpenCLArray* m_worldNormalsAGPU; - b3OpenCLArray* m_worldVertsA1GPU; - b3OpenCLArray* m_worldVertsB2GPU; - - b3AlignedObjectArray m_cpuChildShapes; - b3OpenCLArray* m_gpuChildShapes; - - b3AlignedObjectArray m_convexFaces; - b3OpenCLArray* m_convexFacesGPU; - - GpuSatCollision* m_gpuSatCollision; - - b3AlignedObjectArray* m_pBufPairsCPU; - - //b3OpenCLArray* m_convexPairsOutGPU; - //b3OpenCLArray* m_planePairs; - - b3OpenCLArray* m_pBufContactOutGPU; - b3AlignedObjectArray* m_pBufContactOutCPU; - - - b3AlignedObjectArray* m_bodyBufferCPU; - b3OpenCLArray* m_bodyBufferGPU; - - b3AlignedObjectArray* m_inertiaBufferCPU; - b3OpenCLArray* m_inertiaBufferGPU; - - int m_numAcceleratedShapes; - int m_numAcceleratedRigidBodies; - - b3AlignedObjectArray m_collidablesCPU; - b3OpenCLArray* m_collidablesGPU; - - b3OpenCLArray* m_localShapeAABBGPU; - b3AlignedObjectArray* m_localShapeAABBCPU; - - b3AlignedObjectArray m_bvhData; - - b3AlignedObjectArray m_treeNodesCPU; - b3AlignedObjectArray m_subTreesCPU; - - b3AlignedObjectArray m_bvhInfoCPU; - b3OpenCLArray* m_bvhInfoGPU; - - b3OpenCLArray* m_treeNodesGPU; - b3OpenCLArray* m_subTreesGPU; - - - b3Config m_config; - -}; +#include "b3GpuNarrowPhaseInternalData.h" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h index 80f0812a7..a916bbe57 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h @@ -91,6 +91,11 @@ public: b3Collidable& getCollidableCpu(int collidableIndex); const b3Collidable& getCollidableCpu(int collidableIndex) const; + const b3GpuNarrowPhaseInternalData* getInternalData() const + { + return m_data; + } + const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const; }; diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h new file mode 
100644 index 000000000..8c7aa807d --- /dev/null +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h @@ -0,0 +1,93 @@ + +#ifndef B3_GPU_NARROWPHASE_INTERNAL_DATA_H +#define B3_GPU_NARROWPHASE_INTERNAL_DATA_H + +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h" +#include "b3Config.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" +#include "Bullet3Common/b3AlignedObjectArray.h" +#include "Bullet3Common/b3Vector3.h" + +#include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" +#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" + +#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" +#include "Bullet3Common/b3Int4.h" +#include "Bullet3Common/b3Int2.h" + + +class b3ConvexUtility; + +struct b3GpuNarrowPhaseInternalData +{ + b3AlignedObjectArray* m_convexData; + + b3AlignedObjectArray m_convexPolyhedra; + b3AlignedObjectArray m_uniqueEdges; + b3AlignedObjectArray m_convexVertices; + b3AlignedObjectArray m_convexIndices; + + b3OpenCLArray* m_convexPolyhedraGPU; + b3OpenCLArray* m_uniqueEdgesGPU; + b3OpenCLArray* m_convexVerticesGPU; + b3OpenCLArray* m_convexIndicesGPU; + + b3OpenCLArray* m_worldVertsB1GPU; + b3OpenCLArray* m_clippingFacesOutGPU; + b3OpenCLArray* m_worldNormalsAGPU; + b3OpenCLArray* m_worldVertsA1GPU; + b3OpenCLArray* m_worldVertsB2GPU; + + b3AlignedObjectArray m_cpuChildShapes; + b3OpenCLArray* m_gpuChildShapes; + + b3AlignedObjectArray m_convexFaces; + b3OpenCLArray* m_convexFacesGPU; + + struct GpuSatCollision* m_gpuSatCollision; + + b3AlignedObjectArray* m_pBufPairsCPU; + + //b3OpenCLArray* m_convexPairsOutGPU; + //b3OpenCLArray* m_planePairs; + + b3OpenCLArray* m_pBufContactOutGPU; + b3AlignedObjectArray* m_pBufContactOutCPU; + + + 
b3AlignedObjectArray* m_bodyBufferCPU; + b3OpenCLArray* m_bodyBufferGPU; + + b3AlignedObjectArray* m_inertiaBufferCPU; + b3OpenCLArray* m_inertiaBufferGPU; + + int m_numAcceleratedShapes; + int m_numAcceleratedRigidBodies; + + b3AlignedObjectArray m_collidablesCPU; + b3OpenCLArray* m_collidablesGPU; + + b3OpenCLArray* m_localShapeAABBGPU; + b3AlignedObjectArray* m_localShapeAABBCPU; + + b3AlignedObjectArray m_bvhData; + + b3AlignedObjectArray m_treeNodesCPU; + b3AlignedObjectArray m_subTreesCPU; + + b3AlignedObjectArray m_bvhInfoCPU; + b3OpenCLArray* m_bvhInfoGPU; + + b3OpenCLArray* m_treeNodesGPU; + b3OpenCLArray* m_subTreesGPU; + + + b3Config m_config; + +}; + +#endif //B3_GPU_NARROWPHASE_INTERNAL_DATA_H diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index ecb566e6b..e7bc308b3 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -457,6 +457,9 @@ int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults) { - this->m_data->m_raycaster->castRays(rays,hitResults,getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(),m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu()); + this->m_data->m_raycaster->castRaysHost(rays,hitResults, + getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(), + m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getInternalData() + ); } From c959f32d7e23dc27b27941ff9362a77186d96f45 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Thu, 20 Jun 2013 10:14:21 -0700 Subject: [PATCH 09/11] added b3Clock/b3Quickprof files --- btgui/Timing/b3Clock.cpp | 223 ++++++++++++++++++ btgui/Timing/b3Clock.h | 31 +++ btgui/Timing/b3Quickprof.cpp | 430 
+++++++++++++++++++++++++++++++++++ btgui/Timing/b3Quickprof.h | 173 ++++++++++++++ 4 files changed, 857 insertions(+) create mode 100644 btgui/Timing/b3Clock.cpp create mode 100644 btgui/Timing/b3Clock.h create mode 100644 btgui/Timing/b3Quickprof.cpp create mode 100644 btgui/Timing/b3Quickprof.h diff --git a/btgui/Timing/b3Clock.cpp b/btgui/Timing/b3Clock.cpp new file mode 100644 index 000000000..53a82dee0 --- /dev/null +++ b/btgui/Timing/b3Clock.cpp @@ -0,0 +1,223 @@ +#include "b3Clock.h" + +template <class T> +const T& b3ClockMin(const T& a, const T& b) +{ + return a < b ? a : b ; +} + + +#ifdef __CELLOS_LV2__ +#include <sys/sys_time.h> +#include <sys/time_util.h> +#include <stdio.h> +#endif + +#if defined (SUNOS) || defined (__SUNOS__) +#include <stdio.h> +#endif + +#if defined(WIN32) || defined(_WIN32) + +#define B3_USE_WINDOWS_TIMERS +#define WIN32_LEAN_AND_MEAN +#define NOWINRES +#define NOMCX +#define NOIME + +#ifdef _XBOX + #include <Xtl.h> +#else //_XBOX + #include <windows.h> +#endif //_XBOX + +#include <time.h> + + +#else //_WIN32 +#include <sys/time.h> +#endif //_WIN32 + + + +struct b3ClockData +{ + +#ifdef B3_USE_WINDOWS_TIMERS + LARGE_INTEGER mClockFrequency; + DWORD mStartTick; + LONGLONG mPrevElapsedTime; + LARGE_INTEGER mStartTime; +#else +#ifdef __CELLOS_LV2__ + uint64_t mStartTime; +#else + struct timeval mStartTime; +#endif +#endif //__CELLOS_LV2__ + +}; + +///The b3Clock is a portable basic clock that measures accurate time in seconds, use for profiling. +b3Clock::b3Clock() +{ + m_data = new b3ClockData; +#ifdef B3_USE_WINDOWS_TIMERS + QueryPerformanceFrequency(&m_data->mClockFrequency); +#endif + reset(); +} + +b3Clock::~b3Clock() +{ + delete m_data; +} + +b3Clock::b3Clock(const b3Clock& other) +{ + m_data = new b3ClockData; + *m_data = *other.m_data; +} + +b3Clock& b3Clock::operator=(const b3Clock& other) +{ + *m_data = *other.m_data; + return *this; +} + + + /// Resets the initial reference time. 
+void b3Clock::reset() +{ +#ifdef B3_USE_WINDOWS_TIMERS + QueryPerformanceCounter(&m_data->mStartTime); + m_data->mStartTick = GetTickCount(); + m_data->mPrevElapsedTime = 0; +#else +#ifdef __CELLOS_LV2__ + + typedef uint64_t ClockSize; + ClockSize newTime; + //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); + SYS_TIMEBASE_GET( newTime ); + m_data->mStartTime = newTime; +#else + gettimeofday(&m_data->mStartTime, 0); +#endif +#endif +} + +/// Returns the time in ms since the last call to reset or since +/// the b3Clock was created. +unsigned long int b3Clock::getTimeMilliseconds() +{ +#ifdef B3_USE_WINDOWS_TIMERS + LARGE_INTEGER currentTime; + QueryPerformanceCounter(&currentTime); + LONGLONG elapsedTime = currentTime.QuadPart - + m_data->mStartTime.QuadPart; + // Compute the number of millisecond ticks elapsed. + unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / + m_data->mClockFrequency.QuadPart); + // Check for unexpected leaps in the Win32 performance counter. + // (This is caused by unexpected data across the PCI to ISA + // bridge, aka south bridge. See Microsoft KB274323.) + unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick; + signed long msecOff = (signed long)(msecTicks - elapsedTicks); + if (msecOff < -100 || msecOff > 100) + { + // Adjust the starting time forwards. + LONGLONG msecAdjustment = b3ClockMin(msecOff * + m_data->mClockFrequency.QuadPart / 1000, elapsedTime - + m_data->mPrevElapsedTime); + m_data->mStartTime.QuadPart += msecAdjustment; + elapsedTime -= msecAdjustment; + + // Recompute the number of millisecond ticks elapsed. + msecTicks = (unsigned long)(1000 * elapsedTime / + m_data->mClockFrequency.QuadPart); + } + + // Store the current elapsed time for adjustments next time. 
+ m_data->mPrevElapsedTime = elapsedTime; + + return msecTicks; +#else + +#ifdef __CELLOS_LV2__ + uint64_t freq=sys_time_get_timebase_frequency(); + double dFreq=((double) freq) / 1000.0; + typedef uint64_t ClockSize; + ClockSize newTime; + SYS_TIMEBASE_GET( newTime ); + //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); + + return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq); +#else + + struct timeval currentTime; + gettimeofday(&currentTime, 0); + return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 + + (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000; +#endif //__CELLOS_LV2__ +#endif +} + + /// Returns the time in us since the last call to reset or since + /// the Clock was created. +unsigned long int b3Clock::getTimeMicroseconds() +{ +#ifdef B3_USE_WINDOWS_TIMERS + LARGE_INTEGER currentTime; + QueryPerformanceCounter(&currentTime); + LONGLONG elapsedTime = currentTime.QuadPart - + m_data->mStartTime.QuadPart; + + // Compute the number of millisecond ticks elapsed. + unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / + m_data->mClockFrequency.QuadPart); + + // Check for unexpected leaps in the Win32 performance counter. + // (This is caused by unexpected data across the PCI to ISA + // bridge, aka south bridge. See Microsoft KB274323.) + unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick; + signed long msecOff = (signed long)(msecTicks - elapsedTicks); + if (msecOff < -100 || msecOff > 100) + { + // Adjust the starting time forwards. + LONGLONG msecAdjustment = b3ClockMin(msecOff * + m_data->mClockFrequency.QuadPart / 1000, elapsedTime - + m_data->mPrevElapsedTime); + m_data->mStartTime.QuadPart += msecAdjustment; + elapsedTime -= msecAdjustment; + } + + // Store the current elapsed time for adjustments next time. + m_data->mPrevElapsedTime = elapsedTime; + + // Convert to microseconds. 
+ unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / + m_data->mClockFrequency.QuadPart); + + return usecTicks; +#else + +#ifdef __CELLOS_LV2__ + uint64_t freq=sys_time_get_timebase_frequency(); + double dFreq=((double) freq)/ 1000000.0; + typedef uint64_t ClockSize; + ClockSize newTime; + //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory"); + SYS_TIMEBASE_GET( newTime ); + + return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq); +#else + + struct timeval currentTime; + gettimeofday(&currentTime, 0); + return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + + (currentTime.tv_usec - m_data->mStartTime.tv_usec); +#endif//__CELLOS_LV2__ +#endif +} + diff --git a/btgui/Timing/b3Clock.h b/btgui/Timing/b3Clock.h new file mode 100644 index 000000000..d30c615a3 --- /dev/null +++ b/btgui/Timing/b3Clock.h @@ -0,0 +1,31 @@ +#ifndef B3_CLOCK_H +#define B3_CLOCK_H + + +///The b3Clock is a portable basic clock that measures accurate time in seconds, use for profiling. +class b3Clock +{ +public: + b3Clock(); + + b3Clock(const b3Clock& other); + b3Clock& operator=(const b3Clock& other); + + ~b3Clock(); + + /// Resets the initial reference time. + void reset(); + + /// Returns the time in ms since the last call to reset or since + /// the b3Clock was created. + unsigned long int getTimeMilliseconds(); + + /// Returns the time in us since the last call to reset or since + /// the Clock was created. + unsigned long int getTimeMicroseconds(); +private: + struct b3ClockData* m_data; +}; + + +#endif //B3_CLOCK_H diff --git a/btgui/Timing/b3Quickprof.cpp b/btgui/Timing/b3Quickprof.cpp new file mode 100644 index 000000000..4ba9158dd --- /dev/null +++ b/btgui/Timing/b3Quickprof.cpp @@ -0,0 +1,430 @@ +/* +Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. 
+In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +/* + +*************************************************************************************************** +** +** profile.cpp +** +** Real-Time Hierarchical Profiling for Game Programming Gems 3 +** +** by Greg Hjelstrom & Byon Garrabrant +** +***************************************************************************************************/ + +// Credits: The Clock class was inspired by the Timer classes in +// Ogre (www.ogre3d.org). 
+ +#include "Bullet3Common/b3MinMax.h" +#include "b3Quickprof.h" + + +#ifndef B3_NO_PROFILE + + +static b3Clock b3s_profileClock; + + + + + + +inline void b3Profile_Get_Ticks(unsigned long int * ticks) +{ + *ticks = b3s_profileClock.getTimeMicroseconds(); +} + +inline float b3Profile_Get_Tick_Rate(void) +{ +// return 1000000.f; + return 1000.f; + +} + + + +/*************************************************************************************************** +** +** b3ProfileNode +** +***************************************************************************************************/ + +/*********************************************************************************************** + * INPUT: * + * name - pointer to a static string which is the name of this profile node * + * parent - parent pointer * + * * + * WARNINGS: * + * The name is assumed to be a static pointer, only the pointer is stored and compared for * + * efficiency reasons. * + *=============================================================================================*/ +b3ProfileNode::b3ProfileNode( const char * name, b3ProfileNode * parent ) : + Name( name ), + TotalCalls( 0 ), + TotalTime( 0 ), + StartTime( 0 ), + RecursionCounter( 0 ), + Parent( parent ), + Child( NULL ), + Sibling( NULL ), + m_userPtr(0) +{ + Reset(); +} + + +void b3ProfileNode::CleanupMemory() +{ + delete ( Child); + Child = NULL; + delete ( Sibling); + Sibling = NULL; +} + +b3ProfileNode::~b3ProfileNode( void ) +{ + delete ( Child); + delete ( Sibling); +} + + +/*********************************************************************************************** + * INPUT: * + * name - static string pointer to the name of the node we are searching for * + * * + * WARNINGS: * + * All profile names are assumed to be static strings so this function uses pointer compares * + * to find the named node. 
* + *=============================================================================================*/ +b3ProfileNode * b3ProfileNode::Get_Sub_Node( const char * name ) +{ + // Try to find this sub node + b3ProfileNode * child = Child; + while ( child ) { + if ( child->Name == name ) { + return child; + } + child = child->Sibling; + } + + // We didn't find it, so add it + + b3ProfileNode * node = new b3ProfileNode( name, this ); + node->Sibling = Child; + Child = node; + return node; +} + + +void b3ProfileNode::Reset( void ) +{ + TotalCalls = 0; + TotalTime = 0.0f; + + + if ( Child ) { + Child->Reset(); + } + if ( Sibling ) { + Sibling->Reset(); + } +} + + +void b3ProfileNode::Call( void ) +{ + TotalCalls++; + if (RecursionCounter++ == 0) { + b3Profile_Get_Ticks(&StartTime); + } +} + + +bool b3ProfileNode::Return( void ) +{ + if ( --RecursionCounter == 0 && TotalCalls != 0 ) { + unsigned long int time; + b3Profile_Get_Ticks(&time); + time-=StartTime; + TotalTime += (float)time / b3Profile_Get_Tick_Rate(); + } + return ( RecursionCounter == 0 ); +} + + +/*************************************************************************************************** +** +** b3ProfileIterator +** +***************************************************************************************************/ +b3ProfileIterator::b3ProfileIterator( b3ProfileNode * start ) +{ + CurrentParent = start; + CurrentChild = CurrentParent->Get_Child(); +} + + +void b3ProfileIterator::First(void) +{ + CurrentChild = CurrentParent->Get_Child(); +} + + +void b3ProfileIterator::Next(void) +{ + CurrentChild = CurrentChild->Get_Sibling(); +} + + +bool b3ProfileIterator::Is_Done(void) +{ + return CurrentChild == NULL; +} + + +void b3ProfileIterator::Enter_Child( int index ) +{ + CurrentChild = CurrentParent->Get_Child(); + while ( (CurrentChild != NULL) && (index != 0) ) { + index--; + CurrentChild = CurrentChild->Get_Sibling(); + } + + if ( CurrentChild != NULL ) { + CurrentParent = CurrentChild; + CurrentChild 
= CurrentParent->Get_Child(); + } +} + + +void b3ProfileIterator::Enter_Parent( void ) +{ + if ( CurrentParent->Get_Parent() != NULL ) { + CurrentParent = CurrentParent->Get_Parent(); + } + CurrentChild = CurrentParent->Get_Child(); +} + + +/*************************************************************************************************** +** +** b3ProfileManager +** +***************************************************************************************************/ + +b3ProfileNode b3ProfileManager::Root( "Root", NULL ); +b3ProfileNode * b3ProfileManager::CurrentNode = &b3ProfileManager::Root; +int b3ProfileManager::FrameCounter = 0; +unsigned long int b3ProfileManager::ResetTime = 0; + + +/*********************************************************************************************** + * b3ProfileManager::Start_Profile -- Begin a named profile * + * * + * Steps one level deeper into the tree, if a child already exists with the specified name * + * then it accumulates the profiling; otherwise a new child node is added to the profile tree. * + * * + * INPUT: * + * name - name of this profiling record * + * * + * WARNINGS: * + * The string used is assumed to be a static string; pointer compares are used throughout * + * the profiling code for efficiency. * + *=============================================================================================*/ +void b3ProfileManager::Start_Profile( const char * name ) +{ + if (name != CurrentNode->Get_Name()) { + CurrentNode = CurrentNode->Get_Sub_Node( name ); + } + + CurrentNode->Call(); +} + + +/*********************************************************************************************** + * b3ProfileManager::Stop_Profile -- Stop timing and record the results. 
* + *=============================================================================================*/ +void b3ProfileManager::Stop_Profile( void ) +{ + // Return will indicate whether we should back up to our parent (we may + // be profiling a recursive function) + if (CurrentNode->Return()) { + CurrentNode = CurrentNode->Get_Parent(); + } +} + + +/*********************************************************************************************** + * b3ProfileManager::Reset -- Reset the contents of the profiling system * + * * + * This resets everything except for the tree structure. All of the timing data is reset. * + *=============================================================================================*/ +void b3ProfileManager::Reset( void ) +{ + b3s_profileClock.reset(); + Root.Reset(); + Root.Call(); + FrameCounter = 0; + b3Profile_Get_Ticks(&ResetTime); +} + + +/*********************************************************************************************** + * b3ProfileManager::Increment_Frame_Counter -- Increment the frame counter * + *=============================================================================================*/ +void b3ProfileManager::Increment_Frame_Counter( void ) +{ + FrameCounter++; +} + + +/*********************************************************************************************** + * b3ProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset * + *=============================================================================================*/ +float b3ProfileManager::Get_Time_Since_Reset( void ) +{ + unsigned long int time; + b3Profile_Get_Ticks(&time); + time -= ResetTime; + return (float)time / b3Profile_Get_Tick_Rate(); +} + +#include + +void b3ProfileManager::dumpRecursive(b3ProfileIterator* profileIterator, int spacing) +{ + profileIterator->First(); + if (profileIterator->Is_Done()) + return; + + float accumulated_time=0,parent_time = profileIterator->Is_Root() ? 
b3ProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time(); + int i; + int frames_since_reset = b3ProfileManager::Get_Frame_Count_Since_Reset(); + for (i=0;iGet_Current_Parent_Name(), parent_time ); + float totalTime = 0.f; + + + int numChildren = 0; + + for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next()) + { + numChildren++; + float current_total_time = profileIterator->Get_Current_Total_Time(); + accumulated_time += current_total_time; + float fraction = parent_time > B3_EPSILON ? (current_total_time / parent_time) * 100 : 0.f; + { + int i; for (i=0;iGet_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls()); + totalTime += current_total_time; + //recurse into children + } + + if (parent_time < accumulated_time) + { + b3Printf("what's wrong\n"); + } + for (i=0;i B3_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time); + + for (i=0;iEnter_Child(i); + dumpRecursive(profileIterator,spacing+3); + profileIterator->Enter_Parent(); + } +} + + + + +void b3ProfileManager::dumpAll() +{ + b3ProfileIterator* profileIterator = 0; + profileIterator = b3ProfileManager::Get_Iterator(); + + dumpRecursive(profileIterator,0); + + b3ProfileManager::Release_Iterator(profileIterator); +} + + +void b3ProfileManager::dumpRecursive(FILE* f, b3ProfileIterator* profileIterator, int spacing) +{ + profileIterator->First(); + if (profileIterator->Is_Done()) + return; + + float accumulated_time=0,parent_time = profileIterator->Is_Root() ? 
b3ProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time(); + int i; + int frames_since_reset = b3ProfileManager::Get_Frame_Count_Since_Reset(); + for (i=0;iGet_Current_Parent_Name(), parent_time ); + float totalTime = 0.f; + + + int numChildren = 0; + + for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next()) + { + numChildren++; + float current_total_time = profileIterator->Get_Current_Total_Time(); + accumulated_time += current_total_time; + float fraction = parent_time > B3_EPSILON ? (current_total_time / parent_time) * 100 : 0.f; + { + int i; for (i=0;iGet_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls()); + totalTime += current_total_time; + //recurse into children + } + + if (parent_time < accumulated_time) + { + fprintf(f,"what's wrong\n"); + } + for (i=0;i B3_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time); + + for (i=0;iEnter_Child(i); + dumpRecursive(f,profileIterator,spacing+3); + profileIterator->Enter_Parent(); + } +} + + + + +void b3ProfileManager::dumpAll(FILE* f) +{ + b3ProfileIterator* profileIterator = 0; + profileIterator = b3ProfileManager::Get_Iterator(); + + dumpRecursive(f, profileIterator,0); + + b3ProfileManager::Release_Iterator(profileIterator); +} + + + +#endif //B3_NO_PROFILE diff --git a/btgui/Timing/b3Quickprof.h b/btgui/Timing/b3Quickprof.h new file mode 100644 index 000000000..44824baae --- /dev/null +++ b/btgui/Timing/b3Quickprof.h @@ -0,0 +1,173 @@ +/* +Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. 
+Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +/*************************************************************************************************** +** +** Real-Time Hierarchical Profiling for Game Programming Gems 3 +** +** by Greg Hjelstrom & Byon Garrabrant +** +***************************************************************************************************/ + +// Credits: The Clock class was inspired by the Timer classes in +// Ogre (www.ogre3d.org). 
+ + + +#ifndef B3_QUICK_PROF_H +#define B3_QUICK_PROF_H + +//To disable built-in profiling, please comment out next line +//#define B3_NO_PROFILE 1 +#ifndef B3_NO_PROFILE +#include //@todo remove this, backwards compatibility +#include "Bullet3Common/b3Scalar.h" +#include "Bullet3Common/b3AlignedAllocator.h" +#include + + + + +#include "b3Clock.h" + + + + +///A node in the Profile Hierarchy Tree +class b3ProfileNode { + +public: + b3ProfileNode( const char * name, b3ProfileNode * parent ); + ~b3ProfileNode( void ); + + b3ProfileNode * Get_Sub_Node( const char * name ); + + b3ProfileNode * Get_Parent( void ) { return Parent; } + b3ProfileNode * Get_Sibling( void ) { return Sibling; } + b3ProfileNode * Get_Child( void ) { return Child; } + + void CleanupMemory(); + void Reset( void ); + void Call( void ); + bool Return( void ); + + const char * Get_Name( void ) { return Name; } + int Get_Total_Calls( void ) { return TotalCalls; } + float Get_Total_Time( void ) { return TotalTime; } + void* GetUserPointer() const {return m_userPtr;} + void SetUserPointer(void* ptr) { m_userPtr = ptr;} +protected: + + const char * Name; + int TotalCalls; + float TotalTime; + unsigned long int StartTime; + int RecursionCounter; + + b3ProfileNode * Parent; + b3ProfileNode * Child; + b3ProfileNode * Sibling; + void* m_userPtr; +}; + +///An iterator to navigate through the tree +class b3ProfileIterator +{ +public: + // Access all the children of the current parent + void First(void); + void Next(void); + bool Is_Done(void); + bool Is_Root(void) { return (CurrentParent->Get_Parent() == 0); } + + void Enter_Child( int index ); // Make the given child the new parent + void Enter_Largest_Child( void ); // Make the largest child the new parent + void Enter_Parent( void ); // Make the current parent's parent the new parent + + // Access the current child + const char * Get_Current_Name( void ) { return CurrentChild->Get_Name(); } + int Get_Current_Total_Calls( void ) { return 
CurrentChild->Get_Total_Calls(); } + float Get_Current_Total_Time( void ) { return CurrentChild->Get_Total_Time(); } + + void* Get_Current_UserPointer( void ) { return CurrentChild->GetUserPointer(); } + void Set_Current_UserPointer(void* ptr) {CurrentChild->SetUserPointer(ptr);} + // Access the current parent + const char * Get_Current_Parent_Name( void ) { return CurrentParent->Get_Name(); } + int Get_Current_Parent_Total_Calls( void ) { return CurrentParent->Get_Total_Calls(); } + float Get_Current_Parent_Total_Time( void ) { return CurrentParent->Get_Total_Time(); } + + + +protected: + + b3ProfileNode * CurrentParent; + b3ProfileNode * CurrentChild; + + + b3ProfileIterator( b3ProfileNode * start ); + friend class b3ProfileManager; +}; + + +///The Manager for the Profile system +class b3ProfileManager { +public: + static void Start_Profile( const char * name ); + static void Stop_Profile( void ); + + static void CleanupMemory(void) + { + Root.CleanupMemory(); + } + + static void Reset( void ); + static void Increment_Frame_Counter( void ); + static int Get_Frame_Count_Since_Reset( void ) { return FrameCounter; } + static float Get_Time_Since_Reset( void ); + + static b3ProfileIterator * Get_Iterator( void ) + { + + return new b3ProfileIterator( &Root ); + } + static void Release_Iterator( b3ProfileIterator * iterator ) { delete ( iterator); } + + static void dumpRecursive(b3ProfileIterator* profileIterator, int spacing); + static void dumpAll(); + + static void dumpRecursive(FILE* f, b3ProfileIterator* profileIterator, int spacing); + static void dumpAll(FILE* f); + +private: + static b3ProfileNode Root; + static b3ProfileNode * CurrentNode; + static int FrameCounter; + static unsigned long int ResetTime; +}; + + + + + +#else + + + +#endif //#ifndef B3_NO_PROFILE + + + +#endif //B3_QUICK_PROF_H + + From 330bf3ea094700b8d5fa3740767b357f81ad9a04 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Thu, 20 Jun 2013 11:33:00 -0700 Subject: [PATCH 10/11] start 
implementing ray-convex on GPU (work-in-progress) --- src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp | 22 +- .../Raycast/kernels/rayCastKernels.cl | 211 +++++++++++++++++- .../Raycast/kernels/rayCastKernels.h | 211 +++++++++++++++++- .../RigidBody/b3GpuRigidBodyPipeline.cpp | 2 +- 4 files changed, 421 insertions(+), 25 deletions(-) diff --git a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp index 92dc65ba6..9dff7fc89 100644 --- a/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ b/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -76,14 +76,14 @@ bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vect } bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronCL& poly, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData, float& hitFraction, b3Vector3& hitNormal) + const b3AlignedObjectArray& faces, float& hitFraction, b3Vector3& hitNormal) { float exitFraction = hitFraction; float enterFraction = -0.1f; b3Vector3 curHitNormal(0,0,0); for (int i=0;im_convexFaces[poly.m_faceOffset+i]; + const b3GpuFace& face = faces[poly.m_faceOffset+i]; float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w; float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w; if (fromPlaneDist<0.f) @@ -174,7 +174,7 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray& rays, b3A int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; const b3ConvexPolyhedronCL& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; - if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData, hitFraction, hitNormal)) + if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal)) { hitBodyIndex = b; } @@ -218,14 +218,6 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3Align gpuHitResults.resize(hitResults.size()); gpuHitResults.copyFromHost(hitResults); - b3OpenCLArray 
gpuBodies(m_data->m_context,m_data->m_q); - gpuBodies.resize(numBodies); - gpuBodies.copyFromHostPointer(bodies,numBodies); - - b3OpenCLArray gpuCollidables(m_data->m_context,m_data->m_q); - gpuCollidables.resize(numCollidables); - gpuCollidables.copyFromHostPointer(collidables,numCollidables); - //run kernel { @@ -239,9 +231,11 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray& rays, b3Align launcher.setBuffer(gpuHitResults.getBufferCL()); launcher.setConst(numBodies); - launcher.setBuffer(gpuBodies.getBufferCL()); - launcher.setBuffer(gpuCollidables.getBufferCL()); - + launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); + launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); + launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); + launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); + launcher.launch1D(numRays); clFinish(m_data->m_q); } diff --git a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl index 9a627c141..4cf493d1f 100644 --- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl +++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl @@ -47,6 +47,181 @@ typedef struct Collidable } Collidable; +typedef struct +{ + float4 m_localCenter; + float4 m_extents; + float4 mC; + float4 mE; + + float m_radius; + int m_faceOffset; + int m_numFaces; + int m_numVertices; + + int m_vertexOffset; + int m_uniqueEdgesOffset; + int m_numUniqueEdges; + int m_unused; + +} ConvexPolyhedronCL; + +typedef struct +{ + float4 m_plane; + int m_indexOffset; + int m_numIndices; +} b3GpuFace; + + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + + +__inline 
+float dot3F4(float4 a, float4 b) +{ + float4 a1 = (float4)(a.xyz,0.f); + float4 b1 = (float4)(b.xyz,0.f); + return dot(a1, b1); +} + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fast_normalize(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + +void trInverse(float4 translationIn, Quaternion orientationIn, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtInvert(orientationIn); + *translationOut = qtRotate(*orientationOut, -translationIn); +} + +void trMul(float4 translationA, Quaternion orientationA, + float4 translationB, Quaternion orientationB, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtMul(orientationA,orientationB); + *translationOut = transform(&translationB,&translationA,&orientationA); +} + + + +bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset, + __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal) +{ + rayFromLocal.w = 0.f; + rayToLocal.w = 0.f; + + float exitFraction = *hitFraction; + float enterFraction = -0.1f; + float4 curHitNormal = (float4)(0,0,0,0); + for (int i=0;i= 0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if 
(exitFraction>fraction) + { + exitFraction = fraction; + } + } + } else + { + if (toPlaneDist<0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if (enterFraction <= fraction) + { + enterFraction = fraction; + curHitNormal = face.m_plane; + curHitNormal.w = 0.f; + } + } else + { + return false; + } + } + if (exitFraction <= enterFraction) + return false; + } + + if (enterFraction < 0.f) + return false; + + *hitFraction = enterFraction; + *hitNormal = curHitNormal; + return true; +} + + + + + bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction) { @@ -88,10 +263,11 @@ __kernel void rayCastKernel( __global b3RayHit* hitResults, const int numBodies, __global Body* bodies, - __global Collidable* collidables) + __global Collidable* collidables, + __global const b3GpuFace* faces, + __global const ConvexPolyhedronCL* convexShapes ) { - int i = get_global_id(0); if (i=0) { hitResults[i].m_hitFraction = hitFraction; - hitResults[i].m_hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction); - float4 hitNormal = (float4) (hitResults[i].m_hitPoint-bodies[hitBodyIndex].m_pos); + hitResults[i].m_hitPoint = hitPoint; hitResults[i].m_hitNormal = normalize(hitNormal); hitResults[i].m_hitResult0 = hitBodyIndex; } diff --git a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h index 3d1268c19..6f47345ef 100644 --- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h +++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h @@ -49,6 +49,181 @@ static const char* rayCastKernelCL= \ "} Collidable;\n" "\n" "\n" +"typedef struct \n" +"{\n" +" float4 m_localCenter;\n" +" float4 m_extents;\n" +" float4 mC;\n" +" float4 mE;\n" +" \n" +" float m_radius;\n" +" int m_faceOffset;\n" +" int m_numFaces;\n" +" int m_numVertices;\n" +" \n" +" int m_vertexOffset;\n" +" int m_uniqueEdgesOffset;\n" +" int m_numUniqueEdges;\n" +" int m_unused;\n" +"\n" +"} ConvexPolyhedronCL;\n" 
+"\n" +"typedef struct\n" +"{\n" +" float4 m_plane;\n" +" int m_indexOffset;\n" +" int m_numIndices;\n" +"} b3GpuFace;\n" +"\n" +"\n" +"\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"\n" +"typedef float4 Quaternion;\n" +"\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b);\n" +"\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in);\n" +"\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec);\n" +"\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q);\n" +"\n" +"\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = (float4)(a.xyz,0.f);\n" +" float4 b1 = (float4)(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"\n" +"\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in)\n" +"{\n" +" return fast_normalize(in);\n" +"// in /= length( in );\n" +"// return in;\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"\n" +"void trInverse(float4 translationIn, Quaternion orientationIn,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = 
qtInvert(orientationIn);\n" +" *translationOut = qtRotate(*orientationOut, -translationIn);\n" +"}\n" +"\n" +"void trMul(float4 translationA, Quaternion orientationA,\n" +" float4 translationB, Quaternion orientationB,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtMul(orientationA,orientationB);\n" +" *translationOut = transform(&translationB,&translationA,&orientationA);\n" +"}\n" +"\n" +"\n" +"\n" +"bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n" +" __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n" +"{\n" +" rayFromLocal.w = 0.f;\n" +" rayToLocal.w = 0.f;\n" +"\n" +" float exitFraction = *hitFraction;\n" +" float enterFraction = -0.1f;\n" +" float4 curHitNormal = (float4)(0,0,0,0);\n" +" for (int i=0;i= 0.f)\n" +" {\n" +" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" +" if (exitFraction>fraction)\n" +" {\n" +" exitFraction = fraction;\n" +" }\n" +" } \n" +" } else\n" +" {\n" +" if (toPlaneDist<0.f)\n" +" {\n" +" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" +" if (enterFraction <= fraction)\n" +" {\n" +" enterFraction = fraction;\n" +" curHitNormal = face.m_plane;\n" +" curHitNormal.w = 0.f;\n" +" }\n" +" } else\n" +" {\n" +" return false;\n" +" }\n" +" }\n" +" if (exitFraction <= enterFraction)\n" +" return false;\n" +" }\n" +"\n" +" if (enterFraction < 0.f)\n" +" return false;\n" +"\n" +" *hitFraction = enterFraction;\n" +" *hitNormal = curHitNormal;\n" +" return true;\n" +"}\n" +"\n" +"\n" +"\n" +"\n" +"\n" "\n" "bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n" "{\n" @@ -90,10 +265,11 @@ static const char* rayCastKernelCL= \ " __global b3RayHit* hitResults, \n" " const int numBodies, \n" " __global Body* bodies,\n" -" __global Collidable* collidables)\n" +" __global Collidable* collidables,\n" +" __global const b3GpuFace* faces,\n" +" __global const 
ConvexPolyhedronCL* convexShapes )\n" "{\n" "\n" -"\n" " int i = get_global_id(0);\n" " if (i=0)\n" " {\n" " hitResults[i].m_hitFraction = hitFraction;\n" -" hitResults[i].m_hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n" -" float4 hitNormal = (float4) (hitResults[i].m_hitPoint-bodies[hitBodyIndex].m_pos);\n" +" hitResults[i].m_hitPoint = hitPoint;\n" " hitResults[i].m_hitNormal = normalize(hitNormal);\n" " hitResults[i].m_hitResult0 = hitBodyIndex;\n" " }\n" diff --git a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index e7bc308b3..a77de1fb4 100644 --- a/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -457,7 +457,7 @@ int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray& rays, b3AlignedObjectArray& hitResults) { - this->m_data->m_raycaster->castRaysHost(rays,hitResults, + this->m_data->m_raycaster->castRays(rays,hitResults, getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getInternalData() ); From 92f2f330dffb0d0f125d216a0be7bd76879a3de2 Mon Sep 17 00:00:00 2001 From: erwin coumans Date: Thu, 20 Jun 2013 11:50:19 -0700 Subject: [PATCH 11/11] fix OpenCL kernel: NVIDIA crashes in clBuildProgram and AMD reports an error 'irreducable flow detected" ?!? 
ray-convex works on GPU --- .../Raycast/kernels/rayCastKernels.cl | 25 +++++++++++-------- .../Raycast/kernels/rayCastKernels.h | 25 +++++++++++-------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl index 4cf493d1f..8bc21f51b 100644 --- a/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl +++ b/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl @@ -171,11 +171,12 @@ bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOff { rayFromLocal.w = 0.f; rayToLocal.w = 0.f; - + bool result = true; + float exitFraction = *hitFraction; float enterFraction = -0.1f; float4 curHitNormal = (float4)(0,0,0,0); - for (int i=0;i