diff --git a/demo/gpudemo/GpuDemo.cpp b/Demos3/GpuDemos/GpuDemo.cpp similarity index 98% rename from demo/gpudemo/GpuDemo.cpp rename to Demos3/GpuDemos/GpuDemo.cpp index ee8ca45c2..00a035130 100644 --- a/demo/gpudemo/GpuDemo.cpp +++ b/Demos3/GpuDemos/GpuDemo.cpp @@ -1,7 +1,7 @@ #include "GpuDemo.h" #include "GpuDemoInternalData.h" #include "Bullet3Common/b3Scalar.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/GLInstancingRenderer.h" diff --git a/demo/gpudemo/GpuDemo.h b/Demos3/GpuDemos/GpuDemo.h similarity index 100% rename from demo/gpudemo/GpuDemo.h rename to Demos3/GpuDemos/GpuDemo.h diff --git a/demo/gpudemo/GpuDemoInternalData.h b/Demos3/GpuDemos/GpuDemoInternalData.h similarity index 85% rename from demo/gpudemo/GpuDemoInternalData.h rename to Demos3/GpuDemos/GpuDemoInternalData.h index 38912db2c..b4460e3c6 100644 --- a/demo/gpudemo/GpuDemoInternalData.h +++ b/Demos3/GpuDemos/GpuDemoInternalData.h @@ -1,7 +1,7 @@ #ifndef GPU_DEMO_INTERNAL_DATA_H #define GPU_DEMO_INTERNAL_DATA_H -#include "basic_initialize/b3OpenCLInclude.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" struct GpuDemoInternalData { diff --git a/demo/gpudemo/ParticleDemo.cpp b/Demos3/GpuDemos/ParticleDemo.cpp similarity index 98% rename from demo/gpudemo/ParticleDemo.cpp rename to Demos3/GpuDemos/ParticleDemo.cpp index 780d67487..e226b8778 100644 --- a/demo/gpudemo/ParticleDemo.cpp +++ b/Demos3/GpuDemos/ParticleDemo.cpp @@ -2,7 +2,7 @@ #include "OpenGLWindow/GLInstancingRenderer.h" #include "OpenGLWindow/ShapeData.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #define MSTRINGIFY(A) #A static char* particleKernelsString = @@ -12,10 +12,10 @@ static char* particleKernelsString = #include "Bullet3Common/b3Vector3.h" #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" -#include "parallel_primitives/host/b3LauncherCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" //#include "../../opencl/primitives/AdlPrimitives/Math/Math.h" //#include "../../opencl/broadphase_benchmark/b3GridBroadphaseCL.h" -#include "gpu_broadphase/host/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "GpuDemoInternalData.h" diff --git a/demo/gpudemo/ParticleDemo.h b/Demos3/GpuDemos/ParticleDemo.h similarity index 100% rename from demo/gpudemo/ParticleDemo.h rename to Demos3/GpuDemos/ParticleDemo.h diff --git a/demo/gpudemo/ParticleKernels.cl b/Demos3/GpuDemos/ParticleKernels.cl similarity index 100% rename from demo/gpudemo/ParticleKernels.cl rename to Demos3/GpuDemos/ParticleKernels.cl diff --git a/demo/gpudemo/broadphase/PairBench.cpp b/Demos3/GpuDemos/broadphase/PairBench.cpp similarity index 98% rename from demo/gpudemo/broadphase/PairBench.cpp rename to Demos3/GpuDemos/broadphase/PairBench.cpp index 61b4ed4ce..ed1d32ce4 100644 --- a/demo/gpudemo/broadphase/PairBench.cpp +++ b/Demos3/GpuDemos/broadphase/PairBench.cpp @@ -4,12 +4,12 @@ #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" #include "OpenGLWindow/b3gWindowInterface.h" -#include "gpu_broadphase/host/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "../GpuDemoInternalData.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" -#include "parallel_primitives/host/b3LauncherCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" static b3KeyboardCallback oldCallback = 0; extern bool gReset; diff --git a/demo/gpudemo/broadphase/PairBench.h b/Demos3/GpuDemos/broadphase/PairBench.h similarity index 100% rename from demo/gpudemo/broadphase/PairBench.h rename to Demos3/GpuDemos/broadphase/PairBench.h diff --git a/demo/gpudemo/gwenUserInterface.cpp b/Demos3/GpuDemos/gwenUserInterface.cpp similarity index 100% rename from demo/gpudemo/gwenUserInterface.cpp rename to Demos3/GpuDemos/gwenUserInterface.cpp diff --git a/demo/gpudemo/gwenUserInterface.h b/Demos3/GpuDemos/gwenUserInterface.h similarity index 100% rename from demo/gpudemo/gwenUserInterface.h rename to Demos3/GpuDemos/gwenUserInterface.h diff --git a/demo/gpudemo/main_opengl3core.cpp b/Demos3/GpuDemos/main_opengl3core.cpp similarity index 100% rename from demo/gpudemo/main_opengl3core.cpp rename to Demos3/GpuDemos/main_opengl3core.cpp diff --git a/demo/gpudemo/premake4.lua b/Demos3/GpuDemos/premake4.lua similarity index 59% rename from demo/gpudemo/premake4.lua rename to Demos3/GpuDemos/premake4.lua index 00ac422b4..e7da99be2 100644 --- a/demo/gpudemo/premake4.lua +++ b/Demos3/GpuDemos/premake4.lua @@ -4,7 +4,7 @@ function createProject(vendor) if (hasCL) then - project ("Bullet3_OpenCL_gpu_demo_" .. vendor) + project ("App_Bullet3_OpenCL_Demos_" .. vendor) initOpenCL(vendor) @@ -20,8 +20,7 @@ function createProject(vendor) includedirs { "..", "../../src", - "../../btgui", - "../../opencl" + "../../btgui" } links { @@ -30,21 +29,23 @@ function createProject(vendor) "Bullet3Geometry", "Bullet3Collision", "Bullet3Dynamics", - "Bullet2FileLoader" + "Bullet2FileLoader", + "Bullet3OpenCL_" .. vendor + } files { "**.cpp", "**.h", - "../ObjLoader/string_extra.cpp", - "../ObjLoader/string_extra.h", - "../ObjLoader/objLoader.cpp", - "../ObjLoader/objLoader.h", - "../ObjLoader/obj_parser.cpp", - "../ObjLoader/obj_parser.h", - "../ObjLoader/list.cpp", - "../ObjLoader/list.h", + "../Wavefront/string_extra.cpp", + "../Wavefront/string_extra.h", + "../Wavefront/objLoader.cpp", + "../Wavefront/objLoader.h", + "../Wavefront/obj_parser.cpp", + "../Wavefront/obj_parser.h", + "../Wavefront/list.cpp", + "../Wavefront/list.h", "../../btgui/OpenGLWindow/GLInstancingRenderer.cpp", @@ -60,21 +61,6 @@ function createProject(vendor) "../../btgui/OpenGLTrueTypeFont/opengl_fontstashcallbacks.cpp", "../../btgui/OpenGLTrueTypeFont/opengl_fontstashcallbacks.h", "../../btgui/FontFiles/OpenSans.cpp", - "../../opencl/basic_initialize/b3OpenCLUtils.cpp", - "../../opencl/basic_initialize/b3OpenCLUtils.h", - "../../opencl/gpu_broadphase/host/b3GpuSapBroadphase.cpp", - "../../opencl/gpu_narrowphase/host/**.cpp", - "../../opencl/gpu_narrowphase/host/**.h", - "../../opencl/parallel_primitives/host/b3BoundSearchCL.cpp", - "../../opencl/parallel_primitives/host/b3BoundSearchCL.h", - "../../opencl/parallel_primitives/host/b3FillCL.cpp", - "../../opencl/parallel_primitives/host/b3FillCL.h", - "../../opencl/parallel_primitives/host/b3PrefixScanCL.cpp", - "../../opencl/parallel_primitives/host/b3PrefixScanCL.h", - "../../opencl/parallel_primitives/host/b3RadixSort32CL.cpp", - "../../opencl/parallel_primitives/host/b3RadixSort32CL.h", - "../../opencl/gpu_rigidbody/host/**.cpp", - "../../opencl/gpu_rigidbody/host/**.h", } diff --git a/demo/gpudemo/rigidbody/Bullet2FileDemo.cpp b/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp similarity index 100% rename from demo/gpudemo/rigidbody/Bullet2FileDemo.cpp rename to Demos3/GpuDemos/rigidbody/Bullet2FileDemo.cpp diff --git a/demo/gpudemo/rigidbody/Bullet2FileDemo.h b/Demos3/GpuDemos/rigidbody/Bullet2FileDemo.h similarity index 100% rename from demo/gpudemo/rigidbody/Bullet2FileDemo.h rename to Demos3/GpuDemos/rigidbody/Bullet2FileDemo.h diff --git a/demo/gpudemo/rigidbody/BulletDataExtractor.cpp b/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp similarity index 99% rename from demo/gpudemo/rigidbody/BulletDataExtractor.cpp rename to Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp index 0106a2a03..4dfbd362d 100644 --- a/demo/gpudemo/rigidbody/BulletDataExtractor.cpp +++ b/Demos3/GpuDemos/rigidbody/BulletDataExtractor.cpp @@ -22,11 +22,11 @@ extern bool enableExperimentalCpuConcaveCollision; //#include "LinearMath/b3Quickprof.h" #include "Bullet3Common/b3Quaternion.h" #include "Bullet3Common/b3Matrix3x3.h" -#include "gpu_narrowphase/host/b3ConvexUtility.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.h" #include "OpenGLWindow/ShapeData.h" -#include "../../ObjLoader/objLoader.h" -#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h" -#include "gpu_rigidbody/host/b3GpuNarrowPhase.h" +#include "../../Wavefront/objLoader.h" +#include "Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h" ///work-in-progress ///This ReadBulletSample is kept as simple as possible without dependencies to the Bullet SDK. diff --git a/demo/gpudemo/rigidbody/BulletDataExtractor.h b/Demos3/GpuDemos/rigidbody/BulletDataExtractor.h similarity index 100% rename from demo/gpudemo/rigidbody/BulletDataExtractor.h rename to Demos3/GpuDemos/rigidbody/BulletDataExtractor.h diff --git a/demo/gpudemo/rigidbody/ConcaveScene.cpp b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp similarity index 98% rename from demo/gpudemo/rigidbody/ConcaveScene.cpp rename to Demos3/GpuDemos/rigidbody/ConcaveScene.cpp index 97f6440f2..40b8516ba 100644 --- a/demo/gpudemo/rigidbody/ConcaveScene.cpp +++ b/Demos3/GpuDemos/rigidbody/ConcaveScene.cpp @@ -6,17 +6,17 @@ #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" #include "OpenGLWindow/b3gWindowInterface.h" -#include "gpu_broadphase/host/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "../GpuDemoInternalData.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" -#include "parallel_primitives/host/b3LauncherCL.h" -#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h" -#include "gpu_rigidbody/host/b3GpuNarrowPhase.h" -#include "gpu_rigidbody/host/b3Config.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h" +#include "Bullet3OpenCL/RigidBody/b3Config.h" #include "GpuRigidBodyDemoInternalData.h" -#include"../../ObjLoader/objLoader.h" +#include"../../Wavefront/objLoader.h" #include "Bullet3Common/b3Transform.h" #include "OpenGLWindow/GLInstanceGraphicsShape.h" diff --git a/demo/gpudemo/rigidbody/ConcaveScene.h b/Demos3/GpuDemos/rigidbody/ConcaveScene.h similarity index 100% rename from demo/gpudemo/rigidbody/ConcaveScene.h rename to Demos3/GpuDemos/rigidbody/ConcaveScene.h diff --git a/demo/gpudemo/rigidbody/GpuCompoundScene.cpp b/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp similarity index 95% rename from demo/gpudemo/rigidbody/GpuCompoundScene.cpp rename to Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp index 7bf815650..0a2fa4543 100644 --- a/demo/gpudemo/rigidbody/GpuCompoundScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuCompoundScene.cpp @@ -6,15 +6,15 @@ #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" #include "OpenGLWindow/b3gWindowInterface.h" -#include "gpu_broadphase/host/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "../GpuDemoInternalData.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" -#include "parallel_primitives/host/b3LauncherCL.h" -#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h" -#include "gpu_rigidbody/host/b3GpuNarrowPhase.h" -#include "gpu_rigidbody/host/b3Config.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h" +#include "Bullet3OpenCL/RigidBody/b3Config.h" #include "GpuRigidBodyDemoInternalData.h" #include "Bullet3Common/b3Transform.h" diff --git a/demo/gpudemo/rigidbody/GpuCompoundScene.h b/Demos3/GpuDemos/rigidbody/GpuCompoundScene.h similarity index 100% rename from demo/gpudemo/rigidbody/GpuCompoundScene.h rename to Demos3/GpuDemos/rigidbody/GpuCompoundScene.h diff --git a/demo/gpudemo/rigidbody/GpuConvexScene.cpp b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp similarity index 93% rename from demo/gpudemo/rigidbody/GpuConvexScene.cpp rename to Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp index f298d583a..0f06ea3e1 100644 --- a/demo/gpudemo/rigidbody/GpuConvexScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuConvexScene.cpp @@ -6,15 +6,15 @@ #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" #include "OpenGLWindow/b3gWindowInterface.h" -#include "gpu_broadphase/host/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "../GpuDemoInternalData.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" -#include "parallel_primitives/host/b3LauncherCL.h" -#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h" -#include "gpu_rigidbody/host/b3GpuNarrowPhase.h" -#include "gpu_rigidbody/host/b3Config.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h" +#include "Bullet3OpenCL/RigidBody/b3Config.h" #include "GpuRigidBodyDemoInternalData.h" #include "../gwenUserInterface.h" #include "Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h" diff --git a/demo/gpudemo/rigidbody/GpuConvexScene.h b/Demos3/GpuDemos/rigidbody/GpuConvexScene.h similarity index 100% rename from demo/gpudemo/rigidbody/GpuConvexScene.h rename to Demos3/GpuDemos/rigidbody/GpuConvexScene.h diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp similarity index 94% rename from demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp rename to Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp index 2a6ea647b..1e70f4c9f 100644 --- a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.cpp @@ -4,15 +4,15 @@ #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" #include "OpenGLWindow/b3gWindowInterface.h" -#include "gpu_broadphase/host/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "../GpuDemoInternalData.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" -#include "parallel_primitives/host/b3LauncherCL.h" -#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h" -#include "gpu_rigidbody/host/b3GpuNarrowPhase.h" -#include "gpu_rigidbody/host/b3Config.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h" +#include "Bullet3OpenCL/RigidBody/b3Config.h" #include "GpuRigidBodyDemoInternalData.h" #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.h b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.h similarity index 100% rename from demo/gpudemo/rigidbody/GpuRigidBodyDemo.h rename to Demos3/GpuDemos/rigidbody/GpuRigidBodyDemo.h diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemoInternalData.h similarity index 83% rename from demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h rename to Demos3/GpuDemos/rigidbody/GpuRigidBodyDemoInternalData.h index 69c0333ec..189c2a5cf 100644 --- a/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h +++ b/Demos3/GpuDemos/rigidbody/GpuRigidBodyDemoInternalData.h @@ -1,8 +1,8 @@ #ifndef GPU_RIGIDBODY_INTERNAL_DATA_H #define GPU_RIGIDBODY_INTERNAL_DATA_H -#include "basic_initialize/b3OpenCLUtils.h" -#include "parallel_primitives/host/b3OpenCLArray.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" #include "Bullet3Common/b3Vector3.h" struct GpuRigidBodyDemoInternalData diff --git a/demo/gpudemo/rigidbody/GpuSphereScene.cpp b/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp similarity index 94% rename from demo/gpudemo/rigidbody/GpuSphereScene.cpp rename to Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp index ea0b88ebb..e6d9cf616 100644 --- a/demo/gpudemo/rigidbody/GpuSphereScene.cpp +++ b/Demos3/GpuDemos/rigidbody/GpuSphereScene.cpp @@ -2,19 +2,18 @@ #include "GpuRigidBodyDemo.h" #include "Bullet3Common/b3Quickprof.h" #include "OpenGLWindow/ShapeData.h" - #include "OpenGLWindow/GLInstancingRenderer.h" #include "Bullet3Common/b3Quaternion.h" #include "OpenGLWindow/b3gWindowInterface.h" -#include "gpu_broadphase/host/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" #include "../GpuDemoInternalData.h" -#include "basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "OpenGLWindow/OpenGLInclude.h" #include "OpenGLWindow/GLInstanceRendererInternalData.h" -#include "parallel_primitives/host/b3LauncherCL.h" -#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h" -#include "gpu_rigidbody/host/b3GpuNarrowPhase.h" -#include "gpu_rigidbody/host/b3Config.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h" +#include "Bullet3OpenCL/RigidBody/b3Config.h" #include "GpuRigidBodyDemoInternalData.h" #include "../gwenUserInterface.h" diff --git a/demo/gpudemo/rigidbody/GpuSphereScene.h b/Demos3/GpuDemos/rigidbody/GpuSphereScene.h similarity index 100% rename from demo/gpudemo/rigidbody/GpuSphereScene.h rename to Demos3/GpuDemos/rigidbody/GpuSphereScene.h diff --git a/demo/gpu_initialize/main.cpp b/Demos3/GpuGuiInitialize/main.cpp similarity index 100% rename from demo/gpu_initialize/main.cpp rename to Demos3/GpuGuiInitialize/main.cpp diff --git a/demo/gpu_initialize/premake4.lua b/Demos3/GpuGuiInitialize/premake4.lua similarity index 100% rename from demo/gpu_initialize/premake4.lua rename to Demos3/GpuGuiInitialize/premake4.lua diff --git a/demo/ObjLoader/list.cpp b/Demos3/Wavefront/list.cpp similarity index 100% rename from demo/ObjLoader/list.cpp rename to Demos3/Wavefront/list.cpp diff --git a/demo/ObjLoader/list.h b/Demos3/Wavefront/list.h similarity index 100% rename from demo/ObjLoader/list.h rename to Demos3/Wavefront/list.h diff --git a/demo/ObjLoader/objLoader.cpp b/Demos3/Wavefront/objLoader.cpp similarity index 100% rename from demo/ObjLoader/objLoader.cpp rename to Demos3/Wavefront/objLoader.cpp diff --git a/demo/ObjLoader/objLoader.h b/Demos3/Wavefront/objLoader.h similarity index 100% rename from demo/ObjLoader/objLoader.h rename to Demos3/Wavefront/objLoader.h diff --git a/demo/ObjLoader/objTester.cpp b/Demos3/Wavefront/objTester.cpp similarity index 100% rename from demo/ObjLoader/objTester.cpp rename to Demos3/Wavefront/objTester.cpp diff --git a/demo/ObjLoader/obj_parser.cpp b/Demos3/Wavefront/obj_parser.cpp similarity index 100% rename from demo/ObjLoader/obj_parser.cpp rename to Demos3/Wavefront/obj_parser.cpp diff --git a/demo/ObjLoader/obj_parser.h b/Demos3/Wavefront/obj_parser.h similarity index 100% rename from demo/ObjLoader/obj_parser.h rename to Demos3/Wavefront/obj_parser.h diff --git a/demo/ObjLoader/premake4.lua b/Demos3/Wavefront/premake4.lua similarity index 100% rename from demo/ObjLoader/premake4.lua rename to Demos3/Wavefront/premake4.lua diff --git a/demo/ObjLoader/string_extra.cpp b/Demos3/Wavefront/string_extra.cpp similarity index 100% rename from demo/ObjLoader/string_extra.cpp rename to Demos3/Wavefront/string_extra.cpp diff --git a/demo/ObjLoader/string_extra.h b/Demos3/Wavefront/string_extra.h similarity index 100% rename from demo/ObjLoader/string_extra.h rename to Demos3/Wavefront/string_extra.h diff --git a/demo/donttouch/Bullet2GpuDemo.cpp b/Demos3/donttouch/Bullet2GpuDemo.cpp similarity index 100% rename from demo/donttouch/Bullet2GpuDemo.cpp rename to Demos3/donttouch/Bullet2GpuDemo.cpp diff --git a/demo/donttouch/Bullet2GpuDemo.h b/Demos3/donttouch/Bullet2GpuDemo.h similarity index 100% rename from demo/donttouch/Bullet2GpuDemo.h rename to Demos3/donttouch/Bullet2GpuDemo.h diff --git a/demo/donttouch/GpuDemo.cpp b/Demos3/donttouch/GpuDemo.cpp similarity index 100% rename from demo/donttouch/GpuDemo.cpp rename to Demos3/donttouch/GpuDemo.cpp diff --git a/demo/donttouch/GpuDemo.h b/Demos3/donttouch/GpuDemo.h similarity index 100% rename from demo/donttouch/GpuDemo.h rename to Demos3/donttouch/GpuDemo.h diff --git a/demo/donttouch/OpenGL3CoreRenderer.cpp b/Demos3/donttouch/OpenGL3CoreRenderer.cpp similarity index 100% rename from demo/donttouch/OpenGL3CoreRenderer.cpp rename to Demos3/donttouch/OpenGL3CoreRenderer.cpp diff --git a/demo/donttouch/OpenGL3CoreRenderer.h b/Demos3/donttouch/OpenGL3CoreRenderer.h similarity index 100% rename from demo/donttouch/OpenGL3CoreRenderer.h rename to Demos3/donttouch/OpenGL3CoreRenderer.h diff --git a/demo/donttouch/b3CpuDynamicsWorld.cpp b/Demos3/donttouch/b3CpuDynamicsWorld.cpp similarity index 100% rename from demo/donttouch/b3CpuDynamicsWorld.cpp rename to Demos3/donttouch/b3CpuDynamicsWorld.cpp diff --git a/demo/donttouch/b3CpuDynamicsWorld.h b/Demos3/donttouch/b3CpuDynamicsWorld.h similarity index 100% rename from demo/donttouch/b3CpuDynamicsWorld.h rename to Demos3/donttouch/b3CpuDynamicsWorld.h diff --git a/demo/donttouch/b3GpuDynamicsWorld.cpp b/Demos3/donttouch/b3GpuDynamicsWorld.cpp similarity index 100% rename from demo/donttouch/b3GpuDynamicsWorld.cpp rename to Demos3/donttouch/b3GpuDynamicsWorld.cpp diff --git a/demo/donttouch/b3GpuDynamicsWorld.h b/Demos3/donttouch/b3GpuDynamicsWorld.h similarity index 100% rename from demo/donttouch/b3GpuDynamicsWorld.h rename to Demos3/donttouch/b3GpuDynamicsWorld.h diff --git a/btgui/GwenOpenGLTest/premake4.lua b/btgui/GwenOpenGLTest/premake4.lua index 31e555f7f..54fabacc9 100644 --- a/btgui/GwenOpenGLTest/premake4.lua +++ b/btgui/GwenOpenGLTest/premake4.lua @@ -1,5 +1,5 @@ - project "Gwen_OpenGLTest" + project "Test_Gwen_OpenGL" kind "ConsoleApp" flags {"Unicode"} diff --git a/build/premake4.lua b/build/premake4.lua index a5930fe80..c9a4901ab 100644 --- a/build/premake4.lua +++ b/build/premake4.lua @@ -91,30 +91,35 @@ if not _OPTIONS["ios"] then - include "../demo/gpudemo" - include "../btgui/MidiTest" +-- include "../demo/gpudemo" +-- include "../btgui/MidiTest" -- include "../opencl/vector_add_simplified" -- include "../opencl/vector_add" - include "../opencl/basic_initialize" - include "../demo/gpu_initialize" - include "../opencl/parallel_primitives/host" - include "../opencl/parallel_primitives/test" - include "../opencl/parallel_primitives/benchmark" - include "../opencl/lds_bank_conflict" --- include "../opencl/reduce" --- include "../opencl/gpu_broadphase/test" --- include "../opencl/gpu_narrowphase/test" include "../btgui/Gwen" include "../btgui/GwenOpenGLTest" - include "../btgui/OpenGLTrueTypeFont" --- include "../btgui/OpenGLWindow" --- include "../demo/ObjLoader" + + include "../test/OpenCL/BasicInitialize" + include "../test/OpenCL/BroadphaseCollision" + include "../test/OpenCL/NarrowphaseCollision" + include "../test/OpenCL/ParallelPrimitives" + include "../src/Bullet3Dynamics" include "../src/Bullet3Common" include "../src/Bullet3Geometry" include "../src/Bullet3Collision" include "../src/Bullet3Serialize/Bullet2FileLoader" + + include "../src/Bullet3OpenCL" + include "../Demos3/GpuDemos" + +-- include "../demo/gpu_initialize" +-- include "../opencl/lds_bank_conflict" +-- include "../opencl/reduce" +-- include "../btgui/OpenGLTrueTypeFont" +-- include "../btgui/OpenGLWindow" +-- include "../demo/ObjLoader" + - include "../test/b3DynamicBvhBroadphase" +-- include "../test/b3DynamicBvhBroadphase" end diff --git a/opencl/basic_initialize/premake4.lua b/opencl/basic_initialize/premake4.lua deleted file mode 100644 index 44af06b5d..000000000 --- a/opencl/basic_initialize/premake4.lua +++ /dev/null @@ -1,28 +0,0 @@ -function createProject(vendor) - - hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ("OpenCL_intialize_" .. vendor) - - initOpenCL(vendor) - - language "C++" - - kind "ConsoleApp" - targetdir "../../bin" - - files { - "main.cpp", - "b3OpenCLUtils.cpp", - "b3OpenCLUtils.h" - } - - end -end - -createProject("Apple") -createProject("AMD") -createProject("Intel") -createProject("NVIDIA") diff --git a/opencl/gpu_broadphase/test/main.cpp b/opencl/gpu_broadphase/test/main.cpp deleted file mode 100644 index 81b4e2b11..000000000 --- a/opencl/gpu_broadphase/test/main.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - -#include -#include "../basic_initialize/b3OpenCLUtils.h" -#include "../host/b3GpuSapBroadphase.h" -#include "Bullet3Common/b3Vector3.h" -#include "parallel_primitives/host/b3FillCL.h" -#include "parallel_primitives/host/b3BoundSearchCL.h" -#include "parallel_primitives/host/b3RadixSort32CL.h" -#include "parallel_primitives/host/b3PrefixScanCL.h" -#include "Bullet3Common/b3CommandLineArgs.h" -#include "Bullet3Common/b3MinMax.h" - -int g_nPassed = 0; -int g_nFailed = 0; -bool g_testFailed = 0; - -#define TEST_INIT g_testFailed = 0; -#define TEST_ASSERT(x) if( !(x) ){g_testFailed = 1;} -#define TEST_REPORT(testName) printf("[%s] %s\n",(g_testFailed)?"X":"O", testName); if(g_testFailed) g_nFailed++; else g_nPassed++; -#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) - -cl_context g_context=0; -cl_device_id g_device=0; -cl_command_queue g_queue =0; -const char* g_deviceName = 0; - -void initCL(int preferredDeviceIndex, int preferredPlatformIndex) -{ - void* glCtx=0; - void* glDC = 0; - int ciErrNum = 0; - //bound search and radix sort only work on GPU right now (assume 32 or 64 width workgroup without barriers) - - cl_device_type deviceType = CL_DEVICE_TYPE_ALL; - - g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - int numDev = b3OpenCLUtils::getNumDevices(g_context); - if (numDev>0) - { - b3OpenCLDeviceInfo info; - g_device= b3OpenCLUtils::getDevice(g_context,0); - g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - b3OpenCLUtils::printDeviceInfo(g_device); - b3OpenCLUtils::getDeviceInfo(g_device,&info); - g_deviceName = info.m_deviceName; - } -} - -void exitCL() -{ - clReleaseCommandQueue(g_queue); - clReleaseContext(g_context); -} - - -inline void broadphaseTest() -{ - TEST_INIT; - - b3GpuSapBroadphase* sap = new b3GpuSapBroadphase(g_context,g_device,g_queue); - int group=1; - int mask=1; - b3Vector3 aabbMin(0,0,0); - b3Vector3 aabbMax(1,1,1); - int usrPtr = 1; - sap->createProxy(aabbMin,aabbMax,usrPtr,group,mask); - - aabbMin.setValue(1,1,1); - aabbMax.setValue(2,2,2); - - usrPtr = 2; - sap->createProxy(aabbMin,aabbMax,usrPtr,group,mask); - sap->writeAabbsToGpu(); - - sap->calculateOverlappingPairs(); - - int numOverlap = sap->getNumOverlap(); - cl_mem buf = sap->getOverlappingPairBuffer(); - - TEST_ASSERT(numOverlap==1); - - delete sap; - - TEST_REPORT( "broadphaseTest" ); -} - -int main(int argc, char** argv) -{ - int preferredDeviceIndex = -1; - int preferredPlatformIndex = -1; - - b3CommandLineArgs args(argc, argv); - args.GetCmdLineArgument("deviceId", preferredDeviceIndex); - args.GetCmdLineArgument("platformId", preferredPlatformIndex); - - initCL(preferredDeviceIndex,preferredPlatformIndex); - - - broadphaseTest(); - - printf("%d tests passed\n",g_nPassed, g_nFailed); - if (g_nFailed) - { - printf("%d tests failed\n",g_nFailed); - } - printf("End, press \n"); - - getchar(); - - exitCL(); - -} - diff --git a/opencl/gpu_broadphase/test/premake4.lua b/opencl/gpu_broadphase/test/premake4.lua deleted file mode 100644 index af93fab90..000000000 --- a/opencl/gpu_broadphase/test/premake4.lua +++ /dev/null @@ -1,46 +0,0 @@ -function createProject(vendor) - hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ("OpenCL_broadphase_test_" .. vendor) - - initOpenCL(vendor) - - language "C++" - - kind "ConsoleApp" - targetdir "../../../bin" - includedirs {"..","../..","../../../src"} - - - files { - "main.cpp", - "../../basic_initialize/b3OpenCLInclude.h", - "../../basic_initialize/b3OpenCLUtils.cpp", - "../../basic_initialize/b3OpenCLUtils.h", - "../host/b3GpuSapBroadphase.cpp", - "../host/b3GpuSapBroadphase.h", - "../../parallel_primitives/host/btFillCL.cpp", - "../../parallel_primitives/host/btFillCL.h", - "../../parallel_primitives/host/btBoundSearchCL.cpp", - "../../parallel_primitives/host/btBoundSearchCL.h", - "../../parallel_primitives/host/btPrefixScanCL.cpp", - "../../parallel_primitives/host/btPrefixScanCL.h", - "../../parallel_primitives/host/btRadixSort32CL.cpp", - "../../parallel_primitives/host/btRadixSort32CL.h", - "../../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../../src/Bullet3Common/b3AlignedAllocator.h", - "../../../src/Bullet3Common/b3AlignedObjectArray.h", - "../../../src/Bullet3Common/b3Quickprof.cpp", - "../../../src/Bullet3Common/b3Quickprof.h", - - } - - end -end - -createProject("AMD") -createProject("Intel") -createProject("NVIDIA") -createProject("Apple") \ No newline at end of file diff --git a/opencl/gpu_narrowphase/test/main.cpp b/opencl/gpu_narrowphase/test/main.cpp deleted file mode 100644 index 8cdd23e5c..000000000 --- a/opencl/gpu_narrowphase/test/main.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - -#include -#include "../basic_initialize/b3OpenCLUtils.h" -#include "../host/b3ConvexHullContact.h" - -#include "Bullet3Common/b3Vector3.h" -#include "parallel_primitives/host/b3FillCL.h" -#include "parallel_primitives/host/b3BoundSearchCL.h" -#include "parallel_primitives/host/b3RadixSort32CL.h" -#include "parallel_primitives/host/b3PrefixScanCL.h" -#include "Bullet3Common/b3CommandLineArgs.h" -#include "../host/b3ConvexHullContact.h" - -#include "Bullet3Common/b3MinMax.h" -int g_nPassed = 0; -int g_nFailed = 0; -bool g_testFailed = 0; - -#define TEST_INIT g_testFailed = 0; -#define TEST_ASSERT(x) if( !(x) ){g_testFailed = 1;} -#define TEST_REPORT(testName) printf("[%s] %s\n",(g_testFailed)?"X":"O", testName); if(g_testFailed) g_nFailed++; else g_nPassed++; -#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) - -cl_context g_context=0; -cl_device_id g_device=0; -cl_command_queue g_queue =0; -const char* g_deviceName = 0; - -void initCL(int preferredDeviceIndex, int preferredPlatformIndex) -{ - void* glCtx=0; - void* glDC = 0; - int ciErrNum = 0; - //bound search and radix sort only work on GPU right now (assume 32 or 64 width workgroup without barriers) - - cl_device_type deviceType = CL_DEVICE_TYPE_ALL; - - g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - int numDev = b3OpenCLUtils::getNumDevices(g_context); - if (numDev>0) - { - b3OpenCLDeviceInfo info; - g_device= b3OpenCLUtils::getDevice(g_context,0); - g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - b3OpenCLUtils::printDeviceInfo(g_device); - b3OpenCLUtils::getDeviceInfo(g_device,&info); - g_deviceName = info.m_deviceName; - } -} - -void exitCL() -{ - clReleaseCommandQueue(g_queue); - clReleaseContext(g_context); -} - - -inline void gpuConvexHullContactTest() -{ - TEST_INIT; - - TEST_ASSERT(1); - - GpuSatCollision* sat = new GpuSatCollision(g_context,g_device,g_queue); - - delete sat; - - TEST_REPORT( "gpuConvexHullContactTest" ); -} - -int main(int argc, char** argv) -{ - int preferredDeviceIndex = -1; int preferredPlatformIndex = -1; - - b3CommandLineArgs args(argc, argv); - args.GetCmdLineArgument("deviceId", preferredDeviceIndex); - args.GetCmdLineArgument("platformId", preferredPlatformIndex); - - initCL(preferredDeviceIndex,preferredPlatformIndex); - - gpuConvexHullContactTest(); - - printf("%d tests passed\n",g_nPassed, g_nFailed); - if (g_nFailed) - { - printf("%d tests failed\n",g_nFailed); - } - printf("End, press \n"); - - getchar(); - - exitCL(); - -} - diff --git a/opencl/gpu_narrowphase/test/premake4.lua b/opencl/gpu_narrowphase/test/premake4.lua deleted file mode 100644 index 45d0064de..000000000 --- a/opencl/gpu_narrowphase/test/premake4.lua +++ /dev/null @@ -1,49 +0,0 @@ -function createProject(vendor) - hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ("OpenCL_sat_test_" .. vendor) - - initOpenCL(vendor) - - language "C++" - - kind "ConsoleApp" - targetdir "../../../bin" - includedirs {"..","../..","../../../src"} - - - files { - "main.cpp", - "../../basic_initialize/b3OpenCLInclude.h", - "../../basic_initialize/b3OpenCLUtils.cpp", - "../../basic_initialize/b3OpenCLUtils.h", - "../host/**.cpp", - "../host/**.h", - "../../parallel_primitives/host/btFillCL.cpp", - "../../parallel_primitives/host/btFillCL.h", - "../../parallel_primitives/host/btBoundSearchCL.cpp", - "../../parallel_primitives/host/btBoundSearchCL.h", - "../../parallel_primitives/host/btPrefixScanCL.cpp", - "../../parallel_primitives/host/btPrefixScanCL.h", - "../../parallel_primitives/host/btRadixSort32CL.cpp", - "../../parallel_primitives/host/btRadixSort32CL.h", - "../../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../../src/Bullet3Common/b3AlignedAllocator.h", - "../../../src/Bullet3Common/b3AlignedObjectArray.h", - "../../../src/Bullet3Common/b3Quickprof.cpp", - "../../../src/Bullet3Common/b3Quickprof.h", - "../../../src/Bullet3Geometry/**.cpp", - "../../../src/Bullet3Geometry/**.h", - - - } - - end -end - -createProject("AMD") -createProject("Intel") -createProject("NVIDIA") -createProject("Apple") \ No newline at end of file diff --git a/opencl/lds_bank_conflict/lds_kernels.cl b/opencl/lds_bank_conflict/lds_kernels.cl deleted file mode 100644 index 6e3ad78f3..000000000 --- a/opencl/lds_bank_conflict/lds_kernels.cl +++ /dev/null @@ -1,171 +0,0 @@ - -#define TILE_DIM 32 -#define BLOCK_ROWS 8 - - -/*// simple copy kernel (CUDA) -// Used as reference case representing best effective bandwidth. -__global__ void copy(float *odata, const float *idata) -{ - int x = blockIdx.x * TILE_DIM + threadIdx.x; - int y = blockIdx.y * TILE_DIM + threadIdx.y; - int width = gridDim.x * TILE_DIM; - - for (int j = 0; j < TILE_DIM; j+= BLOCK_ROWS) - odata[(y+j)*width + x] = idata[(y+j)*width + x]; -} -*/ -// simple copy kernel (OpenCL) -__kernel void copyKernel(__global float* odata, __global const float* idata) -{ - int x = get_group_id(0) * get_num_groups(0) + get_local_id(0); - int y = get_group_id(1) * get_num_groups(1) + get_local_id(1); - int width = get_num_groups(0) * get_local_size(0); - for (int j = 0; j < get_num_groups(1); j+= get_local_size(1)) - { - odata[(y+j)*width + x] = idata[(y+j)*width + x]; - } -} - -/* -// copy kernel using shared memory (CUDA) -// Also used as reference case, demonstrating effect of using shared memory. -__global__ void copySharedMem(float *odata, const float *idata) -{ - __shared__ float tile[TILE_DIM * TILE_DIM]; - - int x = blockIdx.x * TILE_DIM + threadIdx.x; - int y = blockIdx.y * TILE_DIM + threadIdx.y; - int width = gridDim.x * TILE_DIM; - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - tile[(threadIdx.y+j)*TILE_DIM + threadIdx.x] = idata[(y+j)*width + x]; - - __syncthreads(); - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - odata[(y+j)*width + x] = tile[(threadIdx.y+j)*TILE_DIM + threadIdx.x]; -} -*/ - -// copy kernel using shared memory (OpenCL) -// Also used as reference case, demonstrating effect of using shared memory. -__kernel void copySharedMemKernel(__global float *odata, __global const float *idata) -{ - __local float tile[TILE_DIM * TILE_DIM]; - - int x = get_group_id(0) * get_num_groups(0) + get_local_id(0); - int y = get_group_id(1) * get_num_groups(1) + get_local_id(1); - int width = get_num_groups(0) * get_local_size(0); - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - tile[(get_local_id(1)+j)*TILE_DIM + get_local_id(0)] = idata[(y+j)*width + x]; - - barrier(CLK_LOCAL_MEM_FENCE); - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - odata[(y+j)*width + x] = tile[(get_local_id(1)+j)*TILE_DIM + get_local_id(0)]; -} - -/* -// naive transpose (CUDA) -// Simplest transpose; doesn't use shared memory. -// Global memory reads are coalesced but writes are not. -__global__ void transposeNaive(float *odata, const float *idata) -{ - int x = blockIdx.x * TILE_DIM + threadIdx.x; - int y = blockIdx.y * TILE_DIM + threadIdx.y; - int width = gridDim.x * TILE_DIM; - - for (int j = 0; j < TILE_DIM; j+= BLOCK_ROWS) - odata[x*width + (y+j)] = idata[(y+j)*width + x]; -} -*/ - -// naive transpose (OpenCL) -// Simplest transpose; doesn't use shared memory. -// Global memory reads are coalesced but writes are not. -__kernel void transposeNaiveKernel(__global float *odata, __global const float *idata) -{ - int x = get_group_id(0) * get_num_groups(0) + get_local_id(0); - int y = get_group_id(1) * get_num_groups(1) + get_local_id(1); - int width = get_num_groups(0) * get_local_size(0); - - for (int j = 0; j < TILE_DIM; j+= BLOCK_ROWS) - odata[x*width + (y+j)] = idata[(y+j)*width + x]; -} - -/* -// coalesced transpose (CUDA) -// Uses shared memory to achieve coalesing in both reads and writes -// Tile width == #banks causes shared memory bank conflicts. -__global__ void transposeCoalesced(float *odata, const float *idata) -{ - __shared__ float tile[TILE_DIM][TILE_DIM]; - - int x = blockIdx.x * TILE_DIM + threadIdx.x; - int y = blockIdx.y * TILE_DIM + threadIdx.y; - int width = gridDim.x * TILE_DIM; - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - tile[threadIdx.y+j][threadIdx.x] = idata[(y+j)*width + x]; - - __syncthreads(); - - x = blockIdx.y * TILE_DIM + threadIdx.x; // transpose block offset - y = blockIdx.x * TILE_DIM + threadIdx.y; - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - odata[(y+j)*width + x] = tile[threadIdx.x][threadIdx.y + j]; -} -*/ - -// coalesced transpose (OpenCL) -// Uses shared memory to achieve coalesing in both reads and writes -// Tile width == #banks causes shared memory bank conflicts. -__kernel void transposeCoalescedKernel(__global float *odata, __global const float *idata) -{ - __local float tile[TILE_DIM][TILE_DIM]; - - int x = get_group_id(0) * get_num_groups(0) + get_local_id(0); - int y = get_group_id(1) * get_num_groups(1) + get_local_id(1); - int width = get_num_groups(0) * get_local_size(0); - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - tile[get_local_id(1)+j][get_local_id(0)] = idata[(y+j)*width + x]; - - barrier(CLK_LOCAL_MEM_FENCE); - - x = get_group_id(1) * TILE_DIM + get_local_id(0); - y = get_group_id(0) * TILE_DIM + get_local_id(1); - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - odata[(y+j)*width + x] = tile[get_local_id(0)][get_local_id(1) + j]; -} - - -// No bank-conflict transpose (OpenCL) -// Same as transposeCoalesced except the first tile dimension is padded -// to avoid shared memory bank conflicts. -__kernel void transposeNoBankConflictsKernel(__global float *odata, __global const float *idata) -{ - __local float tile[TILE_DIM][TILE_DIM+1]; - - int x = get_group_id(0) * get_num_groups(0) + get_local_id(0); - int y = get_group_id(1) * get_num_groups(1) + get_local_id(1); - int width = get_num_groups(0) * get_local_size(0); - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - tile[get_local_id(1)+j][get_local_id(0)] = idata[(y+j)*width + x]; - - barrier(CLK_LOCAL_MEM_FENCE); - - x = get_group_id(1) * TILE_DIM + get_local_id(0); - y = get_group_id(0) * TILE_DIM + get_local_id(1); - - for (int j = 0; j < TILE_DIM; j += BLOCK_ROWS) - odata[(y+j)*width + x] = tile[get_local_id(0)][get_local_id(1) + j]; -} - - - diff --git a/opencl/lds_bank_conflict/main.cpp b/opencl/lds_bank_conflict/main.cpp deleted file mode 100644 index b6e70f688..000000000 --- a/opencl/lds_bank_conflict/main.cpp +++ /dev/null @@ -1,361 +0,0 @@ -//Adapted from CUDA to OpenCL by Erwin Coumans -//See http://bitbucket.org/erwincoumans/opencl_course - -// Copyright 2012 NVIDIA Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - - -#include "b3OpenCLUtils.h" -#include "../parallel_primitives/host/b3OpenCLArray.h" -#include "../parallel_primitives/host/b3LauncherCL.h" -#include "Bullet3Common/b3Quickprof.h" -#include "../parallel_primitives/host/b3FillCL.h" -#include "Bullet3Common/b3CommandLineArgs.h" - -#include -#include -#include - -//make sure to update the same #define in the opencl/lds_bank_conflict/lds_kernels.cl -const int TILE_DIM = 32; -const int BLOCK_ROWS = 8; -const int NUM_REPS = 100; - -// Check errors and print GB/s -void postprocess(const float *ref, const float *res, int n, float ms) -{ - bool passed = true; - for (int i = 0; i < n; i++) - if (res[i] != ref[i]) { - printf("\nError: at res[%d] got %f but expected %f\n", i, res[i], ref[i]); - printf("%25s\n", "*** FAILED ***"); - passed = false; - break; - } - if (passed) - printf("%20.2f\n", 2 * n * sizeof(float) * 1e-6 * NUM_REPS / ms ); -} - -char* loadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) -{ - // locals - FILE* pFileStream = NULL; - size_t szSourceLength; - - // open the OpenCL source code file - pFileStream = fopen(cFilename, "rb"); - if(pFileStream == 0) - { - return NULL; - } - - size_t szPreambleLength = strlen(cPreamble); - - // get the length of the source code - fseek(pFileStream, 0, SEEK_END); - szSourceLength = ftell(pFileStream); - fseek(pFileStream, 0, SEEK_SET); - - // allocate a buffer for the source code string and read it in - char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1); - memcpy(cSourceString, cPreamble, szPreambleLength); - fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream); - - // close the file and return the total length of the combined (preamble + source) string - fclose(pFileStream); - if(szFinalLength != 0) - { - *szFinalLength = szSourceLength + szPreambleLength; - } - cSourceString[szSourceLength + szPreambleLength] = '\0'; - - return cSourceString; -} - -int main(int argc, char **argv) -{ - printf("Use --deviceId= or --platformId= to override OpenCL device\n"); - b3CommandLineArgs args(argc,argv); - - const int nx = 1024; - const int ny = 1024; - - const int mem_size = nx*ny*sizeof(float); - const int num_elements = nx*ny; - b3Clock clock; - double startEvent=0.f; - double stopEvent=0.f; - - int localSizeX = TILE_DIM; - int localSizeY = BLOCK_ROWS; - - int numThreadsX = (nx/TILE_DIM)*TILE_DIM; - int numThreadsY = (ny/TILE_DIM)*BLOCK_ROWS; - - int gridX = numThreadsX / localSizeX; - int gridY = numThreadsY / localSizeY; - - int ciErrNum = 0; - int preferred_device = -1; - int preferred_platform = -1; - args.GetCmdLineArgument("deviceId",preferred_device); - args.GetCmdLineArgument("platformId",preferred_platform); - - - cl_platform_id platformId=0; - cl_context ctx=0; - cl_command_queue queue=0; - cl_device_id device=0; - cl_kernel copyKernel=0; - cl_kernel copySharedMemKernel=0; - cl_kernel transposeNaiveKernel = 0; - cl_kernel transposeCoalescedKernel = 0; - cl_kernel transposeNoBankConflictsKernel= 0; - - - ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId); - b3OpenCLUtils::printPlatformInfo(platformId); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - device = b3OpenCLUtils::getDevice(ctx,0); - b3OpenCLUtils::printDeviceInfo(device); - queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum); - - const char* cSourceFile = "opencl/lds_bank_conflict/lds_kernels.cl"; - - size_t szKernelLength; - - const char* cSourceCL =0; - char relativeFileName[1024]; - - { - const char* prefix[]={"./","../","../../","../../../","../../../../"}; - int numPrefixes = sizeof(prefix)/sizeof(char*); - - for (int i=0;!cSourceCL && i d_idataCL(ctx,queue);d_idataCL.resize(num_elements); - b3OpenCLArray d_cdataCL(ctx,queue);d_cdataCL.resize(num_elements); - b3OpenCLArray d_tdataCL(ctx,queue);d_tdataCL.resize(num_elements); - - - // check parameters and calculate execution configuration - if (nx % TILE_DIM || ny % TILE_DIM) - { - printf("nx and ny must be a multiple of TILE_DIM\n"); - goto error_exit; - } - - if (TILE_DIM % BLOCK_ROWS) - { - printf("TILE_DIM must be a multiple of BLOCK_ROWS\n"); - goto error_exit; - } - - // host - for (int j = 0; j < ny; j++) - for (int i = 0; i < nx; i++) - h_idata[j*nx + i] = j*nx + i; - - // correct result for error checking - for (int j = 0; j < ny; j++) - for (int i = 0; i < nx; i++) - { - gold[j*nx + i] = h_idata[i*nx + j]; - } - - d_idataCL.copyFromHostPointer(h_idata,num_elements); - - // events for timing - clock.reset(); - - float ms; - - // ------------ - // time kernels - // ------------ - printf("%25s%25s\n", "Routine", "Bandwidth (GB/s)"); - - // ---- - // copy - // ---- - printf("%25s", "copy"); - - clMemSet.execute(d_cdataCL,0.f,num_elements); - - { - // warm up - b3LauncherCL launcher( queue, copyKernel); - launcher.setBuffer( d_cdataCL.getBufferCL()); - launcher.setBuffer( d_idataCL.getBufferCL()); - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - - startEvent = clock.getTimeMicroseconds()/1e3; - for (int i = 0; i < NUM_REPS; i++) - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - clFinish(queue); - stopEvent = clock.getTimeMicroseconds()/1e3; - } - - ms = float(stopEvent-startEvent); - - d_cdataCL.copyToHostPointer(h_cdata,num_elements,0); - postprocess(h_idata, h_cdata, nx*ny, ms); - - // ------------- - // copySharedMem - // ------------- - printf("%25s", "shared memory copy"); - clMemSet.execute(d_cdataCL,0.f,num_elements); - - { - b3LauncherCL launcher( queue, copySharedMemKernel); - launcher.setBuffer( d_cdataCL.getBufferCL()); - launcher.setBuffer( d_idataCL.getBufferCL()); - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - - startEvent = clock.getTimeMicroseconds()/1e3; - for (int i = 0; i < NUM_REPS; i++) - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - clFinish(queue); - stopEvent = clock.getTimeMicroseconds()/1e3; - } - - ms = float(stopEvent-startEvent); - d_cdataCL.copyToHostPointer(h_cdata,num_elements,0); - postprocess(h_idata, h_cdata, nx * ny, ms); - - // -------------- - // transposeNaive - // -------------- - printf("%25s", "naive transpose"); - clMemSet.execute(d_tdataCL,0.f,num_elements); - { - // warmup - b3LauncherCL launcher( queue, transposeNaiveKernel); - launcher.setBuffer( d_tdataCL.getBufferCL()); - launcher.setBuffer( d_idataCL.getBufferCL()); - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - - startEvent = clock.getTimeMicroseconds()/1e3; - for (int i = 0; i < NUM_REPS; i++) - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - clFinish(queue); - stopEvent = clock.getTimeMicroseconds()/1e3; - } - ms = float(stopEvent-startEvent); - d_tdataCL.copyToHostPointer(h_tdata,num_elements,0); - postprocess(gold, h_tdata, nx * ny, ms); - - // ------------------ - // transposeCoalesced - // ------------------ - printf("%25s", "coalesced transpose"); - clMemSet.execute(d_tdataCL,0.f,num_elements); - { - b3LauncherCL launcher( queue, transposeCoalescedKernel); - launcher.setBuffer( d_tdataCL.getBufferCL()); - launcher.setBuffer( d_idataCL.getBufferCL()); - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - - startEvent = clock.getTimeMicroseconds()/1e3; - for (int i = 0; i < NUM_REPS; i++) - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - clFinish(queue); - stopEvent = clock.getTimeMicroseconds()/1e3; - } - - ms = float(stopEvent-startEvent); - d_tdataCL.copyToHostPointer(h_tdata,num_elements,0); - postprocess(gold, h_tdata, nx * ny, ms); - - // ------------------------ - // transposeNoBankConflicts - // ------------------------ - printf("%25s", "conflict-free transpose"); - clMemSet.execute(d_tdataCL,0.f,num_elements); - { - b3LauncherCL launcher( queue, transposeNoBankConflictsKernel); - launcher.setBuffer( d_tdataCL.getBufferCL()); - launcher.setBuffer( d_idataCL.getBufferCL()); - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - - startEvent = clock.getTimeMicroseconds()/1e3; - for (int i = 0; i < NUM_REPS; i++) - launcher.launch2D(numThreadsX,numThreadsY,localSizeX,localSizeY ); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - clFinish(queue); - stopEvent = clock.getTimeMicroseconds()/1e3; - } - - ms = float(stopEvent-startEvent); - d_tdataCL.copyToHostPointer(h_tdata,num_elements,0); - postprocess(gold, h_tdata, nx * ny, ms); - -error_exit: - // cleanup - clReleaseKernel(copyKernel); - clReleaseCommandQueue(queue); - clReleaseContext(ctx); - - free(h_idata); - free(h_tdata); - free(h_cdata); - free(gold); - printf("Press \n"); - getchar(); -} diff --git a/opencl/lds_bank_conflict/premake4.lua b/opencl/lds_bank_conflict/premake4.lua deleted file mode 100644 index b568e9ab8..000000000 --- a/opencl/lds_bank_conflict/premake4.lua +++ /dev/null @@ -1,44 +0,0 @@ - -function createProject (vendor) - - local hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ( "OpenCL_lds_bank_conflict_" .. vendor) - - initOpenCL(vendor) - - language "C++" - - kind "ConsoleApp" - targetdir "../../bin" - - links { - "OpenCL_lib_parallel_primitives_host_" .. vendor - } - - includedirs { - "../basic_initialize", - "../../src" - } - - files { - "main.cpp", - "../basic_initialize/b3OpenCLUtils.cpp", - "../basic_initialize/b3OpenCLUtils.h", - "../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../src/Bullet3Common/b3AlignedAllocator.h", - "../../src/Bullet3Common/b3AlignedObjectArray.h", - "../../src/Bullet3Common/b3Quickprof.cpp", - "../../src/Bullet3Common/b3Quickprof.h", - - } - end - -end - -createProject("AMD") -createProject("NVIDIA") -createProject("Intel") -createProject("Apple") diff --git a/opencl/parallel_primitives/benchmark/premake4.lua b/opencl/parallel_primitives/benchmark/premake4.lua deleted file mode 100644 index 73499e487..000000000 --- a/opencl/parallel_primitives/benchmark/premake4.lua +++ /dev/null @@ -1,40 +0,0 @@ -function createProject(vendor) - hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ("OpenCL_radixsort_benchmark_" .. vendor) - - initOpenCL(vendor) - - language "C++" - - kind "ConsoleApp" - targetdir "../../../bin" - includedirs {"..","../../../src"} - - links { - ("OpenCL_lib_parallel_primitives_host_" .. vendor) - } - - files { - "test_large_problem_sorting.cpp", - "../../basic_initialize/b3OpenCLUtils.cpp", - "../../basic_initialize/b3OpenCLUtils.h", - "../host/b3FillCL.cpp", - "../host/b3PrefixScanCL.cpp", - "../host/b3RadixSort32CL.cpp", - "../../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../../src/Bullet3Common/b3AlignedAllocator.h", - "../../../src/Bullet3Common/b3AlignedObjectArray.h", - "../../../src/Bullet3Common/b3Quickprof.cpp", - "../../../src/Bullet3Common/b3Quickprof.h", - } - - end -end - -createProject("AMD") -createProject("Intel") -createProject("NVIDIA") -createProject("Apple") \ No newline at end of file diff --git a/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp b/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp deleted file mode 100644 index 01bf03e8a..000000000 --- a/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp +++ /dev/null @@ -1,711 +0,0 @@ -/****************************************************************************** - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * - * - * - * AUTHORS' REQUEST: - * - * If you use|reference|benchmark this code, please cite our Technical - * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): - * - * @TechReport{ Merrill:Sorting:2010, - * author = "Duane Merrill and Andrew Grimshaw", - * title = "Revisiting Sorting for GPGPU Stream Architectures", - * year = "2010", - * institution = "University of Virginia, Department of Computer Science", - * address = "Charlottesville, VA, USA", - * number = "CS2010-03" - * } - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! - ******************************************************************************/ - -/****************************************************************************** - * Simple test driver program for *large-problem* radix sorting. - * - * Useful for demonstrating how to integrate radix sorting into - * your application - ******************************************************************************/ - -/****************************************************************************** - * Converted from CUDA to OpenCL/DirectCompute by Erwin Coumans - ******************************************************************************/ -#ifdef _WIN32 -#pragma warning (disable:4996) -#endif -#include -#include -#include -#include -#include -#include -#include - - -//#include -#include -/********************** -* -*/ - -#include "../host/b3RadixSort32CL.h" -#include "../../basic_initialize/b3OpenCLUtils.h" -#include "Bullet3Common/b3Quickprof.h" - -cl_context g_cxMainContext; -cl_device_id g_device; -cl_command_queue g_cqCommandQueue; - -/*********************** -* -*/ - -bool g_verbose; -///Preferred OpenCL device/platform. When < 0 then no preference is used. -///Note that b3OpenCLUtils might still use the preference of using a platform vendor that matches the SDK vendor used to build the application. -///Preferred device/platform take priority over this platform-vendor match -int gPreferredDeviceId = -1; -int gPreferredPlatformId = -1; - - - -/****************************************************************************** - * Routines - ******************************************************************************/ - - -/** - * Keys-only sorting. Uses the GPU to sort the specified vector of elements for the given - * number of iterations, displaying runtime information. - * - * @param[in] num_elements - * Size in elements of the vector to sort - * @param[in] h_keys - * Vector of keys to sort - * @param[in] iterations - * Number of times to invoke the GPU sorting primitive - * @param[in] cfg - * Config - */ -template -void TimedSort( - unsigned int num_elements, - K *h_keys, - unsigned int iterations) -{ - printf("Keys only, %d iterations, %d elements\n", iterations, num_elements); - - int max_elements = num_elements; - b3AlignedObjectArray hostData; - hostData.resize(num_elements); - for (int i=0;i gpuData(g_cxMainContext,g_cqCommandQueue); - gpuData.copyFromHost(hostData); - //sorter.executeHost(gpuData); - sorter.execute(gpuData); - - b3AlignedObjectArray hostDataSorted; - gpuData.copyToHost(hostDataSorted); - - clFinish(g_cqCommandQueue); - - { - //printf("Key-values, %d iterations, %d elements", iterations, num_elements); - - // Create sorting enactor - - // Perform the timed number of sorting iterations - double elapsed = 0; - float duration = 0; - b3Clock watch; - - //warm-start - gpuData.copyFromHost(hostData); - clFinish(g_cqCommandQueue); - sorter.execute(gpuData); - - watch.reset(); - - - for (int i = 0; i < iterations; i++) - { - - - - // Move a fresh copy of the problem into device storage - gpuData.copyFromHost(hostData); - clFinish(g_cqCommandQueue); - - // Start GPU timing record - double startMs = watch.getTimeMicroseconds()/1e3; - - // Call the sorting API routine - sorter.execute(gpuData); - - - - clFinish(g_cqCommandQueue); - - double stopMs = watch.getTimeMicroseconds()/1e3; - - duration = stopMs - startMs; - - // End GPU timing record - elapsed += (double) duration; - printf("duration = %f\n", duration); - } - - // Display timing information - double avg_runtime = elapsed / iterations; - // double throughput = ((double) num_elements) / avg_runtime / 1000.0 / 1000.0; - // printf(", %f GPU ms, %f x10^9 elts/sec\n", avg_runtime, throughput); - double throughput = ((double) num_elements) / avg_runtime / 1000.0 ; - printf(", %f GPU ms, %f x10^6 elts/sec\n", avg_runtime, throughput); - - gpuData.copyToHost(hostData); - for (int i=0;i -void TimedSort( - unsigned int num_elements, - K *h_keys, - V *h_values, - unsigned int iterations) -{ - - printf("Key-values, %d iterations, %d elements\n", iterations, num_elements); - - int max_elements = num_elements; - b3AlignedObjectArray hostData; - hostData.resize(num_elements); - for (int i=0;i gpuData(g_cxMainContext,g_cqCommandQueue); - gpuData.copyFromHost(hostData); - //sorter.executeHost(gpuData); - sorter.execute(gpuData); - - b3AlignedObjectArray hostDataSorted; - gpuData.copyToHost(hostDataSorted); -#if 0 - for (int i=0;i -void RandomBits(K &key, int entropy_reduction = 0, int lower_key_bits = sizeof(K) * 8) -{ - const unsigned int NUM_UCHARS = (sizeof(K) + sizeof(unsigned char) - 1) / sizeof(unsigned char); - unsigned char key_bits[NUM_UCHARS]; - - do { - - for (int j = 0; j < NUM_UCHARS; j++) { - unsigned char quarterword = 0xff; - for (int i = 0; i <= entropy_reduction; i++) { - quarterword &= (rand() >> 7); - } - key_bits[j] = quarterword; - } - - if (lower_key_bits < sizeof(K) * 8) { - unsigned long long base = 0; - memcpy(&base, key_bits, sizeof(K)); - base &= (1 << lower_key_bits) - 1; - memcpy(key_bits, &base, sizeof(K)); - } - - memcpy(&key, key_bits, sizeof(K)); - - } while (key != key); // avoids NaNs when generating random floating point numbers -} - - -/****************************************************************************** - * Templated routines for printing keys/values to the console - ******************************************************************************/ - -template -void PrintValue(T val) { - printf("%d", val); -} - -template<> -void PrintValue(float val) { - printf("%f", val); -} - -template<> -void PrintValue(double val) { - printf("%f", val); -} - -template<> -void PrintValue(unsigned char val) { - printf("%u", val); -} - -template<> -void PrintValue(unsigned short val) { - printf("%u", val); -} - -template<> -void PrintValue(unsigned int val) { - printf("%u", val); -} - -template<> -void PrintValue(long val) { - printf("%ld", val); -} - -template<> -void PrintValue(unsigned long val) { - printf("%lu", val); -} - -template<> -void PrintValue(long long val) { - printf("%lld", val); -} - -template<> -void PrintValue(unsigned long long val) { - printf("%llu", val); -} - - - -/** - * Compares the equivalence of two arrays - */ -template -int CompareResults(T* computed, T* reference, SizeT len, bool verbose = true) -{ - printf("\n"); - for (SizeT i = 0; i < len; i++) { - - if (computed[i] != reference[i]) { - printf("INCORRECT: [%lu]: ", (unsigned long) i); - PrintValue(computed[i]); - printf(" != "); - PrintValue(reference[i]); - - if (verbose) { - printf("\nresult[..."); - for (size_t j = (i >= 5) ? i - 5 : 0; (j < i + 5) && (j < len); j++) { - PrintValue(computed[j]); - printf(", "); - } - printf("...]"); - printf("\nreference[..."); - for (size_t j = (i >= 5) ? i - 5 : 0; (j < i + 5) && (j < len); j++) { - PrintValue(reference[j]); - printf(", "); - } - printf("...]"); - } - - return 1; - } - } - - printf("CORRECT\n"); - return 0; -} - -/** - * Creates an example sorting problem whose keys is a vector of the specified - * number of K elements, values of V elements, and then dispatches the problem - * to the GPU for the given number of iterations, displaying runtime information. - * - * @param[in] iterations - * Number of times to invoke the GPU sorting primitive - * @param[in] num_elements - * Size in elements of the vector to sort - * @param[in] cfg - * Config - */ -template -void TestSort( - unsigned int iterations, - int num_elements, - bool keys_only) -{ - // Allocate the sorting problem on the host and fill the keys with random bytes - - K *h_keys = NULL; - K *h_reference_keys = NULL; - V *h_values = NULL; - h_keys = (K*) malloc(num_elements * sizeof(K)); - h_reference_keys = (K*) malloc(num_elements * sizeof(K)); - if (!keys_only) h_values = (V*) malloc(num_elements * sizeof(V)); - - - // Use random bits - for (unsigned int i = 0; i < num_elements; ++i) { - RandomBits(h_keys[i], 0); - //h_keys[i] = num_elements-i; - //h_keys[i] = 0xffffffffu-i; - if (!keys_only) - h_values[i] = h_keys[i];//0xffffffffu-i; - - h_reference_keys[i] = h_keys[i]; - } - - // Run the timing test - if (keys_only) { - TimedSort(num_elements, h_keys, iterations); - } else { - TimedSort(num_elements, h_keys, h_values, iterations); - } - -// cudaThreadSynchronize(); - - // Display sorted key data - if (g_verbose) { - printf("\n\nKeys:\n"); - for (int i = 0; i < num_elements; i++) { - PrintValue(h_keys[i]); - printf(", "); - } - printf("\n\n"); - } - - // Verify solution - std::sort(h_reference_keys, h_reference_keys + num_elements); - CompareResults(h_keys, h_reference_keys, num_elements, true); - printf("\n"); - fflush(stdout); - - // Free our allocated host memory - if (h_keys != NULL) free(h_keys); - if (h_values != NULL) free(h_values); -} - - - -/** - * Displays the commandline usage for this tool - */ -void Usage() -{ - printf("\ntest_large_problem_sorting [--device=] [--v] [--i=] [--n=] [--key-values] [--deviceId=] [--platformId=]\n"); - printf("\n"); - printf("\t--v\tDisplays sorted results to the console.\n"); - printf("\n"); - printf("\t--i\tPerforms the sorting operation times\n"); - printf("\t\t\ton the device. Re-copies original input each time. Default = 1\n"); - printf("\n"); - printf("\t--n\tThe number of elements to comprise the sample problem\n"); - printf("\t\t\tDefault = 512\n"); - printf("\n"); - printf("\t--key-values\tSpecifies that keys are accommodated by value pairings\n"); - printf("\n"); -} - - -/****************************************************************************** - * Command-line parsing - ******************************************************************************/ -#include -#include -#include - -class b3CommandLineArgs -{ -protected: - - std::map pairs; - -public: - - // Constructor - b3CommandLineArgs(int argc, char **argv) - { - using namespace std; - - for (int i = 1; i < argc; i++) - { - string arg = argv[i]; - - if ((arg[0] != '-') || (arg[1] != '-')) { - continue; - } - - string::size_type pos; - string key, val; - if ((pos = arg.find( '=')) == string::npos) { - key = string(arg, 2, arg.length() - 2); - val = ""; - } else { - key = string(arg, 2, pos - 2); - val = string(arg, pos + 1, arg.length() - 1); - } - pairs[key] = val; - } - } - - bool CheckCmdLineFlag(const char* arg_name) - { - using namespace std; - map::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) { - return true; - } - return false; - } - - template - void GetCmdLineArgument(const char *arg_name, T &val); - - int ParsedArgc() - { - return pairs.size(); - } -}; - -template -void b3CommandLineArgs::GetCmdLineArgument(const char *arg_name, T &val) -{ - using namespace std; - map::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) { - istringstream strstream(itr->second); - strstream >> val; - } -} - -template <> -void b3CommandLineArgs::GetCmdLineArgument(const char* arg_name, char* &val) -{ - using namespace std; - map::iterator itr; - if ((itr = pairs.find(arg_name)) != pairs.end()) { - - string s = itr->second; - val = (char*) malloc(sizeof(char) * (s.length() + 1)); - strcpy(val, s.c_str()); - - } else { - val = NULL; - } -} - - - - - -/****************************************************************************** - * Main - ******************************************************************************/ - -extern bool gDebugSkipLoadingBinary; - -int main( int argc, char** argv) -{ - //gDebugSkipLoadingBinary = true; - - cl_int ciErrNum; - b3CommandLineArgs args(argc,argv); - - args.GetCmdLineArgument("deviceId", gPreferredDeviceId); - args.GetCmdLineArgument("platformId", gPreferredPlatformId); - - printf("Initialize OpenCL using b3OpenCLUtils_createContextFromType\n"); - cl_platform_id platformId; - g_cxMainContext = b3OpenCLUtils_createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum, 0, 0,gPreferredDeviceId,gPreferredPlatformId,&platformId); -// g_cxMainContext = b3OpenCLUtils_createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum, 0, 0,gPreferredDeviceId,gPreferredPlatformId,&platformId); - - oclCHECKERROR(ciErrNum, CL_SUCCESS); - - int numDev = b3OpenCLUtils_getNumDevices(g_cxMainContext); - - if (!numDev) - { - printf("error: no OpenCL devices\n"); - exit(0); - } - int result; - int devId = 0; - g_device = b3OpenCLUtils_getDevice(g_cxMainContext,devId); - b3OpenCLUtils_printDeviceInfo(g_device); - // create a command-queue - g_cqCommandQueue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - - - - //srand(time(NULL)); - srand(0); // presently deterministic - - unsigned int num_elements = 8*1024*1024;//4*1024*1024;//4*1024*1024;//257;//8*524288;//2048;//512;//524288; - unsigned int iterations = 10; - bool keys_only = true; - - // - // Check command line arguments - // - - - - if (args.CheckCmdLineFlag("help")) - { - Usage(); - return 0; - } - - args.GetCmdLineArgument("i", iterations); - args.GetCmdLineArgument("n", num_elements); - - - - keys_only = !args.CheckCmdLineFlag("key-values"); - g_verbose = args.CheckCmdLineFlag("v"); - - - - TestSort( - iterations, - num_elements, - keys_only); - - -} diff --git a/opencl/parallel_primitives/host/b3Int2.h b/opencl/parallel_primitives/host/b3Int2.h deleted file mode 100644 index be0dbd9bd..000000000 --- a/opencl/parallel_primitives/host/b3Int2.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef B3_INT2_H -#define B3_INT2_H - -struct b3UnsignedInt2 -{ - union - { - struct - { - unsigned int x,y; - }; - struct - { - unsigned int s[2]; - }; - }; -}; - -struct b3Int2 -{ - union - { - struct - { - int x,y; - }; - struct - { - int s[2]; - }; - }; -}; - - -#endif \ No newline at end of file diff --git a/opencl/parallel_primitives/test/main.cpp b/opencl/parallel_primitives/test/main.cpp deleted file mode 100644 index 9ecb637c9..000000000 --- a/opencl/parallel_primitives/test/main.cpp +++ /dev/null @@ -1,379 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - -#include -#include "../basic_initialize/b3OpenCLUtils.h" -#include "../host/b3FillCL.h" -#include "../host/b3BoundSearchCL.h" -#include "../host/b3RadixSort32CL.h" -#include "../host/b3PrefixScanCL.h" -#include "Bullet3Common/b3CommandLineArgs.h" -#include "Bullet3Common/b3MinMax.h" - -int g_nPassed = 0; -int g_nFailed = 0; -bool g_testFailed = 0; - -#define TEST_INIT g_testFailed = 0; -#define TEST_ASSERT(x) if( !(x) ){g_testFailed = 1;} -#define TEST_REPORT(testName) printf("[%s] %s\n",(g_testFailed)?"X":"O", testName); if(g_testFailed) g_nFailed++; else g_nPassed++; -#define NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) - -cl_context g_context=0; -cl_device_id g_device=0; -cl_command_queue g_queue =0; -const char* g_deviceName = 0; - -void initCL(int preferredDeviceIndex, int preferredPlatformIndex) -{ - void* glCtx=0; - void* glDC = 0; - int ciErrNum = 0; - //bound search and radix sort only work on GPU right now (assume 32 or 64 width workgroup without barriers) - - cl_device_type deviceType = CL_DEVICE_TYPE_ALL; - - g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - int numDev = b3OpenCLUtils::getNumDevices(g_context); - if (numDev>0) - { - b3OpenCLDeviceInfo info; - g_device= b3OpenCLUtils::getDevice(g_context,0); - g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - b3OpenCLUtils::printDeviceInfo(g_device); - b3OpenCLUtils::getDeviceInfo(g_device,&info); - g_deviceName = info.m_deviceName; - } -} - -void exitCL() -{ - clReleaseCommandQueue(g_queue); - clReleaseContext(g_context); -} - - -inline void fillIntTest() -{ - TEST_INIT; - - b3FillCL* fillCL = new b3FillCL(g_context,g_device,g_queue); - int maxSize=1024*256; - b3OpenCLArray intBuffer(g_context,g_queue,maxSize); - intBuffer.resize(maxSize); - -#define NUM_TESTS 7 - - int dx = maxSize/NUM_TESTS; - for (int iter=0;iterexecute(intBuffer,value,size,offset); - - b3AlignedObjectArray hostBuf2; - hostBuf2.resize(size); - fillCL->executeHost(hostBuf2,value,size,offset); - - b3AlignedObjectArray hostBuf; - intBuffer.copyToHost(hostBuf); - - for(int i=0; i -__inline -T getRandom(const T& minV, const T& maxV) -{ - float r = (rand()%10000)/10000.f; - T range = maxV - minV; - return (T)(minV + r*range); -} - -struct b3SortDataCompare -{ - inline bool operator()(const b3SortData& first, const b3SortData& second) const - { - return (first.m_key < second.m_key) || (first.m_key==second.m_key && first.m_value < second.m_value); - } -}; - - -void boundSearchTest( ) -{ - TEST_INIT; - - int maxSize = 1024*256; - int bucketSize = 256; - - b3OpenCLArray srcCL(g_context,g_queue,maxSize); - b3OpenCLArray upperCL(g_context,g_queue,maxSize); - b3OpenCLArray lowerCL(g_context,g_queue,maxSize); - - b3AlignedObjectArray srcHost; - b3AlignedObjectArray upperHost; - b3AlignedObjectArray lowerHost; - b3AlignedObjectArray upperHostCompare; - b3AlignedObjectArray lowerHostCompare; - - b3BoundSearchCL* search = new b3BoundSearchCL(g_context,g_device,g_queue, maxSize); - - - int dx = maxSize/NUM_TESTS; - for(int iter=0; iterexecute(srcCL,size,upperCL,bucketSize,b3BoundSearchCL::BOUND_UPPER); - search->execute(srcCL,size,lowerCL,bucketSize,b3BoundSearchCL::BOUND_LOWER); - - search->executeHost(srcHost,size,upperHostCompare,bucketSize,b3BoundSearchCL::BOUND_UPPER); - search->executeHost(srcHost,size,lowerHostCompare,bucketSize,b3BoundSearchCL::BOUND_LOWER); - - lowerCL.copyToHost(lowerHost); - upperCL.copyToHost(upperHost); - for(int i=0; i buf0Host; - b3AlignedObjectArray buf1Host; - - b3OpenCLArray buf2CL(g_context,g_queue,maxSize); - b3OpenCLArray buf3CL(g_context,g_queue,maxSize); - - - b3PrefixScanCL* scan = new b3PrefixScanCL(g_context,g_device,g_queue,maxSize); - - int dx = maxSize/NUM_TESTS; - for(int iter=0; iterexecuteHost(buf0Host, buf1Host, size, &sumHost ); - scan->execute( buf2CL, buf3CL, size, &sumGPU ); - - buf3CL.copyToHost(buf0Host); - - TEST_ASSERT( sumHost == sumGPU ); - for(int i=0; i buf0Host; - buf0Host.resize(maxSize); - b3AlignedObjectArray buf1Host; - buf1Host.resize(maxSize ); - b3OpenCLArray buf2CL(g_context,g_queue,maxSize); - - b3RadixSort32CL* sort = new b3RadixSort32CL(g_context,g_device,g_queue,maxSize); - - int dx = maxSize/NUM_TESTS; - for(int iter=0; iterexecuteHost( buf0Host); - sort->execute(buf2CL); - - buf2CL.copyToHost(buf1Host); - - for(int i=0; i\n"); - getchar(); -} - diff --git a/opencl/parallel_primitives/test/premake4.lua b/opencl/parallel_primitives/test/premake4.lua deleted file mode 100644 index a292c665d..000000000 --- a/opencl/parallel_primitives/test/premake4.lua +++ /dev/null @@ -1,41 +0,0 @@ -function createProject(vendor) - hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ("OpenCL_primitives_test_" .. vendor) - - initOpenCL(vendor) - - language "C++" - - kind "ConsoleApp" - targetdir "../../../bin" - includedirs {".","..","../../../src"} - - - files { - "main.cpp", - "../../basic_initialize/b3OpenCLInclude.h", - "../../basic_initialize/b3OpenCLUtils.cpp", - "../../basic_initialize/b3OpenCLUtils.h", - "../host/b3FillCL.cpp", - "../host/b3FillCL.h", - "../host/b3BoundSearchCL.cpp", - "../host/b3BoundSearchCL.h", - "../host/b3PrefixScanCL.cpp", - "../host/b3PrefixScanCL.h", - "../host/b3RadixSort32CL.cpp", - "../host/b3RadixSort32CL.h", - "../../../src/Bullet3Common/b3AlignedAllocator.cpp", - "../../../src/Bullet3Common/b3AlignedAllocator.h", - "../../../src/Bullet3Common/b3AlignedObjectArray.h", - } - - end -end - -createProject("AMD") -createProject("Intel") -createProject("NVIDIA") -createProject("Apple") \ No newline at end of file diff --git a/opencl/reduce/main.cpp b/opencl/reduce/main.cpp deleted file mode 100644 index 946a501e5..000000000 --- a/opencl/reduce/main.cpp +++ /dev/null @@ -1,116 +0,0 @@ -///original author: Erwin Coumans -#include "b3OpenCLUtils.h" -#include "../parallel_primitives/host/b3OpenCLArray.h" -#include "../parallel_primitives/host/b3LauncherCL.h" -#include - - -#define MSTRINGIFY(A) #A -const char* kernelString= MSTRINGIFY( -__kernel void ReduceGlobal(__global int* d_in, __global int* d_out, int numElements) -{ - int myId = get_global_id(0); - int tid = get_local_id(0); - - - int ls = get_local_size(0); - for (unsigned int s=ls/2;s>0;s>>=1) - { - if (myId a(ctx,queue); - b3OpenCLArray b(ctx,queue); - b3AlignedObjectArray hostA; - b3AlignedObjectArray hostB; - - for (int i=0;i=numElements) - return; - - float8 aGID = a[iGID]; - float8 bGID = b[iGID]; - - float8 result = aGID + bGID; - // write back out to GMEM - c[iGID] = result; -} diff --git a/opencl/vector_add/VectorAddKernels.h b/opencl/vector_add/VectorAddKernels.h deleted file mode 100644 index 55c238aae..000000000 --- a/opencl/vector_add/VectorAddKernels.h +++ /dev/null @@ -1,20 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* vectorAddCL= \ -"\n" -"\n" -"__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int numElements)\n" -"{\n" -" // get oct-float index into global data array\n" -" int iGID = get_global_id(0);\n" -" if (iGID>=numElements)\n" -" return;\n" -"\n" -" float8 aGID = a[iGID];\n" -" float8 bGID = b[iGID];\n" -"\n" -" float8 result = aGID + bGID;\n" -" // write back out to GMEM\n" -" c[iGID] = result;\n" -"}\n" -"\n" -; diff --git a/opencl/vector_add/main.cpp b/opencl/vector_add/main.cpp deleted file mode 100644 index c7b7956be..000000000 --- a/opencl/vector_add/main.cpp +++ /dev/null @@ -1,408 +0,0 @@ - -///VectorAdd sample, from the NVidia JumpStart Guide -///http://developer.download.nvidia.com/OpenCL/NVIDIA_OpenCL_JumpStart_Guide.pdf - -///Instead of #include we include -///Apart from this include file, all other code should compile and work on OpenCL compliant implementation - - -#define LOAD_FROM_FILE - -#ifdef __APPLE__ - #include -#else - #include -#endif //__APPLE__ -#ifdef _WIN32 -#pragma warning (disable:4996) -#endif -#include -#include -#include -#include - -#define GRID3DOCL_CHECKERROR(a, b) if((a)!=(b)) { printf("3D GRID OCL Error : %d\n", (a)); b3Assert((a) == (b)); } -size_t wgSize; - -#include "VectorAddKernels.h" - -#ifdef CL_PLATFORM_INTEL - const char* preferredPlatform = "Intel(R) Corporation"; -#elif defined CL_PLATFORM_AMD - const char* preferredPlatform = "Advanced Micro Devices, Inc."; -#elif defined CL_PLATFORM_NVIDIA - const char* preferredPlatform = "NVIDIA Corporation"; -#else - const char* preferredPlatform = "Unknown"; -#endif - - - -char* loadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength) -{ - // locals - FILE* pFileStream = NULL; - size_t szSourceLength; - - // open the OpenCL source code file - pFileStream = fopen(cFilename, "rb"); - if(pFileStream == 0) - { - return NULL; - } - - size_t szPreambleLength = strlen(cPreamble); - - // get the length of the source code - fseek(pFileStream, 0, SEEK_END); - szSourceLength = ftell(pFileStream); - fseek(pFileStream, 0, SEEK_SET); - - // allocate a buffer for the source code string and read it in - char* cSourceString = (char *)malloc(szSourceLength + szPreambleLength + 1); - memcpy(cSourceString, cPreamble, szPreambleLength); - fread((cSourceString) + szPreambleLength, szSourceLength, 1, pFileStream); - - // close the file and return the total length of the combined (preamble + source) string - fclose(pFileStream); - if(szFinalLength != 0) - { - *szFinalLength = szSourceLength + szPreambleLength; - } - cSourceString[szSourceLength + szPreambleLength] = '\0'; - - return cSourceString; -} - -size_t workitem_size[3]; - -void printDevInfo(cl_device_id device) -{ - char device_string[1024]; - - clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); - printf( " Device %s:\n", device_string); - - // CL_DEVICE_INFO - cl_device_type type; - clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(type), &type, NULL); - if( type & CL_DEVICE_TYPE_CPU ) - printf(" CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_CPU"); - if( type & CL_DEVICE_TYPE_GPU ) - printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_GPU"); - if( type & CL_DEVICE_TYPE_ACCELERATOR ) - printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); - if( type & CL_DEVICE_TYPE_DEFAULT ) - printf( " CL_DEVICE_TYPE:\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); - - // CL_DEVICE_MAX_COMPUTE_UNITS - cl_uint compute_units; - clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(compute_units), &compute_units, NULL); - printf( " CL_DEVICE_MAX_COMPUTE_UNITS:\t%d\n", compute_units); - - // CL_DEVICE_MAX_WORK_GROUP_SIZE - - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(workitem_size), &workitem_size, NULL); - printf( " CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", workitem_size[0], workitem_size[1], workitem_size[2]); - -} - - - - -// Main function -// ********************************************************************* -int main(int argc, char **argv) -{ - void *srcA, *srcB, *dst; // Host buffers for OpenCL test - cl_context cxGPUContext; // OpenCL context - cl_command_queue cqCommandQue; // OpenCL command que - cl_device_id* cdDevices; // OpenCL device list - cl_program cpProgram; // OpenCL program - cl_kernel ckKernel; // OpenCL kernel - cl_mem cmMemObjs[3]; // OpenCL memory buffer objects: 3 for device - size_t szGlobalWorkSize[1]; // 1D var for Total # of work items - size_t szLocalWorkSize[1]; // 1D var for # of work items in the work group - size_t szParmDataBytes; // Byte size of context information - cl_int ciErr1, ciErr2; // Error code var - - - int iTestN = 100000 * 8; // Size of Vectors to process - - int actualGlobalSize = iTestN / 8; - - - // set Global and Local work size dimensions - szGlobalWorkSize[0] = iTestN >> 3; // do 8 computations per work item - szLocalWorkSize[0]= iTestN>>3; - - - // Allocate and initialize host arrays - srcA = (void *)malloc (sizeof(cl_float) * iTestN); - srcB = (void *)malloc (sizeof(cl_float) * iTestN); - dst = (void *)malloc (sizeof(cl_float) * iTestN); - - int i; - - // Initialize arrays with some values - for (i=0;i processing outside of the buffer - //make sure to check kernel - } - - size_t globalThreads[] = {num_t * workgroupSize}; - size_t localThreads[] = {workgroupSize}; - - - localWorkSize[0] = workgroupSize; - globalWorkSize[0] = num_t * workgroupSize; - localWorkSize[1] = 1; - globalWorkSize[1] = 1; - - // Copy input data from host to GPU and launch kernel - ciErr1 |= clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, globalThreads, localThreads, 0, NULL, NULL); - - } - - if (ciErrNum != CL_SUCCESS) - { - printf("cannot clEnqueueNDRangeKernel\n"); - exit(0); - } - - clFinish(cqCommandQue); - // Read back results and check accumulated errors - ciErr1 |= clEnqueueReadBuffer(cqCommandQue, cmMemObjs[2], CL_TRUE, 0, sizeof(cl_float8) * szGlobalWorkSize[0], dst, 0, NULL, NULL); - - // Release kernel, program, and memory objects - // NOTE: Most properly this should be done at any of the exit points above, but it is omitted elsewhere for clarity. - free(cdDevices); - clReleaseKernel(ckKernel); - clReleaseProgram(cpProgram); - clReleaseCommandQueue(cqCommandQue); - clReleaseContext(cxGPUContext); - - - // print the results - int iErrorCount = 0; - for (i = 0; i < iTestN; i++) - { - if (((float*)dst)[i] != ((float*)srcA)[i]+((float*)srcB)[i]) - iErrorCount++; - } - - if (iErrorCount) - { - printf("Validation FAILED\n"); - } else - { - printf("Validation SUCCESSFULL\n"); - } - // Free host memory, close log and return success - for (i = 0; i < 3; i++) - { - clReleaseMemObject(cmMemObjs[i]); - } - - free(srcA); - free(srcB); - free (dst); - printf("Press ENTER to quit\n"); - getchar(); -} - - diff --git a/opencl/vector_add_simplified/main.cpp b/opencl/vector_add_simplified/main.cpp deleted file mode 100644 index 21604156e..000000000 --- a/opencl/vector_add_simplified/main.cpp +++ /dev/null @@ -1,69 +0,0 @@ -///original author: Erwin Coumans -#include "b3OpenCLUtils.h" -#include "../parallel_primitives/host/b3OpenCLArray.h" -#include "../parallel_primitives/host/b3LauncherCL.h" -#include - - -#define MSTRINGIFY(A) #A -const char* kernelString= MSTRINGIFY( -__kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int numElements) -{ - int iGID = get_global_id(0); - if (iGID>=numElements) - return; - float aGID = a[iGID]; - float bGID = b[iGID]; - float result = aGID + bGID; - c[iGID] = result; -} -); - -int main(int argc, char* argv[]) -{ - int ciErrNum = 0; - int preferred_device = -1; - int preferred_platform = -1; - cl_platform_id platformId; - cl_context ctx; - cl_command_queue queue; - cl_device_id device; - cl_kernel addKernel; - ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId); - b3OpenCLUtils::printPlatformInfo(platformId); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - if (!ctx) { - printf("No OpenCL capable GPU found!"); - return 0; - } - - device = b3OpenCLUtils::getDevice(ctx,0); - queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum); - addKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"VectorAdd",&ciErrNum); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - int numElements = 32; - b3OpenCLArray a(ctx,queue); - b3OpenCLArray b(ctx,queue); - b3OpenCLArray c(ctx,queue); - for (int i=0;i b3VertexArray; #include "Bullet3Common/b3Quickprof.h" #include //for FLT_MAX -#include "basic_initialize/b3OpenCLUtils.h" -#include "parallel_primitives/host/b3LauncherCL.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" //#include "AdlQuaternion.h" -#include "../kernels/satKernels.h" -#include "../kernels/satClipHullContacts.h" -#include "../kernels/bvhTraversal.h" -#include "../kernels/primitiveContacts.h" +#include "kernels/satKernels.h" +#include "kernels/satClipHullContacts.h" +#include "kernels/bvhTraversal.h" +#include "kernels/primitiveContacts.h" #include "Bullet3Geometry/b3AabbUtil.h" +#define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" +#define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" +#define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" +#define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" + #define dot3F4 b3Dot @@ -64,7 +69,7 @@ m_totalContactsOut(m_context, m_queue) // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); //#endif - cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,flags,"opencl/gpu_narrowphase/kernels/sat.cl"); + cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,flags,BT_NARROWPHASE_SAT_PATH); b3Assert(errNum==CL_SUCCESS); m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findSeparatingAxisKernel",&errNum,satProg ); @@ -92,7 +97,7 @@ m_totalContactsOut(m_context, m_queue) // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); //#endif - cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,"opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); + cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,BT_NARROWPHASE_CLIPHULL_PATH); b3Assert(errNum==CL_SUCCESS); m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); @@ -132,7 +137,7 @@ m_totalContactsOut(m_context, m_queue) if (1) { const char* srcBvh = bvhTraversalKernelCL; - cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_narrowphase/kernels/bvhTraversal.cl"); + cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"",BT_NARROWPHASE_BVH_TRAVERSAL_PATH); b3Assert(errNum==CL_SUCCESS); m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,""); @@ -142,7 +147,7 @@ m_totalContactsOut(m_context, m_queue) { const char* primitiveContactsSrc = primitiveContactsKernelsCL; - cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"","opencl/gpu_narrowphase/kernels/primitiveContacts.cl"); + cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"",BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); b3Assert(errNum==CL_SUCCESS); m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,""); @@ -527,7 +532,7 @@ void computeContactPlaneConvex(int pairIndex, b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; c->m_worldPos[i] = pOnB1; } - c->m_worldNormal[3] = numReducedPoints; + c->m_worldNormal[3] = (b3Scalar)numReducedPoints; }//if (dstIdx < numPairs) } @@ -665,7 +670,7 @@ void computeContactPlaneCompound(int pairIndex, b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; c->m_worldPos[i] = pOnB1; } - c->m_worldNormal[3] = numReducedPoints; + c->m_worldNormal[3] = (b3Scalar)numReducedPoints; }//if (dstIdx < numPairs) } @@ -825,7 +830,7 @@ void computeContactSphereConvex(int pairIndex, c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; c->m_worldPos[0] = pOnB1; int numPoints = 1; - c->m_worldNormal[3] = numPoints; + c->m_worldNormal[3] = (b3Scalar)numPoints; }//if (dstIdx < numPairs) } }//if (hasCollision) diff --git a/opencl/gpu_narrowphase/host/b3ConvexHullContact.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h similarity index 95% rename from opencl/gpu_narrowphase/host/b3ConvexHullContact.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h index dbdd5f883..7420ae4dc 100644 --- a/opencl/gpu_narrowphase/host/b3ConvexHullContact.h +++ b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h @@ -2,15 +2,15 @@ #ifndef _CONVEX_HULL_CONTACT_H #define _CONVEX_HULL_CONTACT_H -#include "parallel_primitives/host/b3OpenCLArray.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" #include "Bullet3Common/b3AlignedObjectArray.h" #include "b3ConvexUtility.h" #include "b3ConvexPolyhedronCL.h" #include "b3Collidable.h" #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "parallel_primitives/host/b3Int2.h" -#include "parallel_primitives/host/b3Int4.h" +#include "Bullet3Common/b3Int2.h" +#include "Bullet3Common/b3Int4.h" #include "b3OptimizedBvh.h" #include "b3BvhInfo.h" diff --git a/opencl/gpu_narrowphase/host/b3ConvexPolyhedronCL.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h similarity index 100% rename from opencl/gpu_narrowphase/host/b3ConvexPolyhedronCL.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h diff --git a/opencl/gpu_narrowphase/host/b3ConvexUtility.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.cpp similarity index 100% rename from opencl/gpu_narrowphase/host/b3ConvexUtility.cpp rename to src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.cpp diff --git a/opencl/gpu_narrowphase/host/b3ConvexUtility.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.h similarity index 100% rename from opencl/gpu_narrowphase/host/b3ConvexUtility.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexUtility.h diff --git a/opencl/gpu_narrowphase/host/b3OptimizedBvh.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp similarity index 100% rename from opencl/gpu_narrowphase/host/b3OptimizedBvh.cpp rename to src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp diff --git a/opencl/gpu_narrowphase/host/b3OptimizedBvh.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h similarity index 100% rename from opencl/gpu_narrowphase/host/b3OptimizedBvh.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h diff --git a/opencl/gpu_narrowphase/host/b3QuantizedBvh.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp similarity index 100% rename from opencl/gpu_narrowphase/host/b3QuantizedBvh.cpp rename to src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp diff --git a/opencl/gpu_narrowphase/host/b3QuantizedBvh.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h similarity index 100% rename from opencl/gpu_narrowphase/host/b3QuantizedBvh.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h diff --git a/opencl/gpu_narrowphase/host/b3StridingMeshInterface.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp similarity index 100% rename from opencl/gpu_narrowphase/host/b3StridingMeshInterface.cpp rename to src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp diff --git a/opencl/gpu_narrowphase/host/b3StridingMeshInterface.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h similarity index 100% rename from opencl/gpu_narrowphase/host/b3StridingMeshInterface.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h diff --git a/opencl/gpu_narrowphase/host/b3TriangleCallback.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp similarity index 100% rename from opencl/gpu_narrowphase/host/b3TriangleCallback.cpp rename to src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp diff --git a/opencl/gpu_narrowphase/host/b3TriangleCallback.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h similarity index 100% rename from opencl/gpu_narrowphase/host/b3TriangleCallback.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h diff --git a/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.cpp b/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp similarity index 100% rename from opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.cpp rename to src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp diff --git a/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.h b/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h similarity index 100% rename from opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.h rename to src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h diff --git a/opencl/gpu_narrowphase/kernels/bvhTraversal.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl similarity index 100% rename from opencl/gpu_narrowphase/kernels/bvhTraversal.cl rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl diff --git a/opencl/gpu_narrowphase/kernels/bvhTraversal.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h similarity index 100% rename from opencl/gpu_narrowphase/kernels/bvhTraversal.h rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h diff --git a/opencl/gpu_narrowphase/kernels/primitiveContacts.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl similarity index 100% rename from opencl/gpu_narrowphase/kernels/primitiveContacts.cl rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl diff --git a/opencl/gpu_narrowphase/kernels/primitiveContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h similarity index 100% rename from opencl/gpu_narrowphase/kernels/primitiveContacts.h rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h diff --git a/opencl/gpu_narrowphase/kernels/sat.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl similarity index 100% rename from opencl/gpu_narrowphase/kernels/sat.cl rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl diff --git a/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl similarity index 100% rename from opencl/gpu_narrowphase/kernels/satClipHullContacts.cl rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl diff --git a/opencl/gpu_narrowphase/kernels/satClipHullContacts.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h similarity index 100% rename from opencl/gpu_narrowphase/kernels/satClipHullContacts.h rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h diff --git a/opencl/gpu_narrowphase/kernels/satKernels.h b/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h similarity index 100% rename from opencl/gpu_narrowphase/kernels/satKernels.h rename to src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h diff --git a/opencl/parallel_primitives/host/b3BoundSearchCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp similarity index 96% rename from opencl/parallel_primitives/host/b3BoundSearchCL.cpp rename to src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp index 45c4cd62e..cae108689 100644 --- a/opencl/parallel_primitives/host/b3BoundSearchCL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp @@ -14,16 +14,16 @@ subject to the following restrictions: //Originally written by Takahiro Harada //Host-code rewritten by Erwin Coumans -#define BOUNDSEARCH_PATH "opencl/parallel_primitives/kernels/BoundSearchKernels.cl" +#define BOUNDSEARCH_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl" #define KERNEL0 "SearchSortDataLowerKernel" #define KERNEL1 "SearchSortDataUpperKernel" #define KERNEL2 "SubtractKernel" #include "b3BoundSearchCL.h" -#include "../../basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "b3LauncherCL.h" -#include "../kernels/BoundSearchKernelsCL.h" +#include "kernels/BoundSearchKernelsCL.h" b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize) :m_context(ctx), diff --git a/opencl/parallel_primitives/host/b3BoundSearchCL.h b/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h similarity index 100% rename from opencl/parallel_primitives/host/b3BoundSearchCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h diff --git a/opencl/parallel_primitives/host/b3BufferInfoCL.h b/src/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h similarity index 100% rename from opencl/parallel_primitives/host/b3BufferInfoCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h diff --git a/opencl/parallel_primitives/host/b3FillCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp similarity index 94% rename from opencl/parallel_primitives/host/b3FillCL.cpp rename to src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp index 522cecca9..3379aa0af 100644 --- a/opencl/parallel_primitives/host/b3FillCL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp @@ -1,11 +1,11 @@ #include "b3FillCL.h" -#include "../../basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "b3BufferInfoCL.h" #include "b3LauncherCL.h" -#define FILL_CL_PROGRAM_PATH "opencl/parallel_primitives/kernels/FillKernels.cl" +#define FILL_CL_PROGRAM_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl" -#include "../kernels/FillKernelsCL.h" +#include "kernels/FillKernelsCL.h" b3FillCL::b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue) :m_commandQueue(queue) diff --git a/opencl/parallel_primitives/host/b3FillCL.h b/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h similarity index 94% rename from opencl/parallel_primitives/host/b3FillCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h index 113c549b4..3803d202c 100644 --- a/opencl/parallel_primitives/host/b3FillCL.h +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h @@ -4,8 +4,8 @@ #include "b3OpenCLArray.h" #include "Bullet3Common/b3Scalar.h" -#include "b3Int2.h" -#include "b3Int4.h" +#include "Bullet3Common/b3Int2.h" +#include "Bullet3Common/b3Int4.h" class b3FillCL diff --git a/opencl/parallel_primitives/host/b3LauncherCL.h b/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h similarity index 100% rename from opencl/parallel_primitives/host/b3LauncherCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h diff --git a/opencl/parallel_primitives/host/b3OpenCLArray.h b/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h similarity index 99% rename from opencl/parallel_primitives/host/b3OpenCLArray.h rename to src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h index 2c100bd7f..36ecd6126 100644 --- a/opencl/parallel_primitives/host/b3OpenCLArray.h +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h @@ -2,7 +2,7 @@ #define B3_OPENCL_ARRAY_H #include "Bullet3Common/b3AlignedObjectArray.h" -#include "../../basic_initialize/b3OpenCLInclude.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" template class b3OpenCLArray diff --git a/opencl/parallel_primitives/host/b3PrefixScanCL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp similarity index 94% rename from opencl/parallel_primitives/host/b3PrefixScanCL.cpp rename to src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp index 4a9359e54..6b64ad336 100644 --- a/opencl/parallel_primitives/host/b3PrefixScanCL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp @@ -1,10 +1,10 @@ #include "b3PrefixScanCL.h" #include "b3FillCL.h" -#define B3_PREFIXSCAN_PROG_PATH "opencl/parallel_primitives/kernels/PrefixScanKernels.cl" +#define B3_PREFIXSCAN_PROG_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl" #include "b3LauncherCL.h" -#include "../../basic_initialize/b3OpenCLUtils.h" -#include "../kernels/PrefixScanKernelsCL.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "kernels/PrefixScanKernelsCL.h" b3PrefixScanCL::b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size) :m_commandQueue(queue) diff --git a/opencl/parallel_primitives/host/b3PrefixScanCL.h b/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h similarity index 100% rename from opencl/parallel_primitives/host/b3PrefixScanCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h diff --git a/opencl/parallel_primitives/host/b3RadixSort32CL.cpp b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp similarity index 99% rename from opencl/parallel_primitives/host/b3RadixSort32CL.cpp rename to src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp index 9f5345896..9380574cd 100644 --- a/opencl/parallel_primitives/host/b3RadixSort32CL.cpp +++ b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp @@ -1,13 +1,13 @@ #include "b3RadixSort32CL.h" #include "b3LauncherCL.h" -#include "../../basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "b3PrefixScanCL.h" #include "b3FillCL.h" -#define RADIXSORT32_PATH "opencl/parallel_primitives/kernels/RadixSort32Kernels.cl" +#define RADIXSORT32_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl" -#include "../kernels/RadixSort32KernelsCL.h" +#include "kernels/RadixSort32KernelsCL.h" b3RadixSort32CL::b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity) :m_commandQueue(queue) diff --git a/opencl/parallel_primitives/host/b3RadixSort32CL.h b/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h similarity index 100% rename from opencl/parallel_primitives/host/b3RadixSort32CL.h rename to src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h diff --git a/opencl/parallel_primitives/kernels/BoundSearchKernels.cl b/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl similarity index 100% rename from opencl/parallel_primitives/kernels/BoundSearchKernels.cl rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl diff --git a/opencl/parallel_primitives/kernels/BoundSearchKernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h similarity index 100% rename from opencl/parallel_primitives/kernels/BoundSearchKernelsCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h diff --git a/opencl/parallel_primitives/kernels/CopyKernels.cl b/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl similarity index 100% rename from opencl/parallel_primitives/kernels/CopyKernels.cl rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl diff --git a/opencl/parallel_primitives/kernels/CopyKernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h similarity index 100% rename from opencl/parallel_primitives/kernels/CopyKernelsCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h diff --git a/opencl/parallel_primitives/kernels/FillKernels.cl b/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl similarity index 100% rename from opencl/parallel_primitives/kernels/FillKernels.cl rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl diff --git a/opencl/parallel_primitives/kernels/FillKernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h similarity index 100% rename from opencl/parallel_primitives/kernels/FillKernelsCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h diff --git a/opencl/parallel_primitives/kernels/PrefixScanKernels.cl b/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl similarity index 100% rename from opencl/parallel_primitives/kernels/PrefixScanKernels.cl rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl diff --git a/opencl/parallel_primitives/kernels/PrefixScanKernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h similarity index 100% rename from opencl/parallel_primitives/kernels/PrefixScanKernelsCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h diff --git a/opencl/parallel_primitives/kernels/RadixSort32Kernels.cl b/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl similarity index 100% rename from opencl/parallel_primitives/kernels/RadixSort32Kernels.cl rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl diff --git a/opencl/parallel_primitives/kernels/RadixSort32KernelsCL.h b/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h similarity index 100% rename from opencl/parallel_primitives/kernels/RadixSort32KernelsCL.h rename to src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h diff --git a/opencl/gpu_rigidbody/host/b3Config.h b/src/Bullet3OpenCL/RigidBody/b3Config.h similarity index 100% rename from opencl/gpu_rigidbody/host/b3Config.h rename to src/Bullet3OpenCL/RigidBody/b3Config.h diff --git a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp similarity index 96% rename from opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp rename to src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp index 76aef9a9f..e54e455ff 100644 --- a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.cpp @@ -1,30 +1,30 @@ #include "b3GpuBatchingPgsSolver.h" -#include "../../parallel_primitives/host/b3RadixSort32CL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" #include "Bullet3Common/b3Quickprof.h" -#include "../../parallel_primitives/host/b3LauncherCL.h" -#include "../../parallel_primitives/host/b3BoundSearchCL.h" -#include "../../parallel_primitives/host/b3PrefixScanCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" #include -#include "../../basic_initialize/b3OpenCLUtils.h" -#include "../host/b3Config.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "b3Config.h" #include "b3Solver.h" -#define B3_SOLVER_SETUP_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl" -#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl" -#define BATCHING_NEW_PATH "opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl" +#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" +#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" +#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" +#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" +#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" +#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" -#include "../kernels/solverSetup.h" -#include "../kernels/solverSetup2.h" -#include "../kernels/solveContact.h" -#include "../kernels/solveFriction.h" -#include "../kernels/batchingKernels.h" -#include "../kernels/batchingKernelsNew.h" +#include "kernels/solverSetup.h" +#include "kernels/solverSetup2.h" +#include "kernels/solveContact.h" +#include "kernels/solveFriction.h" +#include "kernels/batchingKernels.h" +#include "kernels/batchingKernelsNew.h" @@ -37,8 +37,8 @@ enum }; -bool gpuBatchContacts = true;//true; -bool gpuSolveConstraint = true;//true; +bool b3GpuBatchContacts = true;//true; +bool b3GpuSolveConstraint = true;//true; struct b3GpuBatchingPgsSolverInternalData @@ -167,7 +167,7 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic } { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH); + cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, B3_BATCHING_PATH); b3Assert(batchingProg); m_data->m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros ); @@ -175,7 +175,7 @@ b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id devic } { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH); + cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH); b3Assert(batchingNewProg); m_data->m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros ); @@ -588,7 +588,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem bool compareGPU = false; if (nContacts) { - if (gpuBatchContacts) + if (b3GpuBatchContacts) { B3_PROFILE("gpu batchContacts"); maxNumBatches = 50;//250; @@ -680,7 +680,7 @@ void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem if (1) { m_data->m_solverGPU->m_nIterations = 4;//10 - if (gpuSolveConstraint) + if (b3GpuSolveConstraint) { B3_PROFILE("GPU solveContactConstraint"); diff --git a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.h similarity index 92% rename from opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h rename to src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.h index 8ecbeaaf9..4c93d0964 100644 --- a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuBatchingPgsSolver.h @@ -2,8 +2,8 @@ #ifndef B3_GPU_BATCHING_PGS_SOLVER_H #define B3_GPU_BATCHING_PGS_SOLVER_H -#include "../../basic_initialize/b3OpenCLInclude.h" -#include "../../parallel_primitives/host/b3OpenCLArray.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" #include "b3GpuConstraint4.h" diff --git a/opencl/gpu_rigidbody/host/b3GpuConstraint4.h b/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h similarity index 100% rename from opencl/gpu_rigidbody/host/b3GpuConstraint4.h rename to src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h diff --git a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp similarity index 98% rename from opencl/gpu_rigidbody/host/b3GpuNarrowPhase.cpp rename to src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp index b6d9904a5..e23ead86e 100644 --- a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp @@ -1,16 +1,16 @@ #include "b3GpuNarrowPhase.h" -#include "parallel_primitives/host/b3OpenCLArray.h" -#include "../../gpu_narrowphase/host/b3ConvexPolyhedronCL.h" -#include "../../gpu_narrowphase/host/b3ConvexHullContact.h" -#include "../../gpu_broadphase/host/b3SapAabb.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" #include #include "b3Config.h" -#include "../../gpu_narrowphase/host/b3OptimizedBvh.h" -#include "../../gpu_narrowphase/host/b3TriangleIndexVertexArray.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h" #include "Bullet3Geometry/b3AabbUtil.h" -#include "../../gpu_narrowphase/host/b3BvhInfo.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" struct b3GpuNarrowPhaseInternalData { diff --git a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h similarity index 95% rename from opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h rename to src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h index 78c6da7e9..5453f490d 100644 --- a/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h @@ -1,8 +1,8 @@ #ifndef B3_GPU_NARROWPHASE_H #define B3_GPU_NARROWPHASE_H -#include "../../gpu_narrowphase/host/b3Collidable.h" -#include "basic_initialize/b3OpenCLInclude.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" #include "Bullet3Common/b3AlignedObjectArray.h" #include "Bullet3Common/b3Vector3.h" diff --git a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp similarity index 95% rename from opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp rename to src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp index c0f77abd8..c6c5b445d 100644 --- a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp @@ -1,20 +1,23 @@ #include "b3GpuRigidBodyPipeline.h" #include "b3GpuRigidBodyPipelineInternalData.h" -#include "../kernels/integrateKernel.h" -#include "../kernels/updateAabbsKernel.h" +#include "kernels/integrateKernel.h" +#include "kernels/updateAabbsKernel.h" -#include "../../basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "b3GpuNarrowPhase.h" #include "Bullet3Geometry/b3AabbUtil.h" -#include "../../gpu_broadphase/host/b3SapAabb.h" -#include "../../gpu_broadphase/host/b3GpuSapBroadphase.h" -#include "parallel_primitives/host/b3LauncherCL.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" #include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h" #include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" //#define TEST_OTHER_GPU_SOLVER +#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" +#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" + bool useDbvt = false; bool useBullet2CpuSolver = false;//false; bool dumpContactStats = false; @@ -60,14 +63,14 @@ b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id devic cl_int errNum=0; { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/integrateKernel.cl"); + cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"",B3_RIGIDBODY_INTEGRATE_PATH); b3Assert(errNum==CL_SUCCESS); m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog); b3Assert(errNum==CL_SUCCESS); clReleaseProgram(prog); } { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl"); + cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"",B3_RIGIDBODY_UPDATEAABB_PATH); b3Assert(errNum==CL_SUCCESS); m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "initializeGpuAabbsFull",&errNum,prog); b3Assert(errNum==CL_SUCCESS); diff --git a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.h b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h similarity index 94% rename from opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.h rename to src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h index c552105a2..f8eea8122 100644 --- a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h @@ -1,7 +1,7 @@ #ifndef B3_GPU_RIGIDBODY_PIPELINE_H #define B3_GPU_RIGIDBODY_PIPELINE_H -#include "../../basic_initialize/b3OpenCLInclude.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" class b3GpuRigidBodyPipeline { diff --git a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipelineInternalData.h b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h similarity index 81% rename from opencl/gpu_rigidbody/host/b3GpuRigidBodyPipelineInternalData.h rename to src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h index 3104c06cf..1e262bb65 100644 --- a/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipelineInternalData.h +++ b/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h @@ -1,13 +1,13 @@ #ifndef B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H #define B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H -#include "../../basic_initialize/b3OpenCLInclude.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" #include "Bullet3Common/b3AlignedObjectArray.h" -#include "../../parallel_primitives/host/b3OpenCLArray.h" -#include "../../gpu_narrowphase/host/b3Collidable.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "Bullet3OpenCL/NarrowphaseCollision/b3Collidable.h" -#include "gpu_broadphase/host/b3SapAabb.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" #include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" diff --git a/opencl/gpu_rigidbody/host/b3Solver.cpp b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp similarity index 96% rename from opencl/gpu_rigidbody/host/b3Solver.cpp rename to src/Bullet3OpenCL/RigidBody/b3Solver.cpp index 7d91b4e7d..70e67659f 100644 --- a/opencl/gpu_rigidbody/host/b3Solver.cpp +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -19,28 +19,26 @@ subject to the following restrictions: ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments bool useNewBatchingKernel = true; -#define B3_SOLVER_SETUP_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup2.cl" - -#define B3_SOLVER_CONTACT_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solveFriction.cl" - -#define BATCHING_PATH "opencl/gpu_rigidbody/kernels/batchingKernels.cl" -#define BATCHING_NEW_PATH "opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl" +#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" +#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" +#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" +#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" +#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" +#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" -#include "../kernels/solverSetup.h" -#include "../kernels/solverSetup2.h" +#include "kernels/solverSetup.h" +#include "kernels/solverSetup2.h" -#include "../kernels/solveContact.h" -#include "../kernels/solveFriction.h" +#include "kernels/solveContact.h" +#include "kernels/solveFriction.h" -#include "../kernels/batchingKernels.h" -#include "../kernels/batchingKernelsNew.h" +#include "kernels/batchingKernels.h" +#include "kernels/batchingKernelsNew.h" #include "Bullet3Common/b3Quickprof.h" -#include "../../parallel_primitives/host/b3LauncherCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" #include "Bullet3Common/b3Vector3.h" struct SolverDebugInfo @@ -161,14 +159,14 @@ b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, } { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH); + cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, B3_BATCHING_PATH); b3Assert(batchingProg); m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros ); b3Assert(m_batchingKernel); } { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH); + cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH); b3Assert(batchingNewProg); m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros ); diff --git a/opencl/gpu_rigidbody/host/b3Solver.h b/src/Bullet3OpenCL/RigidBody/b3Solver.h similarity index 90% rename from opencl/gpu_rigidbody/host/b3Solver.h rename to src/Bullet3OpenCL/RigidBody/b3Solver.h index 738bfa629..6df0b2fd8 100644 --- a/opencl/gpu_rigidbody/host/b3Solver.h +++ b/src/Bullet3OpenCL/RigidBody/b3Solver.h @@ -17,17 +17,17 @@ subject to the following restrictions: #ifndef __ADL_SOLVER_H #define __ADL_SOLVER_H -#include "../../parallel_primitives/host/b3OpenCLArray.h" -#include "../host/b3GpuConstraint4.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "b3GpuConstraint4.h" + #include "Bullet3Collision/NarrowPhaseCollision/b3RigidBodyCL.h" #include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "../host/b3GpuConstraint4.h" -#include "../../parallel_primitives/host/b3PrefixScanCL.h" -#include "../../parallel_primitives/host/b3RadixSort32CL.h" -#include "../../parallel_primitives/host/b3BoundSearchCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" -#include "../../basic_initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #define B3NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) diff --git a/opencl/gpu_rigidbody/kernels/batchingKernels.cl b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/batchingKernels.cl rename to src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl diff --git a/opencl/gpu_rigidbody/kernels/batchingKernels.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/batchingKernels.h rename to src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h diff --git a/opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/batchingKernelsNew.cl rename to src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl diff --git a/opencl/gpu_rigidbody/kernels/batchingKernelsNew.h b/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/batchingKernelsNew.h rename to src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h diff --git a/opencl/gpu_rigidbody/kernels/integrateKernel.cl b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/integrateKernel.cl rename to src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl diff --git a/opencl/gpu_rigidbody/kernels/integrateKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/integrateKernel.h rename to src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h diff --git a/opencl/gpu_rigidbody/kernels/solveContact.cl b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/solveContact.cl rename to src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl diff --git a/opencl/gpu_rigidbody/kernels/solveContact.h b/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/solveContact.h rename to src/Bullet3OpenCL/RigidBody/kernels/solveContact.h diff --git a/opencl/gpu_rigidbody/kernels/solveFriction.cl b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/solveFriction.cl rename to src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl diff --git a/opencl/gpu_rigidbody/kernels/solveFriction.h b/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/solveFriction.h rename to src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h diff --git a/opencl/gpu_rigidbody/kernels/solverSetup.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/solverSetup.cl rename to src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl diff --git a/opencl/gpu_rigidbody/kernels/solverSetup.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/solverSetup.h rename to src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h diff --git a/opencl/gpu_rigidbody/kernels/solverSetup2.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/solverSetup2.cl rename to src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl diff --git a/opencl/gpu_rigidbody/kernels/solverSetup2.h b/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/solverSetup2.h rename to src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h diff --git a/opencl/gpu_rigidbody/kernels/solverUtils.cl b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/solverUtils.cl rename to src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl diff --git a/opencl/gpu_rigidbody/kernels/solverUtils.h b/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/solverUtils.h rename to src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h diff --git a/opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl similarity index 100% rename from opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl rename to src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl diff --git a/opencl/gpu_rigidbody/kernels/updateAabbsKernel.h b/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h similarity index 100% rename from opencl/gpu_rigidbody/kernels/updateAabbsKernel.h rename to src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h diff --git a/opencl/parallel_primitives/host/premake4.lua b/src/Bullet3OpenCL/premake4.lua similarity index 67% rename from opencl/parallel_primitives/host/premake4.lua rename to src/Bullet3OpenCL/premake4.lua index cafa03f31..d4c25f091 100644 --- a/opencl/parallel_primitives/host/premake4.lua +++ b/src/Bullet3OpenCL/premake4.lua @@ -3,15 +3,15 @@ function createProject(vendor) if (hasCL) then - project ("OpenCL_lib_parallel_primitives_host_" .. vendor) + project ("Bullet3OpenCL_" .. vendor) initOpenCL(vendor) kind "StaticLib" - targetdir "../../../lib" + targetdir "../../lib" includedirs { - ".","../../../src" + ".",".." } files { diff --git a/src/Bullet3Serialize/Bullet2FileLoader/premake4.lua b/src/Bullet3Serialize/Bullet2FileLoader/premake4.lua index cbec2a011..a916792b7 100644 --- a/src/Bullet3Serialize/Bullet2FileLoader/premake4.lua +++ b/src/Bullet3Serialize/Bullet2FileLoader/premake4.lua @@ -1,7 +1,7 @@ project "Bullet2FileLoader" kind "StaticLib" - targetdir "../../lib" + targetdir "../../../lib" includedirs { "../../../src" } diff --git a/opencl/basic_initialize/main.cpp b/test/OpenCL/BasicInitialize/main.cpp similarity index 97% rename from opencl/basic_initialize/main.cpp rename to test/OpenCL/BasicInitialize/main.cpp index b708ad1cc..bf8b0ab03 100644 --- a/opencl/basic_initialize/main.cpp +++ b/test/OpenCL/BasicInitialize/main.cpp @@ -15,7 +15,7 @@ subject to the following restrictions: ///original author: Erwin Coumans -#include "b3OpenCLUtils.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include cl_context g_cxMainContext; @@ -94,5 +94,7 @@ int main(int argc, char* argv[]) else { printf("No OpenCL capable GPU found!"); } + printf("press \n"); + getchar(); return 0; } \ No newline at end of file diff --git a/opencl/vector_add/premake4.lua b/test/OpenCL/BasicInitialize/premake4.lua similarity index 54% rename from opencl/vector_add/premake4.lua rename to test/OpenCL/BasicInitialize/premake4.lua index 1bf00306e..fd372df30 100644 --- a/opencl/vector_add/premake4.lua +++ b/test/OpenCL/BasicInitialize/premake4.lua @@ -4,25 +4,28 @@ function createProject(vendor) if (hasCL) then - project ("OpenCL_VectorAdd_" .. vendor) + project ("Test_OpenCL_intialize_" .. vendor) initOpenCL(vendor) language "C++" + kind "ConsoleApp" - targetdir "../../bin" + targetdir "../../../bin" + includedirs {"../../../src"} + files { "main.cpp", - "../basic_initialize/b3OpenCLUtils.cpp", - "../basic_initialize/b3OpenCLUtils.h" + "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp", + "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h" } end end +createProject("Apple") createProject("AMD") createProject("Intel") createProject("NVIDIA") -createProject("Apple") diff --git a/test/b3DynamicBvhBroadphase/premake4.lua b/test/b3DynamicBvhBroadphase/premake4.lua index d0e9df3cc..721f91735 100644 --- a/test/b3DynamicBvhBroadphase/premake4.lua +++ b/test/b3DynamicBvhBroadphase/premake4.lua @@ -1,6 +1,6 @@ -project ("b3DynamicBvhBroadphase_test") +project ("Test_b3DynamicBvhBroadphase_test") language "C++"