add some command-line options

--use_jacobi
--allow_opencl_cpu

b3LauncherCL constructor takes string, to make it easier to determine failing OpenCL kernel
b3SetCustomErrorMessageFunc, printf error and exit(0)
This commit is contained in:
erwin coumans 2013-11-19 13:42:53 -08:00
parent 54909160a7
commit 26dfaa441e
27 changed files with 491 additions and 448 deletions

View File

@ -5,6 +5,8 @@
#include "OpenGLWindow/ShapeData.h" #include "OpenGLWindow/ShapeData.h"
#include "OpenGLWindow/GLInstancingRenderer.h" #include "OpenGLWindow/GLInstancingRenderer.h"
bool gAllowCpuOpenCL = false;
GpuDemo::GpuDemo() GpuDemo::GpuDemo()
:m_clData(0) :m_clData(0)
{ {
@ -32,6 +34,8 @@ void GpuDemo::exitCL()
} }
void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex) void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex)
{ {
void* glCtx=0; void* glCtx=0;
@ -40,11 +44,10 @@ void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex)
int ciErrNum = 0; int ciErrNum = 0;
//#ifdef CL_PLATFORM_INTEL
//cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
//#else
cl_device_type deviceType = CL_DEVICE_TYPE_GPU; cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
//#endif if (gAllowCpuOpenCL)
deviceType = CL_DEVICE_TYPE_ALL;

View File

@ -363,7 +363,7 @@ void ParticleDemo::clientMoveAndDisplay()
b3BufferInfoCL( m_data->m_clPositionBuffer) b3BufferInfoCL( m_data->m_clPositionBuffer)
}; };
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel ); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel,"m_updatePositionsKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numParticles); launcher.setConst( numParticles);
@ -382,7 +382,7 @@ void ParticleDemo::clientMoveAndDisplay()
b3BufferInfoCL( m_data->m_simParamGPU->getBufferCL(),true) b3BufferInfoCL( m_data->m_simParamGPU->getBufferCL(),true)
}; };
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel2 ); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel2 ,"m_updatePositionsKernel2");
launcher.setConst( numParticles); launcher.setConst( numParticles);
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
@ -401,7 +401,7 @@ void ParticleDemo::clientMoveAndDisplay()
b3BufferInfoCL( m_data->m_broadphaseGPU->getAabbBufferWS()), b3BufferInfoCL( m_data->m_broadphaseGPU->getAabbBufferWS()),
}; };
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbsKernel ); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbsKernel,"m_updateAabbsKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( m_data->m_simParamCPU[0].m_particleRad); launcher.setConst( m_data->m_simParamCPU[0].m_particleRad);
launcher.setConst( numParticles); launcher.setConst( numParticles);
@ -428,7 +428,7 @@ void ParticleDemo::clientMoveAndDisplay()
b3BufferInfoCL( m_data->m_broadphaseGPU->getOverlappingPairBuffer(),true), b3BufferInfoCL( m_data->m_broadphaseGPU->getOverlappingPairBuffer(),true),
}; };
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_collideParticlesKernel); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_collideParticlesKernel,"m_collideParticlesKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numPairsGPU); launcher.setConst( numPairsGPU);
launcher.launch1D( numPairsGPU); launcher.launch1D( numPairsGPU);

View File

@ -431,7 +431,7 @@ void PairBench::clientMoveAndDisplay()
if (1) if (1)
{ {
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_sineWaveKernel,"m_sineWaveKernel");
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() ); launcher.setBuffer(m_data->m_bodyTimes->getBufferCL() );
launcher.setConst( numObjects); launcher.setConst( numObjects);
@ -441,7 +441,7 @@ void PairBench::clientMoveAndDisplay()
else else
{ {
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_moveObjectsKernel,"m_moveObjectsKernel");
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
launcher.setConst( numObjects); launcher.setConst( numObjects);
launcher.launch1D( numObjects); launcher.launch1D( numObjects);
@ -455,7 +455,7 @@ void PairBench::clientMoveAndDisplay()
if (updateOnGpu) if (updateOnGpu)
{ {
B3_PROFILE("updateOnGpu"); B3_PROFILE("updateOnGpu");
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbSimple,"m_updateAabbSimple");
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
launcher.setConst( numObjects); launcher.setConst( numObjects);
launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS()); launcher.setBuffer(m_data->m_broadphaseGPU->getAabbBufferWS());
@ -542,7 +542,7 @@ void PairBench::clientMoveAndDisplay()
int numPairs = m_data->m_broadphaseGPU->getNumOverlap(); int numPairs = m_data->m_broadphaseGPU->getNumOverlap();
cl_mem pairBuf = m_data->m_broadphaseGPU->getOverlappingPairBuffer(); cl_mem pairBuf = m_data->m_broadphaseGPU->getOverlappingPairBuffer();
b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_colorPairsKernel); b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_colorPairsKernel,"m_colorPairsKernel");
launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() ); launcher.setBuffer(m_data->m_instancePosOrnColor->getBufferCL() );
launcher.setConst( numObjects); launcher.setConst( numObjects);
launcher.setBuffer( pairBuf); launcher.setBuffer( pairBuf);

View File

@ -57,14 +57,23 @@ extern char OpenSansData[];
extern char* gPairBenchFileName; extern char* gPairBenchFileName;
extern float shadowMapWidth; extern float shadowMapWidth;
extern float shadowMapHeight; extern float shadowMapHeight;
extern bool gDebugLauncherCL;
extern bool gAllowCpuOpenCL;
extern bool gDebugForceLoadingFromSource; extern bool gDebugForceLoadingFromSource;
extern bool gDebugSkipLoadingBinary; extern bool gDebugSkipLoadingBinary;
extern bool useShadowMap; extern bool useShadowMap;
extern float shadowMapWorldSize; extern float shadowMapWorldSize;
extern bool useJacobi; extern bool gUseJacobi;
extern bool useUniformGrid; extern bool useUniformGrid;
extern bool gUseDbvt;
extern bool gDumpContactStats;
extern bool gCalcWorldSpaceAabbOnCpu;
extern bool gUseCalculateOverlappingPairsHost;
extern bool gIntegrateOnCpu;
extern bool gConvertConstraintOnCpu;
static void MyResizeCallback( float width, float height) static void MyResizeCallback( float width, float height)
{ {
g_OpenGLWidth = width; g_OpenGLWidth = width;
@ -91,7 +100,7 @@ enum
}; };
b3AlignedObjectArray<const char*> demoNames; b3AlignedObjectArray<const char*> demoNames;
int selectedDemo = 0; int selectedDemo = 1;
GpuDemo::CreateFunc* allDemos[]= GpuDemo::CreateFunc* allDemos[]=
{ {
//ConcaveCompound2Scene::MyCreateFunc, //ConcaveCompound2Scene::MyCreateFunc,
@ -554,6 +563,11 @@ void writeTextureToPng(int textureWidth, int textureHeight, const char* fileName
#include "Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h" #include "Bullet3Dynamics/ConstraintSolver/b3Generic6DofConstraint.h"
#include "Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h" #include "Bullet3Dynamics/ConstraintSolver/b3Point2PointConstraint.h"
void MyErrorFunc(const char* msg)
{
printf("Error: %s\n",msg);
exit(0);
}
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
@ -566,6 +580,7 @@ int main(int argc, char* argv[])
int sz6 = sizeof(b3Transform); int sz6 = sizeof(b3Transform);
//b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache"); //b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache");
b3SetCustomErrorMessageFunc(MyErrorFunc);
b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile);
b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile);
@ -608,9 +623,8 @@ int main(int argc, char* argv[])
} }
args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName); args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName);
useJacobi = args.CheckCmdLineFlag("use_jacobi");
useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid");
gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch");
dump_timings=args.CheckCmdLineFlag("dump_timings"); dump_timings=args.CheckCmdLineFlag("dump_timings");
ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl"); ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl");
@ -622,6 +636,21 @@ int main(int argc, char* argv[])
ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes"); ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes");
args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex); args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex);
args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex); args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex);
gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu");
gUseJacobi = args.CheckCmdLineFlag("use_jacobi");
gUseDbvt = args.CheckCmdLineFlag("use_dbvt");
gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats");
gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu");
gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu");
gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu");
gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu");
useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid");
args.GetCmdLineArgument("x_dim", ci.arraySizeX); args.GetCmdLineArgument("x_dim", ci.arraySizeX);
args.GetCmdLineArgument("y_dim", ci.arraySizeY); args.GetCmdLineArgument("y_dim", ci.arraySizeY);
args.GetCmdLineArgument("z_dim", ci.arraySizeZ); args.GetCmdLineArgument("z_dim", ci.arraySizeZ);

View File

@ -251,7 +251,7 @@ void GpuRigidBodyDemo::clientMoveAndDisplay()
B3_PROFILE("cl2gl_convert"); B3_PROFILE("cl2gl_convert");
int ciErrNum = 0; int ciErrNum = 0;
cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer();
b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel,"m_copyTransformsToVBOKernel");
launch.setBuffer(bodies); launch.setBuffer(bodies);
launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL());
launch.setConst(numObjects); launch.setConst(numObjects);

View File

@ -255,7 +255,7 @@ void GpuSoftBodyDemo::clientMoveAndDisplay()
B3_PROFILE("cl2gl_convert"); B3_PROFILE("cl2gl_convert");
int ciErrNum = 0; int ciErrNum = 0;
cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer(); cl_mem bodies = m_data->m_rigidBodyPipeline->getBodyBuffer();
b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel); b3LauncherCL launch(m_clData->m_clQueue,m_data->m_copyTransformsToVBOKernel,"m_copyTransformsToVBOKernel");
launch.setBuffer(bodies); launch.setBuffer(bodies);
launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL()); launch.setBuffer(m_data->m_instancePosOrnColor->getBufferCL());
launch.setConst(numObjects); launch.setConst(numObjects);

View File

@ -163,7 +163,8 @@ public:
cl_device_type deviceType = CL_DEVICE_TYPE_ALL; cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
cl_int errNum; cl_int errNum;
cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&errNum); cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&errNum);
if (context)
{
Gwen::UnicodeString strIn = Gwen::Utility::StringToUnicode(platformInfo.m_platformName); Gwen::UnicodeString strIn = Gwen::Utility::StringToUnicode(platformInfo.m_platformName);
Gwen::UnicodeString txt = Gwen::Utility::Format( L"Platform %d (",i)+strIn + Gwen::Utility::Format(L")"); Gwen::UnicodeString txt = Gwen::Utility::Format( L"Platform %d (",i)+strIn + Gwen::Utility::Format(L")");
@ -305,7 +306,7 @@ public:
} }
} }
} }
}
/* /*

View File

@ -165,7 +165,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()), b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()),
}; };
b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); b3LauncherCL launcher(m_queue, m_copyAabbsKernel,"m_copyAabbsKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numSmallAabbs ); launcher.setConst( numSmallAabbs );
int num = numSmallAabbs; int num = numSmallAabbs;
@ -185,7 +185,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()), b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()),
}; };
b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numLargeAabbs ); launcher.setConst( numLargeAabbs );
int num = numLargeAabbs; int num = numLargeAabbs;
@ -212,7 +212,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL() ), b3BufferInfoCL( m_smallAabbsGPU.getBufferCL() ),
b3BufferInfoCL( m_gpuPairs.getBufferCL() ), b3BufferInfoCL( m_gpuPairs.getBufferCL() ),
b3BufferInfoCL(pairCount.getBufferCL())}; b3BufferInfoCL(pairCount.getBufferCL())};
b3LauncherCL launcher(m_queue, m_sap2Kernel); b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numLargeAabbs ); launcher.setConst( numLargeAabbs );
launcher.setConst( numSmallAabbs); launcher.setConst( numSmallAabbs);
@ -239,7 +239,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
m_hashGpu.resize(numSmallAabbs); m_hashGpu.resize(numSmallAabbs);
{ {
B3_PROFILE("kCalcHashAABB"); B3_PROFILE("kCalcHashAABB");
b3LauncherCL launch(m_queue,kCalcHashAABB); b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB");
launch.setConst(numSmallAabbs); launch.setConst(numSmallAabbs);
launch.setBuffer(m_smallAabbsGPU.getBufferCL()); launch.setBuffer(m_smallAabbsGPU.getBufferCL());
launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL());
@ -256,7 +256,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
{ {
B3_PROFILE("kClearCellStart"); B3_PROFILE("kClearCellStart");
b3LauncherCL launch(m_queue,kClearCellStart); b3LauncherCL launch(m_queue,kClearCellStart,"kClearCellStart");
launch.setConst(numCells); launch.setConst(numCells);
launch.setBuffer(m_cellStartGpu.getBufferCL()); launch.setBuffer(m_cellStartGpu.getBufferCL());
launch.launch1D(numCells); launch.launch1D(numCells);
@ -268,7 +268,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
{ {
B3_PROFILE("kFindCellStart"); B3_PROFILE("kFindCellStart");
b3LauncherCL launch(m_queue,kFindCellStart); b3LauncherCL launch(m_queue,kFindCellStart,"kFindCellStart");
launch.setConst(numSmallAabbs); launch.setConst(numSmallAabbs);
launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL());
launch.setBuffer(m_cellStartGpu.getBufferCL()); launch.setBuffer(m_cellStartGpu.getBufferCL());
@ -303,7 +303,7 @@ void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
pairStartCurGpu.copyFromHost(pairStartCpu); pairStartCurGpu.copyFromHost(pairStartCpu);
b3LauncherCL launch(m_queue,kFindOverlappingPairs); b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs");
launch.setConst(numSmallAabbs); launch.setConst(numSmallAabbs);
launch.setBuffer(m_smallAabbsGPU.getBufferCL()); launch.setBuffer(m_smallAabbsGPU.getBufferCL());
launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL());

View File

@ -461,7 +461,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
{ {
B3_PROFILE("launch1D"); B3_PROFILE("launch1D");
b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel); b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel,"m_computePairsIncremental3dSapKernel");
launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL()); launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL());
launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL()); launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL());
launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL()); launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL());
@ -1040,7 +1040,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()), b3BufferInfoCL( m_smallAabbsGPU.getBufferCL()),
}; };
b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numSmallAabbs ); launcher.setConst( numSmallAabbs );
int num = numSmallAabbs; int num = numSmallAabbs;
@ -1063,7 +1063,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
m_sum2.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow? m_sum2.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow?
} }
b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ); b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ,"m_prepareSumVarianceKernel");
launcher.setBuffer(m_smallAabbsGPU.getBufferCL()); launcher.setBuffer(m_smallAabbsGPU.getBufferCL());
launcher.setBuffer(m_sum.getBufferCL()); launcher.setBuffer(m_sum.getBufferCL());
launcher.setBuffer(m_sum2.getBufferCL()); launcher.setBuffer(m_sum2.getBufferCL());
@ -1117,7 +1117,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()), b3BufferInfoCL( m_largeAabbsGPU.getBufferCL()),
}; };
b3LauncherCL launcher(m_queue, m_copyAabbsKernel ); b3LauncherCL launcher(m_queue, m_copyAabbsKernel ,"m_copyAabbsKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numLargeAabbs ); launcher.setConst( numLargeAabbs );
int num = numLargeAabbs; int num = numLargeAabbs;
@ -1140,7 +1140,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
{ {
B3_PROFILE("flipFloatKernel"); B3_PROFILE("flipFloatKernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())}; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())};
b3LauncherCL launcher(m_queue, m_flipFloatKernel ); b3LauncherCL launcher(m_queue, m_flipFloatKernel ,"m_flipFloatKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numSmallAabbs ); launcher.setConst( numSmallAabbs );
launcher.setConst( axis ); launcher.setConst( axis );
@ -1162,7 +1162,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
{ {
B3_PROFILE("scatterKernel"); B3_PROFILE("scatterKernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())}; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
b3LauncherCL launcher(m_queue, m_scatterKernel ); b3LauncherCL launcher(m_queue, m_scatterKernel ,"m_scatterKernel ");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numSmallAabbs); launcher.setConst( numSmallAabbs);
int num = numSmallAabbs; int num = numSmallAabbs;
@ -1184,7 +1184,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
{ {
B3_PROFILE("sap2Kernel"); B3_PROFILE("sap2Kernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_largeAabbsGPU.getBufferCL() ),b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())};
b3LauncherCL launcher(m_queue, m_sap2Kernel); b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numLargeAabbs ); launcher.setConst( numLargeAabbs );
launcher.setConst( numSmallAabbs); launcher.setConst( numSmallAabbs);
@ -1205,7 +1205,7 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
{ {
B3_PROFILE("sapKernel"); B3_PROFILE("sapKernel");
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())};
b3LauncherCL launcher(m_queue, m_sapKernel); b3LauncherCL launcher(m_queue, m_sapKernel,"m_sapKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numSmallAabbs ); launcher.setConst( numSmallAabbs );
launcher.setConst( axis ); launcher.setConst( axis );

View File

@ -2898,11 +2898,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
{ {
//printf("hostPairs[i].z=%d\n",hostPairs[i].z); //printf("hostPairs[i].z=%d\n",hostPairs[i].z);
//int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,
// hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,
hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity, // hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,
oldHostContacts); // oldHostContacts);
if (contactIndex>=0) if (contactIndex>=0)
@ -2954,7 +2954,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_primitiveContactsKernel); b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nPairs ); launcher.setConst( nPairs );
launcher.setConst(maxContactCapacity); launcher.setConst(maxContactCapacity);
@ -3020,7 +3020,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel); b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nPairs ); launcher.setConst( nPairs );
@ -3043,7 +3043,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
numConcavePairs = m_numConcavePairsOut.at(0); numConcavePairs = m_numConcavePairsOut.at(0);
b3LauncherCL launcher(m_queue, m_bvhTraversalKernel); b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel");
launcher.setBuffer( pairs->getBufferCL()); launcher.setBuffer( pairs->getBufferCL());
launcher.setBuffer( bodyBuf->getBufferCL()); launcher.setBuffer( bodyBuf->getBufferCL());
launcher.setBuffer( gpuCollidables.getBufferCL()); launcher.setBuffer( gpuCollidables.getBufferCL());
@ -3088,7 +3088,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_concaveSepNormals.getBufferCL()) b3BufferInfoCL( m_concaveSepNormals.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel); b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numConcavePairs ); launcher.setConst( numConcavePairs );
@ -3132,7 +3132,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL(bvhInfo->getBufferCL()) b3BufferInfoCL(bvhInfo->getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel); b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nPairs ); launcher.setConst( nPairs );
launcher.setConst( compoundPairCapacity); launcher.setConst( compoundPairCapacity);
@ -3263,7 +3263,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel); b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numCompoundPairs ); launcher.setConst( numCompoundPairs );
launcher.setConst(maxContactCapacity); launcher.setConst(maxContactCapacity);
@ -3302,7 +3302,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel); b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numCompoundPairs ); launcher.setConst( numCompoundPairs );
@ -3348,7 +3348,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel); b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numConcavePairs ); launcher.setConst( numConcavePairs );
@ -3406,7 +3406,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()),
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel); b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( numConcavePairs ); launcher.setConst( numConcavePairs );
int num = numConcavePairs; int num = numConcavePairs;
@ -3474,7 +3474,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) b3BufferInfoCL( worldVertsB1GPU.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_findClippingFacesKernel); b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( vertexFaceCapacity); launcher.setConst( vertexFaceCapacity);
launcher.setConst( nPairs ); launcher.setConst( nPairs );
@ -3509,7 +3509,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_clipFacesAndContactReductionKernel); b3LauncherCL launcher(m_queue, m_clipFacesAndContactReductionKernel,"m_clipFacesAndContactReductionKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(vertexFaceCapacity); launcher.setConst(vertexFaceCapacity);
@ -3552,7 +3552,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_newContactReductionKernel); b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(vertexFaceCapacity); launcher.setConst(vertexFaceCapacity);
launcher.setConst( nPairs ); launcher.setConst( nPairs );
@ -3588,7 +3588,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()),
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_clipHullHullKernel); b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nPairs ); launcher.setConst( nPairs );
launcher.setConst(maxContactCapacity); launcher.setConst(maxContactCapacity);
@ -3625,7 +3625,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*
b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()),
b3BufferInfoCL( m_totalContactsOut.getBufferCL()) b3BufferInfoCL( m_totalContactsOut.getBufferCL())
}; };
b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel); b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nCompoundsPairs ); launcher.setConst( nCompoundsPairs );
launcher.setConst(maxContactCapacity); launcher.setConst(maxContactCapacity);

View File

@ -87,7 +87,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCL
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) };
b3LauncherCL launcher( m_queue, m_lowerSortDataKernel ); b3LauncherCL launcher( m_queue, m_lowerSortDataKernel,"m_lowerSortDataKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nSrc ); launcher.setConst( nSrc );
launcher.setConst( nDst ); launcher.setConst( nDst );
@ -98,7 +98,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCL
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
b3LauncherCL launcher(m_queue, m_upperSortDataKernel ); b3LauncherCL launcher(m_queue, m_upperSortDataKernel,"m_upperSortDataKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nSrc ); launcher.setConst( nSrc );
launcher.setConst( nDst ); launcher.setConst( nDst );
@ -122,7 +122,7 @@ void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCL
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
b3LauncherCL launcher( m_queue, m_subtractKernel ); b3LauncherCL launcher( m_queue, m_subtractKernel ,"m_subtractKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( nSrc ); launcher.setConst( nSrc );
launcher.setConst( nDst ); launcher.setConst( nDst );

View File

@ -47,7 +47,7 @@ void b3FillCL::execute(b3OpenCLArray<float>& src, const float value, int n, int
b3Assert( n>0 ); b3Assert( n>0 );
{ {
b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel ); b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel,"m_fillFloatKernel" );
launcher.setBuffer( src.getBufferCL()); launcher.setBuffer( src.getBufferCL());
launcher.setConst( n ); launcher.setConst( n );
launcher.setConst( value ); launcher.setConst( value );
@ -63,7 +63,7 @@ void b3FillCL::execute(b3OpenCLArray<int>& src, const int value, int n, int offs
{ {
b3LauncherCL launcher( m_commandQueue, m_fillIntKernel ); b3LauncherCL launcher( m_commandQueue, m_fillIntKernel ,"m_fillIntKernel");
launcher.setBuffer(src.getBufferCL()); launcher.setBuffer(src.getBufferCL());
launcher.setConst( n); launcher.setConst( n);
launcher.setConst( value); launcher.setConst( value);
@ -80,7 +80,7 @@ void b3FillCL::execute(b3OpenCLArray<unsigned int>& src, const unsigned int valu
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel ); b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel,"m_fillUnsignedIntKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( n ); launcher.setConst( n );
launcher.setConst(value); launcher.setConst(value);
@ -114,7 +114,7 @@ void b3FillCL::execute(b3OpenCLArray<b3Int2> &src, const b3Int2 &value, int n, i
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) };
b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2); b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2,"m_fillKernelInt2");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst(n); launcher.setConst(n);
launcher.setConst(value); launcher.setConst(value);

View File

@ -0,0 +1,287 @@
#include "b3LauncherCL.h"
bool gDebugLauncherCL = false;
b3LauncherCL::b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name)
:m_commandQueue(queue),
m_kernel(kernel),
m_idx(0),
m_enableSerialization(false),
m_name(name)
{
if (gDebugLauncherCL)
{
static int counter = 0;
printf("[%d] Prepare to launch OpenCL kernel %s\n", counter++, name);
}
m_serializationSizeInBytes = sizeof(int);
}
b3LauncherCL::~b3LauncherCL()
{
for (int i=0;i<m_arrays.size();i++)
{
clReleaseMemObject(m_arrays[i]->getBufferCL());
}
if (gDebugLauncherCL)
{
static int counter = 0;
printf("[%d] Finished launching OpenCL kernel %s [%d]\n", counter++,m_name);
}
}
void b3LauncherCL::setBuffer( cl_mem clBuffer)
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = clBuffer;
cl_mem_info param_name = CL_MEM_SIZE;
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer);
b3Assert( status == CL_SUCCESS );
}
void b3LauncherCL::setBuffers( b3BufferInfoCL* buffInfo, int n )
{
for(int i=0; i<n; i++)
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = buffInfo[i].m_clBuffer;
cl_mem_info param_name = CL_MEM_SIZE;
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer);
b3Assert( status == CL_SUCCESS );
}
}
int b3LauncherCL::deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx)
{
int index=0;
int numArguments = *(int*) &buf[index];
index+=sizeof(int);
for (int i=0;i<numArguments;i++)
{
b3KernelArgData* arg = (b3KernelArgData*)&buf[index];
index+=sizeof(b3KernelArgData);
if (arg->m_isBuffer)
{
b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
clData->resize(arg->m_argSizeInBytes);
clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes);
arg->m_clBuffer = clData->getBufferCL();
m_arrays.push_back(clData);
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer);
b3Assert( status == CL_SUCCESS );
index+=arg->m_argSizeInBytes;
} else
{
cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData);
b3Assert( status == CL_SUCCESS );
}
m_kernelArguments.push_back(*arg);
}
m_serializationSizeInBytes = index;
return index;
}
int b3LauncherCL::validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx)
{
int index=0;
int numArguments = *(int*) &goldBuffer[index];
index+=sizeof(int);
if (numArguments != m_kernelArguments.size())
{
printf("failed validation: expected %d arguments, found %d\n",numArguments, m_kernelArguments.size());
return -1;
}
for (int ii=0;ii<numArguments;ii++)
{
b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index];
if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes)
{
printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n",ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes);
return -2;
}
{
int expected = argGold->m_isBuffer;
int found = m_kernelArguments[ii].m_isBuffer;
if (expected != found)
{
printf("failed validation: argument %d isBuffer expected: %d, found %d\n",ii,expected, found);
return -3;
}
}
index+=sizeof(b3KernelArgData);
if (argGold->m_isBuffer)
{
unsigned char* memBuf= (unsigned char*) malloc(m_kernelArguments[ii].m_argSizeInBytes);
unsigned char* goldBuf = &goldBuffer[index];
for (int j=0;j<m_kernelArguments[j].m_argSizeInBytes;j++)
{
memBuf[j] = 0xaa;
}
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes,
memBuf, 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = goldBuf[b];
int found = memBuf[b];
if (expected != found)
{
printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -4;
}
}
index+=argGold->m_argSizeInBytes;
} else
{
//compare content
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = argGold->m_argData[b];
int found =m_kernelArguments[ii].m_argData[b];
if (expected != found)
{
printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -5;
}
}
}
}
return index;
}
int b3LauncherCL::serializeArguments(unsigned char* destBuffer, int destBufferCapacity)
{
//initialize to known values
for (int i=0;i<destBufferCapacity;i++)
destBuffer[i] = 0xec;
assert(destBufferCapacity>=m_serializationSizeInBytes);
//todo: use the b3Serializer for this to allow for 32/64bit, endianness etc
int numArguments = m_kernelArguments.size();
int curBufferSize = 0;
int* dest = (int*)&destBuffer[curBufferSize];
*dest = numArguments;
curBufferSize += sizeof(int);
for (int i=0;i<this->m_kernelArguments.size();i++)
{
b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize];
*arg = m_kernelArguments[i];
curBufferSize+=sizeof(b3KernelArgData);
if (arg->m_isBuffer==1)
{
//copy the OpenCL buffer content
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes,
&destBuffer[curBufferSize], 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
curBufferSize+=arg->m_argSizeInBytes;
}
}
return curBufferSize;
}
void b3LauncherCL::serializeToFile(const char* fileName, int numWorkItems)
{
int num = numWorkItems;
int buffSize = getSerializationBufferSize();
unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
for (int i=0;i<buffSize+1;i++)
{
unsigned char* ptr = (unsigned char*)&buf[i];
*ptr = 0xff;
}
// int actualWrite = serializeArguments(buf,buffSize);
// unsigned char* cptr = (unsigned char*)&buf[buffSize];
// printf("buf[buffSize] = %d\n",*cptr);
assert(buf[buffSize]==0xff);//check for buffer overrun
int* ptr = (int*)&buf[buffSize];
*ptr = num;
FILE* f = fopen(fileName,"wb");
fwrite(buf,buffSize+sizeof(int),1,f);
fclose(f);
delete[] buf;
}

View File

@ -39,286 +39,31 @@ class b3LauncherCL
int m_serializationSizeInBytes; int m_serializationSizeInBytes;
bool m_enableSerialization; bool m_enableSerialization;
const char* m_name;
public: public:
b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays; b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays;
b3LauncherCL(cl_command_queue queue, cl_kernel kernel) b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
:m_commandQueue(queue),
m_kernel(kernel),
m_idx(0),
m_enableSerialization(false)
{
m_serializationSizeInBytes = sizeof(int);
}
virtual ~b3LauncherCL() virtual ~b3LauncherCL();
{
for (int i=0;i<m_arrays.size();i++)
{
clReleaseMemObject(m_arrays[i]->getBufferCL());
}
}
inline void setBuffer( cl_mem clBuffer) void setBuffer( cl_mem clBuffer);
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = clBuffer;
cl_mem_info param_name = CL_MEM_SIZE; void setBuffers( b3BufferInfoCL* buffInfo, int n );
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer);
b3Assert( status == CL_SUCCESS );
}
inline void setBuffers( b3BufferInfoCL* buffInfo, int n )
{
for(int i=0; i<n; i++)
{
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 1;
kernelArg.m_clBuffer = buffInfo[i].m_clBuffer;
cl_mem_info param_name = CL_MEM_SIZE;
size_t param_value;
size_t sizeInBytes = sizeof(size_t);
size_t actualSizeInBytes;
cl_int err;
err = clGetMemObjectInfo ( kernelArg.m_clBuffer,
param_name,
sizeInBytes,
&param_value,
&actualSizeInBytes);
b3Assert( err == CL_SUCCESS );
kernelArg.m_argSizeInBytes = param_value;
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes+= sizeof(b3KernelArgData);
m_serializationSizeInBytes+=param_value;
}
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer);
b3Assert( status == CL_SUCCESS );
}
}
int getSerializationBufferSize() const int getSerializationBufferSize() const
{ {
return m_serializationSizeInBytes; return m_serializationSizeInBytes;
} }
inline int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx) int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);
{
int index=0;
int numArguments = *(int*) &buf[index]; inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);
index+=sizeof(int);
for (int i=0;i<numArguments;i++) int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
{
b3KernelArgData* arg = (b3KernelArgData*)&buf[index];
index+=sizeof(b3KernelArgData);
if (arg->m_isBuffer)
{
b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes);
clData->resize(arg->m_argSizeInBytes);
clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes);
arg->m_clBuffer = clData->getBufferCL();
m_arrays.push_back(clData);
cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer);
b3Assert( status == CL_SUCCESS );
index+=arg->m_argSizeInBytes;
} else
{
cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData);
b3Assert( status == CL_SUCCESS );
}
m_kernelArguments.push_back(*arg);
}
m_serializationSizeInBytes = index;
return index;
}
inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx)
{
int index=0;
int numArguments = *(int*) &goldBuffer[index];
index+=sizeof(int);
if (numArguments != m_kernelArguments.size())
{
printf("failed validation: expected %d arguments, found %d\n",numArguments, m_kernelArguments.size());
return -1;
}
for (int ii=0;ii<numArguments;ii++)
{
b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index];
if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes)
{
printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n",ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes);
return -2;
}
{
int expected = argGold->m_isBuffer;
int found = m_kernelArguments[ii].m_isBuffer;
if (expected != found)
{
printf("failed validation: argument %d isBuffer expected: %d, found %d\n",ii,expected, found);
return -3;
}
}
index+=sizeof(b3KernelArgData);
if (argGold->m_isBuffer)
{
unsigned char* memBuf= (unsigned char*) malloc(m_kernelArguments[ii].m_argSizeInBytes);
unsigned char* goldBuf = &goldBuffer[index];
for (int j=0;j<m_kernelArguments[j].m_argSizeInBytes;j++)
{
memBuf[j] = 0xaa;
}
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes,
memBuf, 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = goldBuf[b];
int found = memBuf[b];
if (expected != found)
{
printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -4;
}
}
index+=argGold->m_argSizeInBytes;
} else
{
//compare content
for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++)
{
int expected = argGold->m_argData[b];
int found =m_kernelArguments[ii].m_argData[b];
if (expected != found)
{
printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n",
ii, b, expected, found);
return -5;
}
}
}
}
return index;
}
inline int serializeArguments(unsigned char* destBuffer, int destBufferCapacity)
{
//initialize to known values
for (int i=0;i<destBufferCapacity;i++)
destBuffer[i] = 0xec;
assert(destBufferCapacity>=m_serializationSizeInBytes);
//todo: use the b3Serializer for this to allow for 32/64bit, endianness etc
int numArguments = m_kernelArguments.size();
int curBufferSize = 0;
int* dest = (int*)&destBuffer[curBufferSize];
*dest = numArguments;
curBufferSize += sizeof(int);
for (int i=0;i<this->m_kernelArguments.size();i++)
{
b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize];
*arg = m_kernelArguments[i];
curBufferSize+=sizeof(b3KernelArgData);
if (arg->m_isBuffer==1)
{
//copy the OpenCL buffer content
cl_int status = 0;
status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes,
&destBuffer[curBufferSize], 0,0,0 );
b3Assert( status==CL_SUCCESS );
clFinish(m_commandQueue);
curBufferSize+=arg->m_argSizeInBytes;
}
}
return curBufferSize;
}
void serializeToFile(const char* fileName, int numWorkItems)
{
int num = numWorkItems;
int buffSize = getSerializationBufferSize();
unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
for (int i=0;i<buffSize+1;i++)
{
unsigned char* ptr = (unsigned char*)&buf[i];
*ptr = 0xff;
}
// int actualWrite = serializeArguments(buf,buffSize);
// unsigned char* cptr = (unsigned char*)&buf[buffSize];
// printf("buf[buffSize] = %d\n",*cptr);
assert(buf[buffSize]==0xff);//check for buffer overrun
int* ptr = (int*)&buf[buffSize];
*ptr = num;
FILE* f = fopen(fileName,"wb");
fwrite(buf,buffSize+sizeof(int),1,f);
fclose(f);
delete[] buf;
}
void serializeToFile(const char* fileName, int numWorkItems);
template<typename T> template<typename T>
inline void setConst( const T& consts ) inline void setConst( const T& consts )

View File

@ -63,7 +63,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<uns
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_localScanKernel ); b3LauncherCL launcher( m_commandQueue, m_localScanKernel,"m_localScanKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer ); launcher.setConst( constBuffer );
launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE ); launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE );
@ -72,7 +72,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<uns
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ); b3LauncherCL launcher( m_commandQueue, m_blockSumKernel,"m_blockSumKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer ); launcher.setConst( constBuffer );
launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE ); launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE );
@ -82,7 +82,7 @@ void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<uns
if( numBlocks > 1 ) if( numBlocks > 1 )
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_propagationKernel ); b3LauncherCL launcher( m_commandQueue, m_propagationKernel,"m_propagationKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer ); launcher.setConst( constBuffer );
launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE ); launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE );

View File

@ -63,7 +63,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_localScanKernel ); b3LauncherCL launcher( m_commandQueue, m_localScanKernel ,"m_localScanKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer ); launcher.setConst( constBuffer );
launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE ); launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE );
@ -72,7 +72,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ); b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ,"m_blockSumKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer ); launcher.setConst( constBuffer );
launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE ); launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE );
@ -82,7 +82,7 @@ void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<
if( numBlocks > 1 ) if( numBlocks > 1 )
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_propagationKernel ); b3LauncherCL launcher( m_commandQueue, m_propagationKernel ,"m_propagationKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( constBuffer ); launcher.setConst( constBuffer );
launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE ); launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE );

View File

@ -294,7 +294,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
if (src->size()) if (src->size())
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel); b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel,"m_streamCountSortDataKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
@ -328,7 +328,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
if (fastScan) if (fastScan)
{// prefix scan group histogram {// prefix scan group histogram
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel ); b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
launcher.launch1D( 128, 128 ); launcher.launch1D( 128, 128 );
@ -362,7 +362,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sor
if (src->size()) if (src->size())
{// local sort and distribute {// local sort and distribute
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )}; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel ); b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel,"m_sortAndScatterSortDataKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );
@ -641,7 +641,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
if (src->size()) if (src->size())
{ {
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher(m_commandQueue, m_streamCountKernel); b3LauncherCL launcher(m_commandQueue, m_streamCountKernel,"m_streamCountKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
@ -662,7 +662,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
if (fastScan) if (fastScan)
{// prefix scan group histogram {// prefix scan group histogram
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel ); b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
launcher.launch1D( 128, 128 ); launcher.launch1D( 128, 128 );
@ -676,7 +676,7 @@ void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBi
if (src->size()) if (src->size())
{// local sort and distribute {// local sort and distribute
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )}; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel ); b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel ,"m_sortAndScatterKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );

View File

@ -230,7 +230,7 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
{ {
B3_PROFILE("raycast launch1D"); B3_PROFILE("raycast launch1D");
b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
int numRays = rays.size(); int numRays = rays.size();
launcher.setConst(numRays); launcher.setConst(numRays);

View File

@ -790,7 +790,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
{ {
B3_PROFILE("m_countBodiesKernel"); B3_PROFILE("m_countBodiesKernel");
b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel,"m_countBodiesKernel");
launcher.setBuffer(contactBuf);//manifoldPtr->getBufferCL()); launcher.setBuffer(contactBuf);//manifoldPtr->getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
@ -815,7 +815,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
{ {
B3_PROFILE("contactToConstraintSplitKernel"); B3_PROFILE("contactToConstraintSplitKernel");
b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel,"m_contactToConstraintSplitKernel");
launcher.setBuffer(contactBuf); launcher.setBuffer(contactBuf);
launcher.setBuffer(bodyBuf); launcher.setBuffer(bodyBuf);
launcher.setBuffer(inertiaBuf); launcher.setBuffer(inertiaBuf);
@ -840,11 +840,12 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
{ {
B3_PROFILE("m_clearVelocitiesKernel"); B3_PROFILE("m_clearVelocitiesKernel");
b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel,"m_clearVelocitiesKernel");
launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
launch.setConst(totalNumSplitBodies); launch.setConst(totalNumSplitBodies);
launch.launch1D(totalNumSplitBodies); launch.launch1D(totalNumSplitBodies);
clFinish(m_queue);
} }
@ -854,7 +855,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
{ {
{ {
B3_PROFILE("m_solveContactKernel"); B3_PROFILE("m_solveContactKernel");
b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel ); b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" );
launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
launcher.setBuffer(bodyBuf); launcher.setBuffer(bodyBuf);
launcher.setBuffer(inertiaBuf); launcher.setBuffer(inertiaBuf);
@ -869,14 +870,14 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
launcher.setConst(numManifolds); launcher.setConst(numManifolds);
launcher.launch1D(numManifolds); launcher.launch1D(numManifolds);
clFinish(m_queue);
} }
{ {
B3_PROFILE("average velocities"); B3_PROFILE("average velocities");
b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel");
launcher.setBuffer(bodyBuf); launcher.setBuffer(bodyBuf);
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
@ -884,13 +885,13 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
launcher.setConst(numBodies); launcher.setConst(numBodies);
launcher.launch1D(numBodies); launcher.launch1D(numBodies);
clFinish(m_queue);
} }
{ {
B3_PROFILE("m_solveFrictionKernel"); B3_PROFILE("m_solveFrictionKernel");
b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel");
launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
launcher.setBuffer(bodyBuf); launcher.setBuffer(bodyBuf);
launcher.setBuffer(inertiaBuf); launcher.setBuffer(inertiaBuf);
@ -905,13 +906,13 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
launcher.setConst(numManifolds); launcher.setConst(numManifolds);
launcher.launch1D(numManifolds); launcher.launch1D(numManifolds);
clFinish(m_queue);
} }
{ {
B3_PROFILE("average velocities"); B3_PROFILE("average velocities");
b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel");
launcher.setBuffer(bodyBuf); launcher.setBuffer(bodyBuf);
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
@ -919,7 +920,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
launcher.setConst(numBodies); launcher.setConst(numBodies);
launcher.launch1D(numBodies); launcher.launch1D(numBodies);
clFinish(m_queue);
} }
@ -929,7 +930,7 @@ void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_m
{ {
B3_PROFILE("update body velocities"); B3_PROFILE("update body velocities");
b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel); b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel,"m_updateBodyVelocitiesKernel");
launcher.setBuffer(bodyBuf); launcher.setBuffer(bodyBuf);
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); launcher.setBuffer(m_data->m_bodyCount->getBufferCL());

View File

@ -231,7 +231,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
{ {
B3_PROFILE("m_initSolverBodiesKernel"); B3_PROFILE("m_initSolverBodiesKernel");
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initSolverBodiesKernel); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initSolverBodiesKernel,"m_initSolverBodiesKernel");
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL());
launcher.setConst(numBodies); launcher.setConst(numBodies);
@ -280,7 +280,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
{ {
B3_PROFILE("getInfo1Kernel"); B3_PROFILE("getInfo1Kernel");
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo1Kernel); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo1Kernel,"m_getInfo1Kernel");
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL());
launcher.setConst(numConstraints); launcher.setConst(numConstraints);
@ -300,7 +300,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
{ {
B3_PROFILE("init batch constraints"); B3_PROFILE("init batch constraints");
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initBatchConstraintsKernel); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initBatchConstraintsKernel,"m_initBatchConstraintsKernel");
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
@ -348,7 +348,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3
{ {
{ {
B3_PROFILE("getInfo2Kernel"); B3_PROFILE("getInfo2Kernel");
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo2Kernel); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo2Kernel,"m_getInfo2Kernel");
launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
@ -759,7 +759,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArr
int numConstraintsInBatch*/ int numConstraintsInBatch*/
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels,"m_solveJointConstraintRowsKernels");
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
@ -1040,7 +1040,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b
if (gpuBreakConstraints) if (gpuBreakConstraints)
{ {
B3_PROFILE("breakViolatedConstraintsKernel"); B3_PROFILE("breakViolatedConstraintsKernel");
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_breakViolatedConstraintsKernel); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_breakViolatedConstraintsKernel,"m_breakViolatedConstraintsKernel");
launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
@ -1090,7 +1090,7 @@ b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b
{ {
B3_PROFILE("GPU write back velocities and transforms"); B3_PROFILE("GPU write back velocities and transforms");
b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_writeBackVelocitiesKernel); b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_writeBackVelocitiesKernel,"m_writeBackVelocitiesKernel");
launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL());
launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
launcher.setConst(numBodies); launcher.setConst(numBodies);

View File

@ -1,32 +1,16 @@
//#define USE_CPU
#ifdef USE_CPU
bool b3GpuBatchContacts = false;
bool b3GpuSolveConstraint = false;
bool gpuRadixSort=false;
bool gpuSetSortData = false;
bool optionalSortContactsDeterminism = true; bool gCpuBatchContacts = false;
bool gpuSortContactsDeterminism = false; bool gCpuSolveConstraint = false;
bool useCpuCopyConstraints = true; bool gCpuRadixSort=false;
bool gCpuSetSortData = false;
bool gCpuSortContactsDeterminism = false;
bool gUseCpuCopyConstraints = false;
bool gUseScanHost = false;
bool gReorderContactsOnCpu = false;
bool useScanHost = true; bool optionalSortContactsDeterminism = true;
bool reorderContactsOnCpu = true;
#else
bool b3GpuBatchContacts = true;
bool b3GpuSolveConstraint = true;
bool gpuRadixSort=true;
bool gpuSetSortData = true;
bool optionalSortContactsDeterminism = true;
bool gpuSortContactsDeterminism = true;
bool useCpuCopyConstraints = false;
bool useScanHost = false;
bool reorderContactsOnCpu = false;
#endif
#include "b3GpuPgsContactSolver.h" #include "b3GpuPgsContactSolver.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" #include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
@ -336,7 +320,7 @@ void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray<b3Rigid
cdata.z = ib; cdata.z = ib;
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel ); b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" );
#if 1 #if 1
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
@ -441,7 +425,7 @@ void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray<b3Rigid
,b3BufferInfoCL(&gpuDebugInfo) ,b3BufferInfoCL(&gpuDebugInfo)
#endif //DEBUG_ME #endif //DEBUG_ME
}; };
b3LauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel ); b3LauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
//launcher.setConst( cdata.x ); //launcher.setConst( cdata.x );
launcher.setConst( cdata.y ); launcher.setConst( cdata.y );
@ -598,7 +582,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (optionalSortContactsDeterminism) if (optionalSortContactsDeterminism)
{ {
if (gpuSortContactsDeterminism) if (!gCpuSortContactsDeterminism)
{ {
B3_PROFILE("GPU Sort contact constraints (determinism)"); B3_PROFILE("GPU Sort contact constraints (determinism)");
@ -608,7 +592,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0); m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);
{ {
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel); b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel");
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts); launcher.setConst(numContacts);
@ -616,7 +600,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
} }
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
{ {
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel); b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel");
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts); launcher.setConst(numContacts);
@ -624,7 +608,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
} }
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
{ {
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel); b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel");
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts); launcher.setConst(numContacts);
@ -634,7 +618,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
{ {
b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel); b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel");
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
launcher.setConst(numContacts); launcher.setConst(numContacts);
@ -651,7 +635,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
// , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel); b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL());
launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
@ -755,7 +739,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
if (gpuSetSortData) if (!gCpuSetSortData)
{ // 2. set cell idx { // 2. set cell idx
B3_PROFILE("GPU set cell idx"); B3_PROFILE("GPU set cell idx");
struct CB struct CB
@ -779,7 +763,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel ); b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata.m_nContacts ); launcher.setConst( cdata.m_nContacts );
launcher.setConst( cdata.m_scale ); launcher.setConst( cdata.m_scale );
@ -812,7 +796,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (gpuRadixSort) if (!gCpuRadixSort)
{ // 3. sort by cell idx { // 3. sort by cell idx
B3_PROFILE("gpuRadixSort"); B3_PROFILE("gpuRadixSort");
//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
@ -836,7 +820,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
} }
if (useScanHost) if (gUseScanHost)
{ {
// 4. find entries // 4. find entries
B3_PROFILE("cpuBoundSearch"); B3_PROFILE("cpuBoundSearch");
@ -879,7 +863,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (nContacts) if (nContacts)
{ // 5. sort constraints by cellIdx { // 5. sort constraints by cellIdx
if (reorderContactsOnCpu) if (gReorderContactsOnCpu)
{ {
B3_PROFILE("cpu m_reorderContactKernel"); B3_PROFILE("cpu m_reorderContactKernel");
b3AlignedObjectArray<b3SortData> sortDataHost; b3AlignedObjectArray<b3SortData> sortDataHost;
@ -918,7 +902,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel); b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
launcher.launch1D( nContacts, 64 ); launcher.launch1D( nContacts, 64 );
@ -944,7 +928,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
if (nContacts) if (nContacts)
{ {
if (useCpuCopyConstraints) if (gUseCpuCopyConstraints)
{ {
for (int i=0;i<nContacts;i++) for (int i=0;i<nContacts;i++)
{ {
@ -962,7 +946,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() )
}; };
b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel ); b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
launcher.setConst( cdata ); launcher.setConst( cdata );
launcher.launch1D( nContacts, 64 ); launcher.launch1D( nContacts, 64 );
@ -975,7 +959,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
bool compareGPU = false; bool compareGPU = false;
if (nContacts) if (nContacts)
{ {
if (b3GpuBatchContacts) if (!gCpuBatchContacts)
{ {
B3_PROFILE("gpu batchContacts"); B3_PROFILE("gpu batchContacts");
maxNumBatches = 150;//250; maxNumBatches = 150;//250;
@ -984,10 +968,12 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
} else } else
{ {
B3_PROFILE("cpu batchContacts"); B3_PROFILE("cpu batchContacts");
b3AlignedObjectArray<b3Contact4> cpuContacts; static b3AlignedObjectArray<b3Contact4> cpuContacts;
b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
contactsIn->copyToHost(cpuContacts); {
B3_PROFILE("copyToHost");
contactsIn->copyToHost(cpuContacts);
}
b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;
@ -1025,7 +1011,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
} }
} }
clFinish(m_data->m_queue); //clFinish(m_data->m_queue);
} }
{ {
B3_PROFILE("m_contactBuffer->copyFromHost"); B3_PROFILE("m_contactBuffer->copyFromHost");
@ -1063,7 +1049,7 @@ void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
int numIter = 4; int numIter = 4;
m_data->m_solverGPU->m_nIterations = numIter;//10 m_data->m_solverGPU->m_nIterations = numIter;//10
if (b3GpuSolveConstraint) if (!gCpuSolveConstraint)
{ {
B3_PROFILE("GPU solveContactConstraint"); B3_PROFILE("GPU solveContactConstraint");

View File

@ -33,27 +33,15 @@ subject to the following restrictions:
#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" #define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" #define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"
bool useBullet2CpuSolver = true;
//choice of contact solver //choice of contact solver
bool useJacobi = false; bool gUseJacobi = false;
bool gUseDbvt = false;
//#define USE_CPU bool gDumpContactStats = false;
#ifdef USE_CPU bool gCalcWorldSpaceAabbOnCpu = false;
bool useDbvt = true; bool gUseCalculateOverlappingPairsHost = false;
bool useBullet2CpuSolver = true; bool gIntegrateOnCpu = false;
bool dumpContactStats = false;
bool calcWorldSpaceAabbOnCpu = true;
bool useCalculateOverlappingPairsHost = true;
bool integrateOnCpu = true;
#else
bool useDbvt = false;
bool useBullet2CpuSolver = true;
bool dumpContactStats = false;
bool calcWorldSpaceAabbOnCpu = false;//true;
bool useCalculateOverlappingPairsHost = false;
bool integrateOnCpu = false;
#endif
#define TEST_OTHER_GPU_SOLVER 1 #define TEST_OTHER_GPU_SOLVER 1
#ifdef TEST_OTHER_GPU_SOLVER #ifdef TEST_OTHER_GPU_SOLVER
@ -241,7 +229,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
//compute overlapping pairs //compute overlapping pairs
{ {
if (useDbvt) if (gUseDbvt)
{ {
{ {
B3_PROFILE("setAabb"); B3_PROFILE("setAabb");
@ -261,7 +249,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs(); numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
} else } else
{ {
if (useCalculateOverlappingPairsHost) if (gUseCalculateOverlappingPairsHost)
{ {
m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs); m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs);
} else } else
@ -284,7 +272,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
{ {
cl_mem pairs =0; cl_mem pairs =0;
cl_mem aabbsWS =0; cl_mem aabbsWS =0;
if (useDbvt) if (gUseDbvt)
{ {
B3_PROFILE("m_overlappingPairsGPU->copyFromHost"); B3_PROFILE("m_overlappingPairsGPU->copyFromHost");
m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
@ -300,13 +288,13 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies); m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies);
numContacts = m_data->m_narrowphase->getNumContactsGpu(); numContacts = m_data->m_narrowphase->getNumContactsGpu();
if (useDbvt) if (gUseDbvt)
{ {
///store the cached information (contact locations in the 'z' component) ///store the cached information (contact locations in the 'z' component)
B3_PROFILE("m_overlappingPairsGPU->copyToHost"); B3_PROFILE("m_overlappingPairsGPU->copyToHost");
m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
} }
if (dumpContactStats && numContacts) if (gDumpContactStats && numContacts)
{ {
m_data->m_narrowphase->getContactsGpu(); m_data->m_narrowphase->getContactsGpu();
@ -369,7 +357,7 @@ void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
#ifdef TEST_OTHER_GPU_SOLVER #ifdef TEST_OTHER_GPU_SOLVER
if (useJacobi) if (gUseJacobi)
{ {
bool useGpu = true; bool useGpu = true;
if (useGpu) if (useGpu)
@ -453,7 +441,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep)
int numBodies = m_data->m_narrowphase->getNumRigidBodies(); int numBodies = m_data->m_narrowphase->getNumRigidBodies();
float angularDamp = 0.99f; float angularDamp = 0.99f;
if (integrateOnCpu) if (gIntegrateOnCpu)
{ {
if(numBodies) if(numBodies)
{ {
@ -470,7 +458,7 @@ void b3GpuRigidBodyPipeline::integrate(float timeStep)
} }
} else } else
{ {
b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel); b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel,"m_integrateTransformsKernel");
launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
launcher.setConst(numBodies); launcher.setConst(numBodies);
@ -492,12 +480,12 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
if (!numBodies) if (!numBodies)
return; return;
if (calcWorldSpaceAabbOnCpu) if (gCalcWorldSpaceAabbOnCpu)
{ {
if (numBodies) if (numBodies)
{ {
if (useDbvt) if (gUseDbvt)
{ {
m_data->m_allAabbsCPU.resize(numBodies); m_data->m_allAabbsCPU.resize(numBodies);
m_data->m_narrowphase->readbackAllBodiesToCpu(); m_data->m_narrowphase->readbackAllBodiesToCpu();
@ -521,7 +509,7 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
} else } else
{ {
//__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB) //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)
b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel); b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel,"m_updateAabbsKernel");
launcher.setConst(numBodies); launcher.setConst(numBodies);
cl_mem bodies = m_data->m_narrowphase->getBodiesGpu(); cl_mem bodies = m_data->m_narrowphase->getBodiesGpu();
launcher.setBuffer(bodies); launcher.setBuffer(bodies);
@ -531,7 +519,7 @@ void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
launcher.setBuffer(localAabbs); launcher.setBuffer(localAabbs);
cl_mem worldAabbs =0; cl_mem worldAabbs =0;
if (useDbvt) if (gUseDbvt)
{ {
worldAabbs = m_data->m_allAabbsGPU->getBufferCL(); worldAabbs = m_data->m_allAabbsGPU->getBufferCL();
} else } else
@ -624,7 +612,7 @@ int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* po
if (bodyIndex>=0) if (bodyIndex>=0)
{ {
if (useDbvt) if (gUseDbvt)
{ {
m_data->m_broadphaseDbvt->createProxy(aabbMin,aabbMax,bodyIndex,0,1,1); m_data->m_broadphaseDbvt->createProxy(aabbMin,aabbMax,bodyIndex,0,1,1);
b3SapAabb aabb; b3SapAabb aabb;

View File

@ -18,7 +18,7 @@ subject to the following restrictions:
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
bool useNewBatchingKernel = true; bool useNewBatchingKernel = true;
bool convertConstraintOnCpu = false; bool gConvertConstraintOnCpu = false;
#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" #define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl"
#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" #define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl"
@ -824,7 +824,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
cdata.z = ib; cdata.z = ib;
b3LauncherCL launcher( m_queue, m_solveContactKernel ); b3LauncherCL launcher( m_queue, m_solveContactKernel ,"m_solveContactKernel");
#if 1 #if 1
b3BufferInfoCL bInfo[] = { b3BufferInfoCL bInfo[] = {
@ -929,7 +929,7 @@ void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* body
,b3BufferInfoCL(&gpuDebugInfo) ,b3BufferInfoCL(&gpuDebugInfo)
#endif //DEBUG_ME #endif //DEBUG_ME
}; };
b3LauncherCL launcher( m_queue, m_solveFrictionKernel ); b3LauncherCL launcher( m_queue, m_solveFrictionKernel,"m_solveFrictionKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
//launcher.setConst( cdata.x ); //launcher.setConst( cdata.x );
launcher.setConst( cdata.y ); launcher.setConst( cdata.y );
@ -979,7 +979,7 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
if (convertConstraintOnCpu) if (gConvertConstraintOnCpu)
{ {
b3AlignedObjectArray<b3RigidBodyCL> gBodies; b3AlignedObjectArray<b3RigidBodyCL> gBodies;
bodyBuf->copyToHost(gBodies); bodyBuf->copyToHost(gBodies);
@ -1031,7 +1031,7 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf
b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()), b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()),
b3BufferInfoCL( contactCOut->getBufferCL() )}; b3BufferInfoCL( contactCOut->getBufferCL() )};
b3LauncherCL launcher( m_queue, m_contactToConstraintKernel ); b3LauncherCL launcher( m_queue, m_contactToConstraintKernel,"m_contactToConstraintKernel" );
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
//launcher.setConst( cdata ); //launcher.setConst( cdata );
@ -1169,7 +1169,7 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact
//b3LauncherCL launcher( m_queue, m_batchingKernel); //b3LauncherCL launcher( m_queue, m_batchingKernel);
cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel; cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;
b3LauncherCL launcher( m_queue, k); b3LauncherCL launcher( m_queue, k,"*batchingKernel");
if (!useNewBatchingKernel ) if (!useNewBatchingKernel )
{ {
launcher.setBuffer( contacts->getBufferCL() ); launcher.setBuffer( contacts->getBufferCL() );

View File

@ -186,7 +186,7 @@ int main(int argc, char* argv[])
atomicCounter.push_back(0); atomicCounter.push_back(0);
deviceElements.resize(numWorkItems); deviceElements.resize(numWorkItems);
b3LauncherCL run(queue,testKernel); b3LauncherCL run(queue,testKernel,"testKernel");
run.setBuffer(deviceElements.getBufferCL()); run.setBuffer(deviceElements.getBufferCL());
run.setBuffer(deviceContacts.getBufferCL()); run.setBuffer(deviceContacts.getBufferCL());
run.setBuffer(atomicCounter.getBufferCL()); run.setBuffer(atomicCounter.getBufferCL());

View File

@ -22,6 +22,7 @@ function createProject(vendor)
"../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.cpp",
"../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h", "../../../src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h",
"../../../src/Bullet3Common/b3Logging.cpp", "../../../src/Bullet3Common/b3Logging.cpp",
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp"
} }
end end

View File

@ -27,6 +27,7 @@ function createProject(vendor)
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h",
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp",
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h",
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp",
"../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.cpp",
"../../../src/Bullet3Common/b3AlignedAllocator.h", "../../../src/Bullet3Common/b3AlignedAllocator.h",
"../../../src/Bullet3Common/b3AlignedObjectArray.h", "../../../src/Bullet3Common/b3AlignedObjectArray.h",

View File

@ -24,6 +24,7 @@ function createProject(vendor)
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp",
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp",
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp", "../../../src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp",
"../../../src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp",
"../../../src/Bullet3Common/b3AlignedAllocator.cpp", "../../../src/Bullet3Common/b3AlignedAllocator.cpp",
"../../../src/Bullet3Common/b3AlignedAllocator.h", "../../../src/Bullet3Common/b3AlignedAllocator.h",
"../../../src/Bullet3Common/b3AlignedObjectArray.h", "../../../src/Bullet3Common/b3AlignedObjectArray.h",