bullet3/examples/MultiThreadedDemo/ParallelFor.h

337 lines
7.5 KiB
C
Raw Normal View History

/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include <stdio.h> //printf debugging
#include <algorithm>
// choose threading providers:
#if BT_USE_TBB
#define USE_TBB 1 // use Intel Threading Building Blocks for thread management
#endif
#if BT_USE_PPL
#define USE_PPL 1 // use Microsoft Parallel Patterns Library (installed with Visual Studio 2010 and later)
#endif // BT_USE_PPL
#if BT_USE_OPENMP
#define USE_OPENMP 1 // use OpenMP (also need to change compiler options for OpenMP support)
#endif
#if USE_OPENMP
#include <omp.h>
#endif // #if USE_OPENMP
#if USE_PPL
#include <ppl.h> // if you get a compile error here, check whether your version of Visual Studio includes PPL
// Visual Studio 2010 and later should come with it
#include <concrtrm.h> // for GetProcessorCount()
#endif // #if USE_PPL
#if USE_TBB
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/tbb.h>
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif // #if USE_TBB
class TaskManager
{
public:
enum Api
{
apiNone,
apiOpenMP,
apiTbb,
apiPpl,
apiCount
};
static const char* getApiName( Api api )
{
switch ( api )
{
case apiNone: return "None";
case apiOpenMP: return "OpenMP";
case apiTbb: return "Intel TBB";
case apiPpl: return "MS PPL";
default: return "unknown";
}
}
TaskManager()
{
m_api = apiNone;
m_numThreads = 0;
#if USE_TBB
m_tbbSchedulerInit = NULL;
#endif // #if USE_TBB
}
Api getApi() const
{
return m_api;
}
bool isSupported( Api api ) const
{
#if USE_OPENMP
if ( api == apiOpenMP )
{
return true;
}
#endif
#if USE_TBB
if ( api == apiTbb )
{
return true;
}
#endif
#if USE_PPL
if ( api == apiPpl )
{
return true;
}
#endif
// apiNone is always "supported"
return api == apiNone;
}
void setApi( Api api )
{
if (isSupported(api))
{
m_api = api;
}
else
{
// no compile time support for selected API, fallback to "none"
m_api = apiNone;
}
}
static int getMaxNumThreads()
{
#if USE_OPENMP
return omp_get_max_threads();
#elif USE_PPL
return concurrency::GetProcessorCount();
#elif USE_TBB
return tbb::task_scheduler_init::default_num_threads();
#endif
return 1;
}
int getNumThreads() const
{
return m_numThreads;
}
int setNumThreads( int numThreads )
{
m_numThreads = ( std::max )( 1, numThreads );
#if USE_OPENMP
omp_set_num_threads( m_numThreads );
#endif
#if USE_PPL
{
using namespace concurrency;
if ( CurrentScheduler::Id() != -1 )
{
CurrentScheduler::Detach();
}
SchedulerPolicy policy;
policy.SetConcurrencyLimits( m_numThreads, m_numThreads );
CurrentScheduler::Create( policy );
}
#endif
#if USE_TBB
if ( m_tbbSchedulerInit )
{
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
}
m_tbbSchedulerInit = new tbb::task_scheduler_init( m_numThreads );
#endif
return m_numThreads;
}
void init()
{
if (m_numThreads == 0)
{
#if USE_PPL
setApi( apiPpl );
#endif
#if USE_TBB
setApi( apiTbb );
#endif
#if USE_OPENMP
setApi( apiOpenMP );
#endif
setNumThreads(getMaxNumThreads());
}
else
{
setNumThreads(m_numThreads);
}
}
void shutdown()
{
#if USE_TBB
if ( m_tbbSchedulerInit )
{
delete m_tbbSchedulerInit;
m_tbbSchedulerInit = NULL;
}
#endif
}
private:
Api m_api;
int m_numThreads;
#if USE_TBB
tbb::task_scheduler_init* m_tbbSchedulerInit;
#endif // #if USE_TBB
};
extern TaskManager gTaskMgr;
inline static void initTaskScheduler()
{
gTaskMgr.init();
}
inline static void cleanupTaskScheduler()
{
gTaskMgr.shutdown();
}
#if USE_TBB
///
/// TbbBodyAdapter -- Converts a body object that implements the
/// "forLoop(int iBegin, int iEnd) const" function
/// into a TBB compatible object that takes a tbb::blocked_range<int> type.
///
template <class TBody>
struct TbbBodyAdapter
{
const TBody* mBody;
void operator()( const tbb::blocked_range<int>& range ) const
{
mBody->forLoop( range.begin(), range.end() );
}
};
#endif // #if USE_TBB
#if USE_PPL
///
/// PplBodyAdapter -- Converts a body object that implements the
/// "forLoop(int iBegin, int iEnd) const" function
/// into a PPL compatible object that implements "void operator()( int ) const"
///
template <class TBody>
struct PplBodyAdapter
{
const TBody* mBody;
int mGrainSize;
int mIndexEnd;
void operator()( int i ) const
{
mBody->forLoop( i, (std::min)(i + mGrainSize, mIndexEnd) );
}
};
#endif // #if USE_PPL
///
/// parallelFor -- interface for submitting work expressed as a for loop to the worker threads
///
template <class TBody>
void parallelFor( int iBegin, int iEnd, int grainSize, const TBody& body )
{
#if USE_OPENMP
if ( gTaskMgr.getApi() == TaskManager::apiOpenMP )
{
#pragma omp parallel for schedule(static, 1)
for ( int i = iBegin; i < iEnd; i += grainSize )
{
body.forLoop( i, (std::min)( i + grainSize, iEnd ) );
}
return;
}
#endif // #if USE_OPENMP
#if USE_PPL
if ( gTaskMgr.getApi() == TaskManager::apiPpl )
{
// PPL dispatch
PplBodyAdapter<TBody> pplBody;
pplBody.mBody = &body;
pplBody.mGrainSize = grainSize;
pplBody.mIndexEnd = iEnd;
// note: MSVC 2010 doesn't support partitioner args, so avoid them
concurrency::parallel_for( iBegin,
iEnd,
grainSize,
pplBody
);
return;
}
#endif //#if USE_PPL
#if USE_TBB
if ( gTaskMgr.getApi() == TaskManager::apiTbb )
{
// TBB dispatch
TbbBodyAdapter<TBody> tbbBody;
tbbBody.mBody = &body;
tbb::parallel_for( tbb::blocked_range<int>( iBegin, iEnd, grainSize ),
tbbBody,
tbb::simple_partitioner()
);
return;
}
#endif // #if USE_TBB
{
// run on main thread
body.forLoop( iBegin, iEnd );
}
}