2012-06-11 18:53:35 +00:00
|
|
|
//
|
|
|
|
// Copyright (C) Pixar. All rights reserved.
|
|
|
|
//
|
|
|
|
// This license governs use of the accompanying software. If you
|
|
|
|
// use the software, you accept this license. If you do not accept
|
|
|
|
// the license, do not use the software.
|
|
|
|
//
|
|
|
|
// 1. Definitions
|
|
|
|
// The terms "reproduce," "reproduction," "derivative works," and
|
|
|
|
// "distribution" have the same meaning here as under U.S.
|
|
|
|
// copyright law. A "contribution" is the original software, or
|
|
|
|
// any additions or changes to the software.
|
|
|
|
// A "contributor" is any person or entity that distributes its
|
|
|
|
// contribution under this license.
|
|
|
|
// "Licensed patents" are a contributor's patent claims that read
|
|
|
|
// directly on its contribution.
|
|
|
|
//
|
|
|
|
// 2. Grant of Rights
|
|
|
|
// (A) Copyright Grant- Subject to the terms of this license,
|
|
|
|
// including the license conditions and limitations in section 3,
|
|
|
|
// each contributor grants you a non-exclusive, worldwide,
|
|
|
|
// royalty-free copyright license to reproduce its contribution,
|
|
|
|
// prepare derivative works of its contribution, and distribute
|
|
|
|
// its contribution or any derivative works that you create.
|
|
|
|
// (B) Patent Grant- Subject to the terms of this license,
|
|
|
|
// including the license conditions and limitations in section 3,
|
|
|
|
// each contributor grants you a non-exclusive, worldwide,
|
|
|
|
// royalty-free license under its licensed patents to make, have
|
|
|
|
// made, use, sell, offer for sale, import, and/or otherwise
|
|
|
|
// dispose of its contribution in the software or derivative works
|
|
|
|
// of the contribution in the software.
|
|
|
|
//
|
|
|
|
// 3. Conditions and Limitations
|
|
|
|
// (A) No Trademark License- This license does not grant you
|
|
|
|
// rights to use any contributor's name, logo, or trademarks.
|
|
|
|
// (B) If you bring a patent claim against any contributor over
|
|
|
|
// patents that you claim are infringed by the software, your
|
|
|
|
// patent license from such contributor to the software ends
|
|
|
|
// automatically.
|
|
|
|
// (C) If you distribute any portion of the software, you must
|
|
|
|
// retain all copyright, patent, trademark, and attribution
|
|
|
|
// notices that are present in the software.
|
|
|
|
// (D) If you distribute any portion of the software in source
|
|
|
|
// code form, you may do so only under this license by including a
|
|
|
|
// complete copy of this license with your distribution. If you
|
|
|
|
// distribute any portion of the software in compiled or object
|
|
|
|
// code form, you may only do so under a license that complies
|
|
|
|
// with this license.
|
|
|
|
// (E) The software is licensed "as-is." You bear the risk of
|
|
|
|
// using it. The contributors give no express warranties,
|
|
|
|
// guarantees or conditions. You may have additional consumer
|
|
|
|
// rights under your local laws which this license cannot change.
|
|
|
|
// To the extent permitted under your local laws, the contributors
|
|
|
|
// exclude the implied warranties of merchantability, fitness for
|
|
|
|
// a particular purpose and non-infringement.
|
|
|
|
//
|
|
|
|
#ifndef OSD_CUDA_INIT_H
|
|
|
|
#define OSD_CUDA_INIT_H
|
|
|
|
|
|
|
|
// From "NVIDIA GPU Computing SDK 4.2/C/common/inc/cutil_inline_runtime.h":
|
|
|
|
|
|
|
|
// Beginning of GPU Architecture definitions
|
|
|
|
// Maps a compute capability (SM version) to the number of cores per
// streaming multiprocessor.
//
//   major  compute-capability major version (e.g. 2 for SM 2.1)
//   minor  compute-capability minor version (e.g. 1 for SM 2.1)
//
// Returns the core count for a known SM version. For any version that is not
// in the table, prints a diagnostic to stdout and returns -1.
inline int _ConvertSMVer2Cores_local(int major, int minor)
{
    // One row per known architecture. Major and minor are stored separately
    // (instead of packed as 0xMm) so that out-of-range inputs such as
    // major=1, minor=16 cannot alias a different architecture's key.
    struct SMCoresEntry {
        int major;
        int minor;
        int cores;
    };

    // Built once rather than on every call.
    static const SMCoresEntry kCoresPerSM[] = {
        { 1, 0,   8 },  // Tesla  Generation (SM 1.0) G80 class
        { 1, 1,   8 },  // Tesla  Generation (SM 1.1) G8x class
        { 1, 2,   8 },  // Tesla  Generation (SM 1.2) G9x class
        { 1, 3,   8 },  // Tesla  Generation (SM 1.3) GT200 class
        { 2, 0,  32 },  // Fermi  Generation (SM 2.0) GF100 class
        { 2, 1,  48 },  // Fermi  Generation (SM 2.1) GF10x class
        { 3, 0, 192 }   // Kepler Generation (SM 3.0) GK10x class
    };
    static const int kNumEntries =
        static_cast<int>(sizeof(kCoresPerSM) / sizeof(kCoresPerSM[0]));

    for (int i = 0; i < kNumEntries; ++i) {
        if (kCoresPerSM[i].major == major && kCoresPerSM[i].minor == minor) {
            return kCoresPerSM[i].cores;
        }
    }

    // Unknown SM version: report it and let the caller decide what to do.
    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
    return -1;
}
|
|
|
|
// end of GPU Architecture definitions
|
|
|
|
|
|
|
|
// This function returns the best GPU (with maximum GFLOPS)
|
|
|
|
inline int cutGetMaxGflopsDeviceId()
|
|
|
|
{
|
|
|
|
int current_device = 0, sm_per_multiproc = 0;
|
|
|
|
int max_compute_perf = 0, max_perf_device = 0;
|
|
|
|
int device_count = 0, best_SM_arch = 0;
|
|
|
|
cudaDeviceProp deviceProp;
|
|
|
|
|
|
|
|
cudaGetDeviceCount( &device_count );
|
|
|
|
// Find the best major SM Architecture GPU device
|
|
|
|
while ( current_device < device_count ) {
|
|
|
|
cudaGetDeviceProperties( &deviceProp, current_device );
|
|
|
|
if (deviceProp.major > 0 && deviceProp.major < 9999) {
|
|
|
|
best_SM_arch = std::max(best_SM_arch, deviceProp.major);
|
|
|
|
}
|
|
|
|
current_device++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find the best CUDA capable GPU device
|
|
|
|
current_device = 0;
|
|
|
|
while( current_device < device_count ) {
|
|
|
|
cudaGetDeviceProperties( &deviceProp, current_device );
|
|
|
|
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
|
|
|
|
sm_per_multiproc = 1;
|
|
|
|
} else {
|
|
|
|
sm_per_multiproc = _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor);
|
|
|
|
}
|
|
|
|
int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
|
|
|
|
if( compute_perf > max_compute_perf ) {
|
|
|
|
// If we find GPU with SM major > 2, search only these
|
|
|
|
if ( best_SM_arch > 2 ) {
|
|
|
|
// If our device==dest_SM_arch, choose this, or else pass
|
2012-08-04 02:51:27 +00:00
|
|
|
if (deviceProp.major == best_SM_arch) {
|
2012-06-11 18:53:35 +00:00
|
|
|
max_compute_perf = compute_perf;
|
|
|
|
max_perf_device = current_device;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
max_compute_perf = compute_perf;
|
|
|
|
max_perf_device = current_device;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++current_device;
|
|
|
|
}
|
|
|
|
return max_perf_device;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif //OSD_CUDA_INIT_H
|