// // Copyright (C) Pixar. All rights reserved. // // This license governs use of the accompanying software. If you // use the software, you accept this license. If you do not accept // the license, do not use the software. // // 1. Definitions // The terms "reproduce," "reproduction," "derivative works," and // "distribution" have the same meaning here as under U.S. // copyright law. A "contribution" is the original software, or // any additions or changes to the software. // A "contributor" is any person or entity that distributes its // contribution under this license. // "Licensed patents" are a contributor's patent claims that read // directly on its contribution. // // 2. Grant of Rights // (A) Copyright Grant- Subject to the terms of this license, // including the license conditions and limitations in section 3, // each contributor grants you a non-exclusive, worldwide, // royalty-free copyright license to reproduce its contribution, // prepare derivative works of its contribution, and distribute // its contribution or any derivative works that you create. // (B) Patent Grant- Subject to the terms of this license, // including the license conditions and limitations in section 3, // each contributor grants you a non-exclusive, worldwide, // royalty-free license under its licensed patents to make, have // made, use, sell, offer for sale, import, and/or otherwise // dispose of its contribution in the software or derivative works // of the contribution in the software. // // 3. Conditions and Limitations // (A) No Trademark License- This license does not grant you // rights to use any contributor's name, logo, or trademarks. // (B) If you bring a patent claim against any contributor over // patents that you claim are infringed by the software, your // patent license from such contributor to the software ends // automatically. // (C) If you distribute any portion of the software, you must // retain all copyright, patent, trademark, and attribution // notices that are present in the software. // (D) If you distribute any portion of the software in source // code form, you may do so only under this license by including a // complete copy of this license with your distribution. If you // distribute any portion of the software in compiled or object // code form, you may only do so under a license that complies // with this license. // (E) The software is licensed "as-is." You bear the risk of // using it. The contributors give no express warranties, // guarantees or conditions. You may have additional consumer // rights under your local laws which this license cannot change. // To the extent permitted under your local laws, the contributors // exclude the implied warranties of merchantability, fitness for // a particular purpose and non-infringement. // #ifndef OSD_CUDA_INIT_H #define OSD_CUDA_INIT_H #include #include // From "NVIDIA GPU Computing SDK 4.2/C/common/inc/cutil_inline_runtime.h": // Beginning of GPU Architecture definitions inline int _ConvertSMVer2Cores_local(int major, int minor) { // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM typedef struct { int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version int Cores; } sSMtoCores; sSMtoCores nGpuArchCoresPerSM[] = { { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class { 0x30, 192}, // Fermi Generation (SM 3.0) GK10x class { -1, -1 } }; int index = 0; while (nGpuArchCoresPerSM[index].SM != -1) { if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) { return nGpuArchCoresPerSM[index].Cores; } index++; } printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor); return -1; } // end of GPU Architecture definitions // This function returns the best GPU (with maximum GFLOPS) inline int cutGetMaxGflopsDeviceId() { int current_device = 0, sm_per_multiproc = 0; int max_compute_perf = 0, max_perf_device = 0; int device_count = 0, best_SM_arch = 0; cudaDeviceProp deviceProp; cudaGetDeviceCount( &device_count ); // Find the best major SM Architecture GPU device while ( current_device < device_count ) { cudaGetDeviceProperties( &deviceProp, current_device ); if (deviceProp.major > 0 && deviceProp.major < 9999) { best_SM_arch = std::max(best_SM_arch, deviceProp.major); } current_device++; } // Find the best CUDA capable GPU device current_device = 0; while( current_device < device_count ) { cudaGetDeviceProperties( &deviceProp, current_device ); if (deviceProp.major == 9999 && deviceProp.minor == 9999) { sm_per_multiproc = 1; } else { sm_per_multiproc = _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor); } int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; if( compute_perf > max_compute_perf ) { // If we find GPU with SM major > 2, search only these if ( best_SM_arch > 2 ) { // If our device==dest_SM_arch, choose this, or else pass if (deviceProp.major == best_SM_arch) { max_compute_perf = compute_perf; max_perf_device = current_device; } } else { max_compute_perf = compute_perf; max_perf_device = current_device; } } ++current_device; } return max_perf_device; } #endif //OSD_CUDA_INIT_H