OpenSubdiv/examples/common/cudaInit.h

//
//     Copyright (C) Pixar. All rights reserved.
//
//     This license governs use of the accompanying software. If you
//     use the software, you accept this license. If you do not accept
//     the license, do not use the software.
//
//     1. Definitions
//     The terms "reproduce," "reproduction," "derivative works," and
//     "distribution" have the same meaning here as under U.S.
//     copyright law.  A "contribution" is the original software, or
//     any additions or changes to the software.
//     A "contributor" is any person or entity that distributes its
//     contribution under this license.
//     "Licensed patents" are a contributor's patent claims that read
//     directly on its contribution.
//
//     2. Grant of Rights
//     (A) Copyright Grant- Subject to the terms of this license,
//     including the license conditions and limitations in section 3,
//     each contributor grants you a non-exclusive, worldwide,
//     royalty-free copyright license to reproduce its contribution,
//     prepare derivative works of its contribution, and distribute
//     its contribution or any derivative works that you create.
//     (B) Patent Grant- Subject to the terms of this license,
//     including the license conditions and limitations in section 3,
//     each contributor grants you a non-exclusive, worldwide,
//     royalty-free license under its licensed patents to make, have
//     made, use, sell, offer for sale, import, and/or otherwise
//     dispose of its contribution in the software or derivative works
//     of the contribution in the software.
//
//     3. Conditions and Limitations
//     (A) No Trademark License- This license does not grant you
//     rights to use any contributor's name, logo, or trademarks.
//     (B) If you bring a patent claim against any contributor over
//     patents that you claim are infringed by the software, your
//     patent license from such contributor to the software ends
//     automatically.
//     (C) If you distribute any portion of the software, you must
//     retain all copyright, patent, trademark, and attribution
//     notices that are present in the software.
//     (D) If you distribute any portion of the software in source
//     code form, you may do so only under this license by including a
//     complete copy of this license with your distribution. If you
//     distribute any portion of the software in compiled or object
//     code form, you may only do so under a license that complies
//     with this license.
//     (E) The software is licensed "as-is." You bear the risk of
//     using it. The contributors give no express warranties,
//     guarantees or conditions. You may have additional consumer
//     rights under your local laws which this license cannot change.
//     To the extent permitted under your local laws, the contributors
//     exclude the implied warranties of merchantability, fitness for
//     a particular purpose and non-infringement.
//
#ifndef OSD_CUDA_INIT_H
#define OSD_CUDA_INIT_H

#include <algorithm>
#include <cstdio>

// From "NVIDIA GPU Computing SDK 4.2/C/common/inc/cutil_inline_runtime.h":

// Beginning of GPU Architecture definitions
// Maps a CUDA compute capability (major.minor) to the number of CUDA cores
// per streaming multiprocessor.
//
// Returns the core count for a known SM version. For a version that is not in
// the table (or whose minor number does not fit the 0xMm encoding) a
// diagnostic is printed to stdout and -1 is returned.
inline int _ConvertSMVer2Cores_local(int major, int minor)
{
    // One table row: SM version and the matching number of cores per SM.
    struct SMtoCores {
        int SM; // 0xMm (hexadecimal notation), M = SM major version, m = SM minor version
        int Cores;
    };

    // Built once; the values follow NVIDIA's CUDA samples (helper_cuda.h).
    static const SMtoCores coresPerSM[] = {
        { 0x10,   8 }, // Tesla   Generation (SM 1.0) G80 class
        { 0x11,   8 }, // Tesla   Generation (SM 1.1) G8x class
        { 0x12,   8 }, // Tesla   Generation (SM 1.2) G9x class
        { 0x13,   8 }, // Tesla   Generation (SM 1.3) GT200 class
        { 0x20,  32 }, // Fermi   Generation (SM 2.0) GF100 class
        { 0x21,  48 }, // Fermi   Generation (SM 2.1) GF10x class
        { 0x30, 192 }, // Kepler  Generation (SM 3.0) GK10x class
        { 0x32, 192 }, // Kepler  Generation (SM 3.2)
        { 0x35, 192 }, // Kepler  Generation (SM 3.5)
        { 0x37, 192 }, // Kepler  Generation (SM 3.7)
        { 0x50, 128 }, // Maxwell Generation (SM 5.0)
        { 0x52, 128 }, // Maxwell Generation (SM 5.2)
        { 0x53, 128 }, // Maxwell Generation (SM 5.3)
        { 0x60,  64 }, // Pascal  Generation (SM 6.0)
        { 0x61, 128 }, // Pascal  Generation (SM 6.1)
        { 0x62, 128 }, // Pascal  Generation (SM 6.2)
        { 0x70,  64 }, // Volta   Generation (SM 7.0)
        { 0x72,  64 }, // Volta   Generation (SM 7.2)
        { 0x75,  64 }, // Turing  Generation (SM 7.5)
        { 0x80,  64 }, // Ampere  Generation (SM 8.0)
        { 0x86, 128 }, // Ampere  Generation (SM 8.6)
        { 0x87, 128 }, // Ampere  Generation (SM 8.7)
        { 0x89, 128 }, // Ada     Generation (SM 8.9)
        { 0x90, 128 }  // Hopper  Generation (SM 9.0)
    };
    static const int numEntries =
        static_cast<int>(sizeof(coresPerSM) / sizeof(coresPerSM[0]));

    // The 0xMm key only has four bits for the minor version. Without this
    // guard an out-of-range or negative value would alias another entry
    // (e.g. 1.16 would be looked up as 2.0).
    if (major >= 0 && minor >= 0 && minor <= 0xF) {
        const int smVersion = (major << 4) + minor;
        for (int i = 0; i < numEntries; ++i) {
            if (coresPerSM[i].SM == smVersion) {
                return coresPerSM[i].Cores;
            }
        }
    }

    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
    return -1;
}
// end of GPU Architecture definitions

// This function returns the best GPU (with maximum GFLOPS)
inline int cutGetMaxGflopsDeviceId()
{
    int current_device   = 0, sm_per_multiproc = 0;
    int max_compute_perf = 0, max_perf_device  = 0;
    int device_count     = 0, best_SM_arch     = 0;
    cudaDeviceProp deviceProp;

    cudaGetDeviceCount( &device_count );
    // Find the best major SM Architecture GPU device
    while ( current_device < device_count ) {
        cudaGetDeviceProperties( &deviceProp, current_device );
        if (deviceProp.major > 0 && deviceProp.major < 9999) {
            best_SM_arch = std::max(best_SM_arch, deviceProp.major);
        }
        current_device++;
    }

    // Find the best CUDA capable GPU device
    current_device = 0;
    while( current_device < device_count ) {
        cudaGetDeviceProperties( &deviceProp, current_device );
        if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
            sm_per_multiproc = 1;
        } else {
            sm_per_multiproc = _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor);
        }
        int compute_perf  = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
        if( compute_perf  > max_compute_perf ) {
            // If we find GPU with SM major > 2, search only these
            if ( best_SM_arch > 2 ) {
                // If our device==dest_SM_arch, choose this, or else pass
                if (deviceProp.major == best_SM_arch) {
                    max_compute_perf  = compute_perf;
                    max_perf_device   = current_device;
                }
            } else {
                max_compute_perf  = compute_perf;
                max_perf_device   = current_device;
            }
        }
        ++current_device;
    }
    return max_perf_device;
}

#endif //OSD_CUDA_INIT_H
- adding mayaViewer as another code sample - more work on the glutViewr example - fixing some compiling / linking issues for osd 2012-06-11 18:53:35 +00:00			`//`
			`// Copyright (C) Pixar. All rights reserved.`
			`//`
			`// This license governs use of the accompanying software. If you`
			`// use the software, you accept this license. If you do not accept`
			`// the license, do not use the software.`
			`//`
			`// 1. Definitions`
			`// The terms "reproduce," "reproduction," "derivative works," and`
			`// "distribution" have the same meaning here as under U.S.`
			`// copyright law. A "contribution" is the original software, or`
			`// any additions or changes to the software.`
			`// A "contributor" is any person or entity that distributes its`
			`// contribution under this license.`
			`// "Licensed patents" are a contributor's patent claims that read`
			`// directly on its contribution.`
			`//`
			`// 2. Grant of Rights`
			`// (A) Copyright Grant- Subject to the terms of this license,`
			`// including the license conditions and limitations in section 3,`
			`// each contributor grants you a non-exclusive, worldwide,`
			`// royalty-free copyright license to reproduce its contribution,`
			`// prepare derivative works of its contribution, and distribute`
			`// its contribution or any derivative works that you create.`
			`// (B) Patent Grant- Subject to the terms of this license,`
			`// including the license conditions and limitations in section 3,`
			`// each contributor grants you a non-exclusive, worldwide,`
			`// royalty-free license under its licensed patents to make, have`
			`// made, use, sell, offer for sale, import, and/or otherwise`
			`// dispose of its contribution in the software or derivative works`
			`// of the contribution in the software.`
			`//`
			`// 3. Conditions and Limitations`
			`// (A) No Trademark License- This license does not grant you`
			`// rights to use any contributor's name, logo, or trademarks.`
			`// (B) If you bring a patent claim against any contributor over`
			`// patents that you claim are infringed by the software, your`
			`// patent license from such contributor to the software ends`
			`// automatically.`
			`// (C) If you distribute any portion of the software, you must`
			`// retain all copyright, patent, trademark, and attribution`
			`// notices that are present in the software.`
			`// (D) If you distribute any portion of the software in source`
			`// code form, you may do so only under this license by including a`
			`// complete copy of this license with your distribution. If you`
			`// distribute any portion of the software in compiled or object`
			`// code form, you may only do so under a license that complies`
			`// with this license.`
			`// (E) The software is licensed "as-is." You bear the risk of`
			`// using it. The contributors give no express warranties,`
			`// guarantees or conditions. You may have additional consumer`
			`// rights under your local laws which this license cannot change.`
			`// To the extent permitted under your local laws, the contributors`
			`// exclude the implied warranties of merchantability, fitness for`
			`// a particular purpose and non-infringement.`
			`//`
			`#ifndef OSD_CUDA_INIT_H`
			`#define OSD_CUDA_INIT_H`

Release Candidate 1.0 : - [Feature Adaptive GPU Rendering of Catmull-Clark Surfaces](http://research.microsoft.com/en-us/um/people/cloop/tog2012.pdf). - New API architecture : we are planning to lock on to this new framework as the basis for backward compatibility, which we will enforce from Release 1.0 onward. Subsequent releases of OpenSubdiv should not break client code. - DirectX 11 support - and much more... 2012-12-11 01:15:13 +00:00			`#include <algorithm>`
Fixed include of cstdio for examples cudaInit.h 2013-01-19 00:03:17 +00:00			`#include <cstdio>`
Release Candidate 1.0 : - [Feature Adaptive GPU Rendering of Catmull-Clark Surfaces](http://research.microsoft.com/en-us/um/people/cloop/tog2012.pdf). - New API architecture : we are planning to lock on to this new framework as the basis for backward compatibility, which we will enforce from Release 1.0 onward. Subsequent releases of OpenSubdiv should not break client code. - DirectX 11 support - and much more... 2012-12-11 01:15:13 +00:00
- adding mayaViewer as another code sample - more work on the glutViewr example - fixing some compiling / linking issues for osd 2012-06-11 18:53:35 +00:00			`// From "NVIDIA GPU Computing SDK 4.2/C/common/inc/cutil_inline_runtime.h":`

			`// Beginning of GPU Architecture definitions`
			`inline int _ConvertSMVer2Cores_local(int major, int minor)`
			`{`
			`// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM`
			`typedef struct {`
			`int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version`
			`int Cores;`
			`} sSMtoCores;`

Siggrpah 2012 - rolling over all of prepro work into beta 1.1 2012-08-04 02:51:27 +00:00			`sSMtoCores nGpuArchCoresPerSM[] =`
- adding mayaViewer as another code sample - more work on the glutViewr example - fixing some compiling / linking issues for osd 2012-06-11 18:53:35 +00:00			`{ { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class`
			`{ 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class`
			`{ 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class`
			`{ 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class`
			`{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class`
			`{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class`
			`{ 0x30, 192}, // Fermi Generation (SM 3.0) GK10x class`
			`{ -1, -1 }`
			`};`

			`int index = 0;`
			`while (nGpuArchCoresPerSM[index].SM != -1) {`
			`if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {`
			`return nGpuArchCoresPerSM[index].Cores;`
			`}`
			`index++;`
			`}`
			`printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);`
			`return -1;`
			`}`
			`// end of GPU Architecture definitions`

			`// This function returns the best GPU (with maximum GFLOPS)`
			`inline int cutGetMaxGflopsDeviceId()`
			`{`
			`int current_device = 0, sm_per_multiproc = 0;`
			`int max_compute_perf = 0, max_perf_device = 0;`
			`int device_count = 0, best_SM_arch = 0;`
			`cudaDeviceProp deviceProp;`

			`cudaGetDeviceCount( &device_count );`
			`// Find the best major SM Architecture GPU device`
			`while ( current_device < device_count ) {`
			`cudaGetDeviceProperties( &deviceProp, current_device );`
			`if (deviceProp.major > 0 && deviceProp.major < 9999) {`
			`best_SM_arch = std::max(best_SM_arch, deviceProp.major);`
			`}`
			`current_device++;`
			`}`

			`// Find the best CUDA capable GPU device`
			`current_device = 0;`
			`while( current_device < device_count ) {`
			`cudaGetDeviceProperties( &deviceProp, current_device );`
			`if (deviceProp.major == 9999 && deviceProp.minor == 9999) {`
			`sm_per_multiproc = 1;`
			`} else {`
			`sm_per_multiproc = _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor);`
			`}`
			`int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;`
			`if( compute_perf > max_compute_perf ) {`
			`// If we find GPU with SM major > 2, search only these`
			`if ( best_SM_arch > 2 ) {`
			`// If our device==dest_SM_arch, choose this, or else pass`
Siggrpah 2012 - rolling over all of prepro work into beta 1.1 2012-08-04 02:51:27 +00:00			`if (deviceProp.major == best_SM_arch) {`
- adding mayaViewer as another code sample - more work on the glutViewr example - fixing some compiling / linking issues for osd 2012-06-11 18:53:35 +00:00			`max_compute_perf = compute_perf;`
			`max_perf_device = current_device;`
			`}`
			`} else {`
			`max_compute_perf = compute_perf;`
			`max_perf_device = current_device;`
			`}`
			`}`
			`++current_device;`
			`}`
			`return max_perf_device;`
			`}`

			`#endif //OSD_CUDA_INIT_H`