mirror of
https://github.com/PixarAnimationStudios/OpenSubdiv
synced 2024-09-19 14:20:00 +00:00
cleanup CL/CUDA example harnesses.
Refactor the CL/CUDA-specific initialization code into examples/common/clDeviceContext and cudaDeviceContext, and update the examples to use those types. Also: - remove the CL/CUDA tests from osd_regression; the tests for those kernels will be covered by glImaging. - update CUDA initialization to use the GL-interoperable device if available. - remove the CL specialization from glShareTopology, following the same pattern we took in the previous OsdGLMesh refactoring (there is still something strange with the XFB kernels, though). - fix file permissions.
This commit is contained in:
parent
99f1b57ba5
commit
82a0513326
@ -36,8 +36,6 @@ set(EXAMPLES_COMMON_SOURCE_FILES
|
||||
)
|
||||
|
||||
set(EXAMPLES_COMMON_HEADER_FILES
|
||||
clInit.h
|
||||
cudaInit.h
|
||||
font_image.h
|
||||
hdr_reader.h
|
||||
hud.h
|
||||
@ -86,10 +84,29 @@ if(DXSDK_FOUND)
|
||||
|
||||
endif()
|
||||
|
||||
if( OPENCL_FOUND )
|
||||
if(OPENCL_FOUND)
|
||||
include_directories("${OPENCL_INCLUDE_DIRS}")
|
||||
|
||||
list(APPEND EXAMPLES_COMMON_SOURCE_FILES
|
||||
clDeviceContext.cpp
|
||||
)
|
||||
list(APPEND EXAMPLES_COMMON_HEADER_FILES
|
||||
clDeviceContext.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if(CUDA_FOUND)
|
||||
include_directories("${CUDA_INCLUDE_DIRS}")
|
||||
|
||||
list(APPEND EXAMPLES_COMMON_SOURCE_FILES
|
||||
cudaDeviceContext.cpp
|
||||
)
|
||||
list(APPEND EXAMPLES_COMMON_HEADER_FILES
|
||||
cudaDeviceContext.h
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
include_directories(
|
||||
"${PROJECT_SOURCE_DIR}/opensubdiv"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}"
|
||||
@ -106,6 +123,6 @@ add_library(examples_common_obj
|
||||
OBJECT
|
||||
${EXAMPLES_COMMON_SOURCE_FILES}
|
||||
${EXAMPLES_COMMON_HEADER_FILES}
|
||||
${INC_FILES}
|
||||
${INC_FILES}
|
||||
)
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright 2013 Pixar
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
@ -22,8 +22,7 @@
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OSD_EXAMPLE_CL_INIT_H
|
||||
#define OSD_EXAMPLE_CL_INIT_H
|
||||
#include "clDeviceContext.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
@ -33,33 +32,44 @@
|
||||
#include <GL/glx.h>
|
||||
#endif
|
||||
|
||||
#include "osd/opencl.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
static inline bool HAS_CL_VERSION_1_1 () {
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
#ifdef OPENSUBDIV_HAS_CLEW
|
||||
static bool clewInitialized = false;
|
||||
static bool clewLoadSuccess;
|
||||
if (not clewInitialized) {
|
||||
clewInitialized = true;
|
||||
clewLoadSuccess = clewInit() == CLEW_SUCCESS;
|
||||
if (not clewLoadSuccess) {
|
||||
fprintf(stderr, "Loading OpenCL failed.\n");
|
||||
}
|
||||
}
|
||||
return clewLoadSuccess;
|
||||
#endif
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
CLDeviceContext::CLDeviceContext() :
|
||||
_clContext(NULL), _clCommandQueue(NULL) {
|
||||
}
|
||||
|
||||
static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
|
||||
{
|
||||
CLDeviceContext::~CLDeviceContext() {
|
||||
|
||||
if (_clCommandQueue)
|
||||
clReleaseCommandQueue(_clCommandQueue);
|
||||
if (_clContext)
|
||||
clReleaseContext(_clContext);
|
||||
}
|
||||
|
||||
/*static*/
|
||||
bool
|
||||
CLDeviceContext::HAS_CL_VERSION_1_1 () {
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CLEW
|
||||
static bool clewInitialized = false;
|
||||
static bool clewLoadSuccess;
|
||||
if (not clewInitialized) {
|
||||
clewInitialized = true;
|
||||
clewLoadSuccess = clewInit() == CLEW_SUCCESS;
|
||||
if (not clewLoadSuccess) {
|
||||
fprintf(stderr, "Loading OpenCL failed.\n");
|
||||
}
|
||||
}
|
||||
return clewLoadSuccess;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
CLDeviceContext::Initialize() {
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CLEW
|
||||
if (!clGetPlatformIDs) {
|
||||
printf("Error clGetPlatformIDs function not bound.\n");
|
||||
@ -117,21 +127,21 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
|
||||
int clDeviceUsed = 0;
|
||||
|
||||
#if defined(__APPLE__)
|
||||
*clContext = clCreateContext(props, 0, NULL, clLogMessagesToStdoutAPPLE, NULL, &ciErrNum);
|
||||
_clContext = clCreateContext(props, 0, NULL, clLogMessagesToStdoutAPPLE, NULL, &ciErrNum);
|
||||
if (ciErrNum != CL_SUCCESS) {
|
||||
printf("Error %d in clCreateContext\n", ciErrNum);
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t devicesSize = 0;
|
||||
clGetGLContextInfoAPPLE(*clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, 0, NULL, &devicesSize);
|
||||
clGetGLContextInfoAPPLE(_clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, 0, NULL, &devicesSize);
|
||||
int numDevices = int(devicesSize / sizeof(cl_device_id));
|
||||
if (numDevices == 0) {
|
||||
printf("No sharable devices.\n");
|
||||
return false;
|
||||
}
|
||||
cl_device_id *clDevices = new cl_device_id[numDevices];
|
||||
clGetGLContextInfoAPPLE(*clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, numDevices * sizeof(cl_device_id), clDevices, NULL);
|
||||
clGetGLContextInfoAPPLE(_clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, numDevices * sizeof(cl_device_id), clDevices, NULL);
|
||||
#else
|
||||
|
||||
// get the number of GPU devices available to the platform
|
||||
@ -190,7 +200,8 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
|
||||
return false;
|
||||
}
|
||||
|
||||
*clContext = clCreateContext(props, 1, &clDevices[clDeviceUsed], NULL, NULL, &ciErrNum);
|
||||
_clContext = clCreateContext(props, 1, &clDevices[clDeviceUsed],
|
||||
NULL, NULL, &ciErrNum);
|
||||
if (ciErrNum != CL_SUCCESS) {
|
||||
printf("Error %d in clCreateContext\n", ciErrNum);
|
||||
delete[] clDevices;
|
||||
@ -198,7 +209,8 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
|
||||
}
|
||||
#endif
|
||||
|
||||
*clQueue = clCreateCommandQueue(*clContext, clDevices[clDeviceUsed], 0, &ciErrNum);
|
||||
_clCommandQueue = clCreateCommandQueue(_clContext, clDevices[clDeviceUsed],
|
||||
0, &ciErrNum);
|
||||
delete[] clDevices;
|
||||
if (ciErrNum != CL_SUCCESS) {
|
||||
printf("Error %d in clCreateCommandQueue\n", ciErrNum);
|
||||
@ -207,10 +219,3 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void uninitCL(cl_context clContext, cl_command_queue clQueue)
|
||||
{
|
||||
clReleaseCommandQueue(clQueue);
|
||||
clReleaseContext(clContext);
|
||||
}
|
||||
|
||||
#endif // OSD_EXAMPLE_CL_INIT_H
|
57
examples/common/clDeviceContext.h
Normal file
57
examples/common/clDeviceContext.h
Normal file
@ -0,0 +1,57 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OSD_EXAMPLES_COMMON_CL_DEVICE_CONTEXT_H
|
||||
#define OSD_EXAMPLES_COMMON_CL_DEVICE_CONTEXT_H
|
||||
|
||||
#include "osd/opencl.h"
|
||||
|
||||
class CLDeviceContext {
|
||||
public:
|
||||
CLDeviceContext();
|
||||
~CLDeviceContext();
|
||||
|
||||
static bool HAS_CL_VERSION_1_1 ();
|
||||
|
||||
bool Initialize();
|
||||
|
||||
bool IsInitialized() const {
|
||||
return (_clContext != NULL);
|
||||
}
|
||||
|
||||
cl_context GetContext() const {
|
||||
return _clContext;
|
||||
}
|
||||
cl_command_queue GetCommandQueue() const {
|
||||
return _clCommandQueue;
|
||||
}
|
||||
|
||||
private:
|
||||
cl_context _clContext;
|
||||
cl_command_queue _clCommandQueue;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif // OSD_EXAMPLES_COMMON_CL_DEVICE_CONTEXT_H
|
137
examples/common/cudaDeviceContext.cpp
Normal file
137
examples/common/cudaDeviceContext.cpp
Normal file
@ -0,0 +1,137 @@
|
||||
//
|
||||
// Copyright 2015 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#include "cudaDeviceContext.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <OpenGL/OpenGL.h>
|
||||
#else
|
||||
#include <X11/Xlib.h>
|
||||
#include <GL/glx.h>
|
||||
#endif
|
||||
|
||||
#include <cstdio>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#define message(fmt, ...)
|
||||
//#define message(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
|
||||
#define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
#if CUDA_VERSION < 5000
|
||||
static int _GetCudaDeviceForCurrentGLContext()
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
|
||||
return 0;
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
|
||||
return 0;
|
||||
|
||||
#else // X11
|
||||
// If we don't have a current GL context, then choose the device which
|
||||
// matches the current X11 screen number.
|
||||
Display * display = glXGetCurrentDisplay();
|
||||
if (not display) {
|
||||
display = XOpenDisplay(NULL);
|
||||
if (display) {
|
||||
int screen = DefaultScreen(display);
|
||||
XCloseDisplay(display);
|
||||
message("CUDA init using device for default screen: %d\n", screen);
|
||||
return screen;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// We can't use the new interop API, so use the device
|
||||
// corresponding to the screen number of the current GL context.
|
||||
int screen = DefaultScreen(display);
|
||||
message("CUDA init using device for screen: %d\n", screen);
|
||||
return screen;
|
||||
#endif // X11
|
||||
}
|
||||
|
||||
#else // CUDA_VERSION >= 5000 ------------------------------------------
|
||||
static int _GetCudaDeviceForCurrentGLContext()
|
||||
{
|
||||
// Find and use the CUDA device for the current GL context
|
||||
unsigned int interopDeviceCount = 0;
|
||||
int interopDevices[1];
|
||||
cudaError_t status = cudaGLGetDevices(&interopDeviceCount, interopDevices,
|
||||
1, cudaGLDeviceListCurrentFrame);
|
||||
if (status == cudaErrorNoDevice or interopDeviceCount != 1) {
|
||||
message("CUDA no interop devices found.\n");
|
||||
return 0;
|
||||
}
|
||||
int device = interopDevices[0];
|
||||
|
||||
#if defined(_WIN32)
|
||||
return device;
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
return device;
|
||||
|
||||
#else // X11
|
||||
Display * display = glXGetCurrentDisplay();
|
||||
int screen = DefaultScreen(display);
|
||||
if (device != screen) {
|
||||
error("The CUDA interop device (%d) does not match "
|
||||
"the screen used by the current GL context (%d), "
|
||||
"which may cause slow performance on systems "
|
||||
"with multiple GPU devices.", device, screen);
|
||||
}
|
||||
message("CUDA init using device for current GL context: %d\n", device);
|
||||
return device;
|
||||
#endif
|
||||
}
|
||||
#endif // CUDA_VERSION -----------------------------------------------
|
||||
|
||||
CudaDeviceContext::CudaDeviceContext() :
|
||||
_initialized(false) {
|
||||
}
|
||||
|
||||
// Resets the CUDA device, but only if Initialize() succeeded. Instances are
// typically globals, so an unconditional reset would touch the CUDA runtime
// at process exit even when the CUDA kernel was never selected.
CudaDeviceContext::~CudaDeviceContext() {
    if (_initialized) {
        cudaDeviceReset();
    }
}
|
||||
|
||||
bool
|
||||
CudaDeviceContext::Initialize() {
|
||||
|
||||
// see if any cuda device is available.
|
||||
int deviceCount = 0;
|
||||
cudaGetDeviceCount(&deviceCount);
|
||||
message("CUDA device count: %d\n", deviceCount);
|
||||
if (deviceCount <= 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
cudaGLSetGLDevice(_GetCudaDeviceForCurrentGLContext());
|
||||
_initialized = true;
|
||||
return true;
|
||||
}
|
43
examples/common/cudaDeviceContext.h
Normal file
43
examples/common/cudaDeviceContext.h
Normal file
@ -0,0 +1,43 @@
|
||||
//
|
||||
// Copyright 2013 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OSD_EXAMPLES_COMMON_CUDA_DEVICE_CONTEXT_H
|
||||
#define OSD_EXAMPLES_COMMON_CUDA_DEVICE_CONTEXT_H
|
||||
|
||||
/// \brief Tracks CUDA device initialization for the examples.
///
/// Initialize() selects the CUDA device to use; IsInitialized() reports
/// whether that has succeeded. Because the destructor performs device
/// teardown, instances must not be copied: copy construction and assignment
/// are declared private and left undefined.
///
class CudaDeviceContext {
public:
    /// Constructs an uninitialized context.
    CudaDeviceContext();

    /// Tears down the CUDA device state owned by this context.
    ~CudaDeviceContext();

    /// Selects the CUDA device; returns false if none could be set up.
    bool Initialize();

    /// Returns true once Initialize() has succeeded.
    bool IsInitialized() const {
        return _initialized;
    }

private:
    // Non-copyable: a copy would repeat the teardown in the destructor.
    CudaDeviceContext(CudaDeviceContext const &);
    CudaDeviceContext & operator=(CudaDeviceContext const &);

    bool _initialized;
};
|
||||
|
||||
#endif // OSD_EXAMPLES_COMMON_CUDA_DEVICE_CONTEXT_H
|
@ -1,111 +0,0 @@
|
||||
//
|
||||
// Copyright 2013 Pixar
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "Apache License")
|
||||
// with the following modification; you may not use this file except in
|
||||
// compliance with the Apache License and the following modification to it:
|
||||
// Section 6. Trademarks. is deleted and replaced with:
|
||||
//
|
||||
// 6. Trademarks. This License does not grant permission to use the trade
|
||||
// names, trademarks, service marks, or product names of the Licensor
|
||||
// and its affiliates, except as required to comply with Section 4(c) of
|
||||
// the License and to reproduce the content of the NOTICE file.
|
||||
//
|
||||
// You may obtain a copy of the Apache License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the Apache License with the above modification is
|
||||
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the Apache License for the specific
|
||||
// language governing permissions and limitations under the Apache License.
|
||||
//
|
||||
|
||||
#ifndef OSD_CUDA_INIT_H
|
||||
#define OSD_CUDA_INIT_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
|
||||
// From "NVIDIA GPU Computing SDK 4.2/C/common/inc/cutil_inline_runtime.h":
|
||||
|
||||
// Beginning of GPU Architecture definitions
|
||||
// Returns the number of cores per multiprocessor for the given SM version,
// or -1 (after printing a diagnostic) when the version is not in the table.
inline int _ConvertSMVer2Cores_local(int major, int minor)
{
    // One table row: SM version encoded as 0xMm (hexadecimal; M = SM major
    // version, m = SM minor version) and the matching cores-per-SM count.
    struct SMCoreEntry {
        int smVersion;
        int coresPerSM;
    };

    static const SMCoreEntry kCoresPerSM[] = {
        { 0x10,   8 },  // Tesla generation  (SM 1.0) G80 class
        { 0x11,   8 },  // Tesla generation  (SM 1.1) G8x class
        { 0x12,   8 },  // Tesla generation  (SM 1.2) G9x class
        { 0x13,   8 },  // Tesla generation  (SM 1.3) GT200 class
        { 0x20,  32 },  // Fermi generation  (SM 2.0) GF100 class
        { 0x21,  48 },  // Fermi generation  (SM 2.1) GF10x class
        { 0x30, 192 },  // Kepler generation (SM 3.0) GK10x class
        {   -1,  -1 }   // sentinel terminating the table
    };

    const int wanted = (major << 4) + minor;

    for (const SMCoreEntry * entry = kCoresPerSM;
         entry->smVersion != -1; ++entry) {
        if (entry->smVersion == wanted) {
            return entry->coresPerSM;
        }
    }

    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
    return -1;
}
|
||||
// end of GPU Architecture definitions
|
||||
|
||||
// This function returns the best GPU (with maximum GFLOPS)
|
||||
inline int cutGetMaxGflopsDeviceId()
|
||||
{
|
||||
int current_device = 0, sm_per_multiproc = 0;
|
||||
int max_compute_perf = 0, max_perf_device = 0;
|
||||
int device_count = 0, best_SM_arch = 0;
|
||||
cudaDeviceProp deviceProp;
|
||||
|
||||
cudaGetDeviceCount( &device_count );
|
||||
// Find the best major SM Architecture GPU device
|
||||
while ( current_device < device_count ) {
|
||||
cudaGetDeviceProperties( &deviceProp, current_device );
|
||||
if (deviceProp.major > 0 && deviceProp.major < 9999) {
|
||||
best_SM_arch = std::max(best_SM_arch, deviceProp.major);
|
||||
}
|
||||
current_device++;
|
||||
}
|
||||
|
||||
// Find the best CUDA capable GPU device
|
||||
current_device = 0;
|
||||
while( current_device < device_count ) {
|
||||
cudaGetDeviceProperties( &deviceProp, current_device );
|
||||
if (deviceProp.major == 9999 && deviceProp.minor == 9999) {
|
||||
sm_per_multiproc = 1;
|
||||
} else {
|
||||
sm_per_multiproc = _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor);
|
||||
}
|
||||
int compute_perf = deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
|
||||
if( compute_perf > max_compute_perf ) {
|
||||
// If we find GPU with SM major > 2, search only these
|
||||
if ( best_SM_arch > 2 ) {
|
||||
// If our device==dest_SM_arch, choose this, or else pass
|
||||
if (deviceProp.major == best_SM_arch) {
|
||||
max_compute_perf = compute_perf;
|
||||
max_perf_device = current_device;
|
||||
}
|
||||
} else {
|
||||
max_compute_perf = compute_perf;
|
||||
max_perf_device = current_device;
|
||||
}
|
||||
}
|
||||
++current_device;
|
||||
}
|
||||
return max_perf_device;
|
||||
}
|
||||
|
||||
#endif //OSD_CUDA_INIT_H
|
0
examples/common/d3d11_hud.cpp
Executable file → Normal file
0
examples/common/d3d11_hud.cpp
Executable file → Normal file
@ -34,7 +34,6 @@ set(SHADER_FILES
|
||||
|
||||
set(SOURCE_FILES
|
||||
glImaging.cpp
|
||||
../common/patchColors.cpp
|
||||
)
|
||||
|
||||
set(PLATFORM_LIBRARIES
|
||||
@ -59,11 +58,23 @@ _stringify("${SHADER_FILES}" INC_FILES)
|
||||
|
||||
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
_add_possibly_cuda_executable(glImaging
|
||||
# optional dependency - enables screenshots
|
||||
# XXX: this is actually unnecessary for this test since glImaging
|
||||
# use stb_image_write, however, examples_common_obj has libpng
|
||||
# dependency so we need to add here. We'll remove the libpng dependency soon.
|
||||
find_package(PNG)
|
||||
if (PNG_FOUND)
|
||||
include_directories("${PNG_INCLUDE_DIRS}")
|
||||
list(APPEND PLATFORM_LIBRARIES "${PNG_LIBRARIES}")
|
||||
add_definitions(-DOPENSUBDIV_HAS_PNG)
|
||||
endif()
|
||||
|
||||
_add_glfw_executable(glImaging
|
||||
"${SOURCE_FILES}"
|
||||
"${SHADER_FILES}"
|
||||
"${INC_FILES}"
|
||||
$<TARGET_OBJECTS:regression_common_obj>
|
||||
$<TARGET_OBJECTS:examples_common_obj>
|
||||
)
|
||||
|
||||
add_dependencies(glImaging blarg )
|
||||
|
@ -66,15 +66,9 @@
|
||||
#include <osd/clComputeContext.h>
|
||||
#include <osd/clComputeController.h>
|
||||
|
||||
#include "../common/clInit.h"
|
||||
#include "../common/clDeviceContext.h"
|
||||
|
||||
struct CLContext {
|
||||
cl_context GetContext() const { return clContext; }
|
||||
cl_command_queue GetCommandQueue() const { return clQueue; }
|
||||
cl_context clContext;
|
||||
cl_command_queue clQueue;
|
||||
};
|
||||
CLContext g_clContext;
|
||||
CLDeviceContext g_clDeviceContext;
|
||||
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
|
||||
#endif
|
||||
|
||||
@ -86,7 +80,8 @@
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include "../common/cudaInit.h"
|
||||
#include "../common/cudaDeviceContext.h"
|
||||
CudaDeviceContext g_cudaDeviceContext;
|
||||
|
||||
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
|
||||
#endif
|
||||
@ -297,17 +292,18 @@ createOsdMesh(std::string const &kernel,
|
||||
} else if(kernel == "CL") {
|
||||
if (not g_clComputeController) {
|
||||
g_clComputeController = new Osd::CLComputeController(
|
||||
g_clContext.clContext, g_clContext.clQueue);
|
||||
g_clDeviceContext.GetContext(),
|
||||
g_clDeviceContext.GetCommandQueue());
|
||||
}
|
||||
return new Osd::Mesh<Osd::CLGLVertexBuffer,
|
||||
Osd::CLComputeController,
|
||||
Osd::GLDrawContext,
|
||||
CLContext>(
|
||||
CLDeviceContext>(
|
||||
g_clComputeController,
|
||||
refiner,
|
||||
numVertexElements,
|
||||
numVaryingElements,
|
||||
level, bits, &g_clContext);
|
||||
level, bits, &g_clDeviceContext);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
} else if(kernel == "CUDA") {
|
||||
@ -726,15 +722,22 @@ int main(int argc, char ** argv) {
|
||||
// prep GPU kernel
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (kernel == "CL") {
|
||||
if (initCL(&g_clContext.clContext, &g_clContext.clQueue) == false) {
|
||||
std::cout << "Error in initializing OpenCL\n";
|
||||
exit(1);
|
||||
if (g_clDeviceContext.IsInitialized() == false) {
|
||||
if (g_clDeviceContext.Initialize() == false) {
|
||||
std::cout << "Error in initializing OpenCL\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
if (kernel == "CUDA") {
|
||||
cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
|
||||
if (g_cudaDeviceContext.IsInitialized() == false) {
|
||||
if (g_cudaDeviceContext.Initialize() == false) {
|
||||
std::cout << "Error in initializing Cuda\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (size_t i = 0; i < g_shapes.size(); ++i) {
|
||||
@ -754,12 +757,6 @@ int main(int argc, char ** argv) {
|
||||
|
||||
glfwSwapBuffers(window);
|
||||
}
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (kernel == "CL") {
|
||||
uninitCL(g_clContext.clContext, g_clContext.clQueue);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -80,16 +80,9 @@ OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL;
|
||||
#include <osd/clComputeContext.h>
|
||||
#include <osd/clComputeController.h>
|
||||
|
||||
#include "../common/clInit.h"
|
||||
|
||||
struct CLContext {
|
||||
cl_context GetContext() const { return clContext; }
|
||||
cl_command_queue GetCommandQueue() const { return clQueue; }
|
||||
cl_context clContext;
|
||||
cl_command_queue clQueue;
|
||||
};
|
||||
CLContext g_clContext;
|
||||
#include "../common/clDeviceContext.h"
|
||||
|
||||
CLDeviceContext g_clDeviceContext;
|
||||
OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL;
|
||||
#endif
|
||||
|
||||
@ -101,9 +94,9 @@ OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL;
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include "../common/cudaInit.h"
|
||||
#include "../common/cudaDeviceContext.h"
|
||||
|
||||
bool g_cudaInitialized = false;
|
||||
CudaDeviceContext g_cudaDeviceContext;
|
||||
OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL;
|
||||
#endif
|
||||
|
||||
@ -1088,17 +1081,18 @@ createOsdMesh(int level, int kernel) {
|
||||
} else if (kernel == kCL) {
|
||||
if (not g_clComputeController) {
|
||||
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
|
||||
g_clContext.clContext, g_clContext.clQueue);
|
||||
g_clDeviceContext.GetContext(),
|
||||
g_clDeviceContext.GetCommandQueue());
|
||||
}
|
||||
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLGLVertexBuffer,
|
||||
OpenSubdiv::Osd::CLComputeController,
|
||||
OpenSubdiv::Osd::GLDrawContext,
|
||||
CLContext>(
|
||||
CLDeviceContext>(
|
||||
g_clComputeController,
|
||||
refiner,
|
||||
numVertexElements,
|
||||
numVaryingElements,
|
||||
level, bits, &g_clContext);
|
||||
level, bits, &g_clDeviceContext);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
} else if (kernel == kCUDA) {
|
||||
@ -2042,12 +2036,10 @@ void uninitGL() {
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
delete g_clComputeController;
|
||||
uninitCL(g_clContext.clContext, g_clContext.clQueue);
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
delete g_cudaComputeController;
|
||||
cudaDeviceReset();
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
@ -2092,13 +2084,21 @@ callbackKernel(int k) {
|
||||
g_kernel = k;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (g_kernel == kCL and g_clContext.clContext == NULL) {
|
||||
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
|
||||
// Initialize OpenCL
|
||||
if (initCL(&g_clContext.clContext, &g_clContext.clQueue) == false) {
|
||||
if (g_clDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing OpenCL\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
|
||||
if (g_cudaDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing Cuda\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
createOsdMesh(g_level, g_kernel);
|
||||
}
|
||||
@ -2474,12 +2474,6 @@ int main(int argc, char ** argv) {
|
||||
// activate feature adaptive tessellation if OSD supports it
|
||||
g_adaptive = OpenSubdiv::Osd::GLDrawContext::SupportsAdaptiveTessellation();
|
||||
|
||||
#if OPENSUBDIV_HAS_CUDA
|
||||
// Note: This function randomly crashes with linux 5.0-dev driver.
|
||||
// cudaGetDeviceProperties overrun stack..?
|
||||
cudaGLSetGLDevice(cutGetMaxGflopsDeviceId());
|
||||
#endif
|
||||
|
||||
int windowWidth = g_width, windowHeight = g_height;
|
||||
|
||||
// window size might not match framebuffer size on a high DPI display
|
||||
@ -2541,7 +2535,7 @@ int main(int argc, char ** argv) {
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (HAS_CL_VERSION_1_1()) {
|
||||
if (CLDeviceContext::HAS_CL_VERSION_1_1()) {
|
||||
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
|
||||
}
|
||||
#endif
|
||||
|
@ -52,48 +52,50 @@ GLFWmonitor* g_primary=0;
|
||||
#include <osd/cpuGLVertexBuffer.h>
|
||||
#include <osd/cpuComputeContext.h>
|
||||
#include <osd/cpuComputeController.h>
|
||||
OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
#include <osd/ompComputeController.h>
|
||||
OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
#include <osd/tbbComputeController.h>
|
||||
OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
#include <osd/clGLVertexBuffer.h>
|
||||
#include <osd/clComputeContext.h>
|
||||
#include <osd/clComputeController.h>
|
||||
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
|
||||
|
||||
#include "../common/clInit.h"
|
||||
|
||||
cl_context g_clContext;
|
||||
cl_command_queue g_clQueue;
|
||||
#include "../common/clDeviceContext.h"
|
||||
CLDeviceContext g_clDeviceContext;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
#include <osd/cudaGLVertexBuffer.h>
|
||||
#include <osd/cudaComputeContext.h>
|
||||
#include <osd/cudaComputeController.h>
|
||||
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
|
||||
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include "../common/cudaInit.h"
|
||||
bool g_cudaInitialized = false;
|
||||
#include "../common/cudaDeviceContext.h"
|
||||
CudaDeviceContext g_cudaDeviceContext;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
#include <osd/glslTransformFeedbackComputeContext.h>
|
||||
#include <osd/glslTransformFeedbackComputeController.h>
|
||||
#include <osd/glVertexBuffer.h>
|
||||
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslXFBComputeController = NULL;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
#include <osd/glslComputeContext.h>
|
||||
#include <osd/glslComputeController.h>
|
||||
#include <osd/glVertexBuffer.h>
|
||||
OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL;
|
||||
#endif
|
||||
|
||||
|
||||
@ -155,16 +157,18 @@ private:
|
||||
int _numVertices; // # of vertices of single instance
|
||||
};
|
||||
|
||||
template <class VERTEX_BUFFER>
|
||||
template <class VERTEX_BUFFER, class DEVICE_CONTEXT>
|
||||
class Instances : public InstancesBase {
|
||||
public:
|
||||
Instances(int numInstances,
|
||||
Osd::VertexBufferDescriptor const &vertexDesc,
|
||||
Osd::VertexBufferDescriptor const &varyingDesc,
|
||||
bool interleaved,
|
||||
int numVertices) :
|
||||
int numVertices,
|
||||
DEVICE_CONTEXT *deviceContext) :
|
||||
InstancesBase(vertexDesc, varyingDesc, numVertices),
|
||||
_vertexBuffer(NULL), _varyingBuffer(NULL), _interleaved(interleaved) {
|
||||
_vertexBuffer(NULL), _varyingBuffer(NULL), _interleaved(interleaved),
|
||||
_deviceContext(deviceContext) {
|
||||
|
||||
if (interleaved) {
|
||||
assert(vertexDesc.stride == varyingDesc.stride);
|
||||
@ -206,11 +210,12 @@ public:
|
||||
}
|
||||
|
||||
VERTEX_BUFFER *createVertexBuffer(int numElements, int numVertices) {
|
||||
return VERTEX_BUFFER::Create(numElements, numVertices);
|
||||
return VERTEX_BUFFER::Create(numElements, numVertices, _deviceContext);
|
||||
}
|
||||
void updateVertexBuffer(VERTEX_BUFFER *vertexBuffer, const float *src, int startVertex,
|
||||
void updateVertexBuffer(VERTEX_BUFFER *vertexBuffer,
|
||||
const float *src, int startVertex,
|
||||
int numVertices) {
|
||||
vertexBuffer->UpdateData(src, startVertex, numVertices);
|
||||
vertexBuffer->UpdateData(src, startVertex, numVertices, _deviceContext);
|
||||
}
|
||||
|
||||
VERTEX_BUFFER *GetVertexBuffer() const { return _vertexBuffer; }
|
||||
@ -220,6 +225,7 @@ private:
|
||||
VERTEX_BUFFER *_vertexBuffer;
|
||||
VERTEX_BUFFER *_varyingBuffer;
|
||||
bool _interleaved;
|
||||
DEVICE_CONTEXT *_deviceContext;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -282,19 +288,27 @@ private:
|
||||
std::vector<float> _restPosition;
|
||||
};
|
||||
|
||||
template <class COMPUTE_CONTROLLER, class VERTEX_BUFFER>
|
||||
template <class COMPUTE_CONTROLLER, class VERTEX_BUFFER,
|
||||
class DEVICE_CONTEXT=void>
|
||||
class Topology : public TopologyBase {
|
||||
|
||||
public:
|
||||
|
||||
typedef COMPUTE_CONTROLLER ComputeController;
|
||||
typedef typename COMPUTE_CONTROLLER::ComputeContext ComputeContext;
|
||||
typedef DEVICE_CONTEXT DeviceContext;
|
||||
|
||||
Topology(Far::PatchTables const * patchTables,
|
||||
Far::StencilTables const * vertexStencils,
|
||||
Far::StencilTables const * varyingStencils)
|
||||
: TopologyBase(patchTables) {
|
||||
Topology(ComputeController * computeController,
|
||||
Far::PatchTables const * patchTables,
|
||||
Far::StencilTables const * vertexStencils,
|
||||
Far::StencilTables const * varyingStencils,
|
||||
DeviceContext * deviceContext = NULL)
|
||||
: TopologyBase(patchTables),
|
||||
_computeController(computeController),
|
||||
_deviceContext(deviceContext) {
|
||||
|
||||
_computeContext = ComputeContext::Create(vertexStencils, varyingStencils);
|
||||
_computeContext = ComputeContext::Create(
|
||||
vertexStencils, varyingStencils, deviceContext);
|
||||
|
||||
_numVertices = vertexStencils->GetNumStencils() +
|
||||
vertexStencils->GetNumControlVertices();
|
||||
@ -311,8 +325,8 @@ public:
|
||||
Osd::VertexBufferDescriptor const &globalVaryingDesc =
|
||||
instance->GetVaryingDesc();
|
||||
|
||||
Instances<VERTEX_BUFFER> *typedInstance =
|
||||
static_cast<Instances<VERTEX_BUFFER> *>(instance);
|
||||
Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *typedInstance =
|
||||
static_cast<Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *>(instance);
|
||||
|
||||
for (int i = 0; i < numInstances; ++i) {
|
||||
|
||||
@ -326,11 +340,11 @@ public:
|
||||
globalVaryingDesc.length,
|
||||
globalVaryingDesc.stride);
|
||||
|
||||
_computeController.Compute(_computeContext,
|
||||
typedInstance->GetVertexBuffer(),
|
||||
typedInstance->GetVaryingBuffer(),
|
||||
&vertexDesc,
|
||||
&varyingDesc);
|
||||
_computeController->Compute(_computeContext,
|
||||
typedInstance->GetVertexBuffer(),
|
||||
typedInstance->GetVaryingBuffer(),
|
||||
&vertexDesc,
|
||||
&varyingDesc);
|
||||
}
|
||||
}
|
||||
|
||||
@ -340,65 +354,29 @@ public:
|
||||
Osd::VertexBufferDescriptor const &varyingDesc,
|
||||
bool interleaved) {
|
||||
|
||||
return new Instances<VERTEX_BUFFER>(numInstances,
|
||||
vertexDesc,
|
||||
varyingDesc,
|
||||
interleaved,
|
||||
_numVertices);
|
||||
return new Instances<VERTEX_BUFFER, DEVICE_CONTEXT>(
|
||||
numInstances, vertexDesc, varyingDesc,
|
||||
interleaved, _numVertices, _deviceContext);
|
||||
}
|
||||
|
||||
virtual void Synchronize() {
|
||||
_computeController.Synchronize();
|
||||
_computeController->Synchronize();
|
||||
}
|
||||
|
||||
virtual void UpdateVertexTexture(InstancesBase *instances) {
|
||||
Instances<VERTEX_BUFFER> *typedInstance =
|
||||
static_cast<Instances<VERTEX_BUFFER> *>(instances);
|
||||
Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *typedInstance =
|
||||
static_cast<Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *>(instances);
|
||||
GetDrawContext()->UpdateVertexTexture(typedInstance->GetVertexBuffer());
|
||||
|
||||
updateVertexBufferStride(typedInstance->GetVertexBuffer()->GetNumElements());
|
||||
}
|
||||
|
||||
private:
|
||||
COMPUTE_CONTROLLER _computeController;
|
||||
ComputeController *_computeController;
|
||||
ComputeContext *_computeContext;
|
||||
DeviceContext *_deviceContext;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// CL specializations
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
|
||||
template<> Osd::CLGLVertexBuffer *
|
||||
Instances<Osd::CLGLVertexBuffer>::createVertexBuffer(
|
||||
int numElements, int numVertices) {
|
||||
return Osd::CLGLVertexBuffer::Create(
|
||||
numElements, numVertices, g_clContext);
|
||||
}
|
||||
|
||||
template<> void
|
||||
Instances<Osd::CLGLVertexBuffer>::updateVertexBuffer(
|
||||
Osd::CLGLVertexBuffer *vertexBuffer,
|
||||
const float *src, int startVertex, int numVertices) {
|
||||
vertexBuffer->UpdateData(src, startVertex, numVertices, g_clQueue);
|
||||
}
|
||||
|
||||
template<>
|
||||
Topology<Osd::CLComputeController, Osd::CLGLVertexBuffer>::
|
||||
Topology(Far::PatchTables const * patchTables,
|
||||
Far::StencilTables const * vertexStencils, Far::StencilTables const * varyingStencils) :
|
||||
TopologyBase(patchTables), _computeController(g_clContext, g_clQueue) {
|
||||
|
||||
_computeContext = ComputeContext::Create(vertexStencils, varyingStencils, g_clContext);
|
||||
|
||||
_numVertices = vertexStencils->GetNumStencils() +
|
||||
vertexStencils->GetNumControlVertices();
|
||||
}
|
||||
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
TopologyBase *g_topology = NULL;
|
||||
InstancesBase *g_instances = NULL;
|
||||
|
||||
@ -616,7 +594,8 @@ createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark )
|
||||
bool doAdaptive = (g_adaptive!=0 and scheme==kCatmark);
|
||||
|
||||
if (doAdaptive) {
|
||||
refiner->RefineAdaptive(Far::TopologyRefiner::AdaptiveOptions(level));
|
||||
Far::TopologyRefiner::AdaptiveOptions options(level);
|
||||
refiner->RefineAdaptive(options);
|
||||
} else {
|
||||
Far::TopologyRefiner::UniformOptions options(level);
|
||||
options.fullTopologyInLastLevel = true;
|
||||
@ -638,45 +617,82 @@ createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark )
|
||||
assert(vertexStencils);
|
||||
}
|
||||
|
||||
Far::PatchTables const * patchTables =
|
||||
Far::PatchTablesFactory::Create(*refiner);
|
||||
Far::PatchTables const * patchTables = NULL;
|
||||
{
|
||||
Far::PatchTablesFactory::Options poptions(level);
|
||||
poptions.SetEndCapType(
|
||||
Far::PatchTablesFactory::Options::ENDCAP_LEGACY_GREGORY);
|
||||
patchTables = Far::PatchTablesFactory::Create(*refiner, poptions);
|
||||
}
|
||||
|
||||
|
||||
// create partitioned patcharray
|
||||
TopologyBase *topology = NULL;
|
||||
|
||||
if (g_kernel == kCPU) {
|
||||
if (not g_cpuComputeController)
|
||||
g_cpuComputeController = new Osd::CpuComputeController();
|
||||
topology = new Topology<Osd::CpuComputeController,
|
||||
Osd::CpuGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
|
||||
Osd::CpuGLVertexBuffer>(g_cpuComputeController,
|
||||
patchTables,
|
||||
vertexStencils, varyingStencils);
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
} else if (g_kernel == kOPENMP) {
|
||||
if (not g_ompComputeController)
|
||||
g_ompComputeController = new Osd::OmpComputeController();
|
||||
topology = new Topology<Osd::OmpComputeController,
|
||||
Osd::CpuGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
|
||||
Osd::CpuGLVertexBuffer>(g_ompComputeController,
|
||||
patchTables,
|
||||
vertexStencils, varyingStencils);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
} else if (g_kernel == kTBB) {
|
||||
if (not g_tbbComputeController)
|
||||
g_tbbComputeController = new Osd::TbbComputeController();
|
||||
topology = new Topology<Osd::TbbComputeController,
|
||||
Osd::CpuGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
|
||||
Osd::CpuGLVertexBuffer>(g_tbbComputeController,
|
||||
patchTables,
|
||||
vertexStencils, varyingStencils);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
} else if (g_kernel == kCUDA) {
|
||||
if (not g_cudaComputeController)
|
||||
g_cudaComputeController = new Osd::CudaComputeController();
|
||||
topology = new Topology<Osd::CudaComputeController,
|
||||
Osd::CudaGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
|
||||
Osd::CudaGLVertexBuffer>(g_cudaComputeController,
|
||||
patchTables,
|
||||
vertexStencils, varyingStencils);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
} else if (g_kernel == kCL) {
|
||||
if (not g_clComputeController)
|
||||
g_clComputeController = new Osd::CLComputeController(
|
||||
g_clDeviceContext.GetContext(),
|
||||
g_clDeviceContext.GetCommandQueue());
|
||||
topology = new Topology<Osd::CLComputeController,
|
||||
Osd::CLGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
|
||||
Osd::CLGLVertexBuffer,
|
||||
CLDeviceContext>(g_clComputeController,
|
||||
patchTables,
|
||||
vertexStencils, varyingStencils,
|
||||
&g_clDeviceContext);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
} else if (g_kernel == kGLSL) {
|
||||
if (not g_glslXFBComputeController)
|
||||
g_glslXFBComputeController = new Osd::GLSLTransformFeedbackComputeController();
|
||||
topology = new Topology<Osd::GLSLTransformFeedbackComputeController,
|
||||
Osd::GLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
|
||||
Osd::GLVertexBuffer>(g_glslXFBComputeController,
|
||||
patchTables,
|
||||
vertexStencils, varyingStencils);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
} else if (g_kernel == kGLSLCompute) {
|
||||
if (not g_glslComputeController)
|
||||
g_glslComputeController = new Osd::GLSLComputeController();
|
||||
topology = new Topology<Osd::GLSLComputeController,
|
||||
Osd::GLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
|
||||
Osd::GLVertexBuffer>(g_glslComputeController,
|
||||
patchTables,
|
||||
vertexStencils, varyingStencils);
|
||||
#endif
|
||||
} else {
|
||||
}
|
||||
@ -1257,10 +1273,27 @@ uninitGL() {
|
||||
if (g_topology)
|
||||
delete g_topology;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
uninitCL(g_clContext, g_clQueue);
|
||||
delete g_cpuComputeController;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENMP
|
||||
delete g_ompComputeController;
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_TBB
|
||||
delete g_tbbComputeController;
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
delete g_clComputeController;
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
delete g_cudaComputeController;
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
delete g_glslXFBComputeController;
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
|
||||
delete g_glslComputeController;
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@ -1363,8 +1396,8 @@ callbackKernel(int k) {
|
||||
g_kernel = k;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (g_kernel == kCL and g_clContext == NULL) {
|
||||
if (initCL(&g_clContext, &g_clQueue) == false) {
|
||||
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
|
||||
if (g_clDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing OpenCL\n");
|
||||
exit(1);
|
||||
}
|
||||
@ -1372,9 +1405,11 @@ callbackKernel(int k) {
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
if (g_kernel == kCUDA and g_cudaInitialized == false) {
|
||||
g_cudaInitialized = true;
|
||||
cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
|
||||
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
|
||||
if (g_cudaDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing Cuda\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1457,7 +1492,7 @@ initHUD() {
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (HAS_CL_VERSION_1_1()) {
|
||||
if (CLDeviceContext::HAS_CL_VERSION_1_1()) {
|
||||
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
|
||||
}
|
||||
#endif
|
||||
|
@ -67,16 +67,9 @@ OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
|
||||
#include <osd/clComputeContext.h>
|
||||
#include <osd/clComputeController.h>
|
||||
|
||||
#include "../common/clInit.h"
|
||||
|
||||
struct CLContext {
|
||||
cl_context GetContext() const { return clContext; }
|
||||
cl_command_queue GetCommandQueue() const { return clQueue; }
|
||||
cl_context clContext;
|
||||
cl_command_queue clQueue;
|
||||
};
|
||||
CLContext g_clContext;
|
||||
#include "../common/clDeviceContext.h"
|
||||
|
||||
CLDeviceContext g_clDeviceContext;
|
||||
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
|
||||
#endif
|
||||
|
||||
@ -88,9 +81,9 @@ OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include "../common/cudaInit.h"
|
||||
#include "../common/cudaDeviceContext.h"
|
||||
|
||||
bool g_cudaInitialized = false;
|
||||
CudaDeviceContext g_cudaDeviceContext;
|
||||
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
|
||||
#endif
|
||||
|
||||
@ -601,17 +594,18 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=
|
||||
} else if(kernel == kCL) {
|
||||
if (not g_clComputeController) {
|
||||
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
|
||||
g_clContext.clContext, g_clContext.clQueue);
|
||||
g_clDeviceContext.GetContext(),
|
||||
g_clDeviceContext.GetCommandQueue());
|
||||
}
|
||||
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLGLVertexBuffer,
|
||||
OpenSubdiv::Osd::CLComputeController,
|
||||
OpenSubdiv::Osd::GLDrawContext,
|
||||
CLContext>(
|
||||
CLDeviceContext>(
|
||||
g_clComputeController,
|
||||
refiner,
|
||||
numVertexElements,
|
||||
numVaryingElements,
|
||||
level, bits, &g_clContext);
|
||||
level, bits, &g_clDeviceContext);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
} else if(kernel == kCUDA) {
|
||||
@ -1406,11 +1400,9 @@ uninitGL() {
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
delete g_clComputeController;
|
||||
uninitCL(g_clContext.clContext, g_clContext.clQueue);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
delete g_cudaComputeController;
|
||||
cudaDeviceReset();
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
|
||||
delete g_glslTransformFeedbackComputeController;
|
||||
@ -1495,17 +1487,19 @@ callbackKernel(int k) {
|
||||
g_kernel = k;
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (g_kernel == kCL and g_clContext.clContext == NULL) {
|
||||
if (initCL(&g_clContext.clContext, &g_clContext.clQueue) == false) {
|
||||
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
|
||||
if (g_clDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing OpenCL\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
if (g_kernel == kCUDA and g_cudaInitialized == false) {
|
||||
g_cudaInitialized = true;
|
||||
cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
|
||||
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
|
||||
if (g_cudaDeviceContext.Initialize() == false) {
|
||||
printf("Error in initializing Cuda\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1629,7 +1623,7 @@ initHUD() {
|
||||
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
|
||||
#endif
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (HAS_CL_VERSION_1_1()) {
|
||||
if (CLDeviceContext::HAS_CL_VERSION_1_1()) {
|
||||
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
|
||||
}
|
||||
#endif
|
||||
|
0
opensubdiv/osd/clD3D11VertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/clD3D11VertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/clD3D11VertexBuffer.h
Executable file → Normal file
0
opensubdiv/osd/clD3D11VertexBuffer.h
Executable file → Normal file
0
opensubdiv/osd/cpuComputeContext.cpp
Executable file → Normal file
0
opensubdiv/osd/cpuComputeContext.cpp
Executable file → Normal file
0
opensubdiv/osd/cpuD3D11VertexBuffer.h
Executable file → Normal file
0
opensubdiv/osd/cpuD3D11VertexBuffer.h
Executable file → Normal file
0
opensubdiv/osd/cpuGLVertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/cpuGLVertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/cudaComputeContext.cpp
Executable file → Normal file
0
opensubdiv/osd/cudaComputeContext.cpp
Executable file → Normal file
@ -51,7 +51,7 @@ CudaD3D11VertexBuffer *
|
||||
CudaD3D11VertexBuffer::Create(int numElements, int numVertices,
|
||||
ID3D11DeviceContext *deviceContext) {
|
||||
CudaD3D11VertexBuffer *instance =
|
||||
new CudaD3D11VertexBuffer(numElements, numVertices, device);
|
||||
new CudaD3D11VertexBuffer(numElements, numVertices);
|
||||
|
||||
ID3D11Device *device;
|
||||
deviceContext->GetDevice(&device);
|
||||
|
0
opensubdiv/osd/cudaGLVertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/cudaGLVertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/d3d11DrawContext.h
Executable file → Normal file
0
opensubdiv/osd/d3d11DrawContext.h
Executable file → Normal file
0
opensubdiv/osd/d3d11Mesh.h
Executable file → Normal file
0
opensubdiv/osd/d3d11Mesh.h
Executable file → Normal file
0
opensubdiv/osd/glVertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/glVertexBuffer.cpp
Executable file → Normal file
0
opensubdiv/osd/glslComputeContext.cpp
Executable file → Normal file
0
opensubdiv/osd/glslComputeContext.cpp
Executable file → Normal file
0
opensubdiv/osd/glslTransformFeedbackComputeContext.cpp
Executable file → Normal file
0
opensubdiv/osd/glslTransformFeedbackComputeContext.cpp
Executable file → Normal file
@ -42,15 +42,7 @@ if ( GLEW_FOUND )
|
||||
list(APPEND PLATFORM_LIBRARIES "${GLEW_LIBRARY}")
|
||||
endif()
|
||||
|
||||
if ( OPENCL_FOUND )
|
||||
list(APPEND PLATFORM_LIBRARIES
|
||||
"${OPENCL_LIBRARIES}"
|
||||
)
|
||||
include_directories( "${OPENCL_INCLUDE_DIRS}" )
|
||||
endif()
|
||||
|
||||
|
||||
_add_possibly_cuda_executable(osd_regression
|
||||
_add_executable(osd_regression
|
||||
"${SOURCE_FILES}"
|
||||
$<TARGET_OBJECTS:regression_common_obj>
|
||||
)
|
||||
|
@ -55,19 +55,6 @@ GLFWwindow* g_window=0;
|
||||
|
||||
#include <far/stencilTablesFactory.h>
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_CUDA
|
||||
#endif
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
#include <osd/clComputeContext.h>
|
||||
#include <osd/clComputeController.h>
|
||||
#include <osd/clGLVertexBuffer.h>
|
||||
static cl_context g_clContext;
|
||||
static cl_command_queue g_clQueue;
|
||||
#include "../../examples/common/clInit.h" // XXXX TODO move file out of examples
|
||||
#endif
|
||||
|
||||
|
||||
#include "../../regression/common/cmp_utils.h"
|
||||
#include "../../regression/common/hbr_utils.h"
|
||||
#include "../../regression/common/vtr_utils.h"
|
||||
@ -91,14 +78,12 @@ using namespace OpenSubdiv;
|
||||
enum BackendType {
|
||||
kBackendCPU = 0, // raw CPU
|
||||
kBackendCPUGL = 1, // CPU with GL-backed buffer
|
||||
kBackendCL = 2, // OpenCL
|
||||
kBackendCount
|
||||
};
|
||||
|
||||
static const char* g_BackendNames[kBackendCount] = {
|
||||
"CPU",
|
||||
"CPUGL",
|
||||
"CL",
|
||||
};
|
||||
|
||||
static int g_Backend = -1;
|
||||
@ -350,54 +335,6 @@ checkMeshCPUGL(FarTopologyRefiner *refiner,
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static int
|
||||
checkMeshCL( FarTopologyRefiner *refiner,
|
||||
const std::vector<xyzVV>& coarseverts,
|
||||
xyzmesh * refmesh) {
|
||||
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
|
||||
static Osd::CLComputeController *controller =
|
||||
new Osd::CLComputeController(g_clContext, g_clQueue);
|
||||
|
||||
Far::StencilTables const *vertexStencils;
|
||||
Far::StencilTables const *varyingStencils;
|
||||
buildStencilTables(*refiner, &vertexStencils, &varyingStencils);
|
||||
Osd::CLComputeContext *context = Osd::CLComputeContext::Create(
|
||||
vertexStencils, varyingStencils, g_clContext);
|
||||
|
||||
Osd::CLGLVertexBuffer *vb =
|
||||
Osd::CLGLVertexBuffer::Create(3, refiner->GetNumVerticesTotal(),
|
||||
g_clContext);
|
||||
|
||||
vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size(),
|
||||
g_clQueue );
|
||||
|
||||
controller->Compute( context, vb );
|
||||
|
||||
// read data back from CL buffer
|
||||
size_t dataSize = vb->GetNumVertices() * vb->GetNumElements();
|
||||
float* data = new float[dataSize];
|
||||
|
||||
clEnqueueReadBuffer (g_clQueue, vb->BindCLBuffer(g_clQueue), CL_TRUE, 0, dataSize * sizeof(float), data, 0, NULL, NULL);
|
||||
|
||||
int result = checkVertexBuffer(
|
||||
*refiner, refmesh, data, vb->GetNumElements());
|
||||
|
||||
delete[] data;
|
||||
delete context;
|
||||
delete vertexStencils;
|
||||
delete varyingStencils;
|
||||
delete vb;
|
||||
|
||||
return result;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
static int
|
||||
checkMesh( char const * msg, std::string const & shape, int levels, Scheme scheme, int backend ) {
|
||||
@ -422,9 +359,6 @@ checkMesh( char const * msg, std::string const & shape, int levels, Scheme schem
|
||||
case kBackendCPUGL:
|
||||
result = checkMeshCPUGL(refiner, vtrVertexData, refmesh);
|
||||
break;
|
||||
case kBackendCL:
|
||||
result = checkMeshCL(refiner, vtrVertexData, refmesh);
|
||||
break;
|
||||
}
|
||||
|
||||
delete refmesh;
|
||||
@ -438,18 +372,6 @@ int checkBackend(int backend, int levels) {
|
||||
|
||||
printf("*** checking backend : %s\n", g_BackendNames[backend]);
|
||||
|
||||
if (backend == kBackendCL) {
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
if (initCL(&g_clContext, &g_clQueue) == false) {
|
||||
printf(" Cannot initialize OpenCL, skipping...\n");
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
printf(" No OpenCL available, skipping...\n");
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int total = 0;
|
||||
|
||||
#define test_catmark_edgeonly
|
||||
@ -652,13 +574,6 @@ int checkBackend(int backend, int levels) {
|
||||
total += checkMesh( "test_bilinear_cube", bilinear_cube, levels, kBilinear, backend );
|
||||
#endif
|
||||
|
||||
|
||||
if (backend == kBackendCL) {
|
||||
#ifdef OPENSUBDIV_HAS_OPENCL
|
||||
uninitCL(g_clContext, g_clQueue);
|
||||
#endif
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user