cleanup CL/CUDA example harnesses.

refactor CL/CUDA-specific initialization code into
examples/common/clDeviceContext and cudaDeviceContext, and
update examples to use those structs.

also
- remove CL/CUDA tests from osd_regression. The tests for those kernels will be covered by glImaging.
- update cuda initialization to use the GL-interoperable device if available.
- remove CL specialization from glShareTopology, following the same pattern as we took in the previous OsdGLMesh refactoring. (still something strange with XFB kernels though)
- fix file permissions.
This commit is contained in:
Takahito Tejima 2015-04-28 15:46:37 -07:00
parent 99f1b57ba5
commit 82a0513326
27 changed files with 494 additions and 408 deletions

View File

@ -36,8 +36,6 @@ set(EXAMPLES_COMMON_SOURCE_FILES
)
set(EXAMPLES_COMMON_HEADER_FILES
clInit.h
cudaInit.h
font_image.h
hdr_reader.h
hud.h
@ -86,10 +84,29 @@ if(DXSDK_FOUND)
endif()
if( OPENCL_FOUND )
if(OPENCL_FOUND)
include_directories("${OPENCL_INCLUDE_DIRS}")
list(APPEND EXAMPLES_COMMON_SOURCE_FILES
clDeviceContext.cpp
)
list(APPEND EXAMPLES_COMMON_HEADER_FILES
clDeviceContext.h
)
endif()
if(CUDA_FOUND)
include_directories("${CUDA_INCLUDE_DIRS}")
list(APPEND EXAMPLES_COMMON_SOURCE_FILES
cudaDeviceContext.cpp
)
list(APPEND EXAMPLES_COMMON_HEADER_FILES
cudaDeviceContext.h
)
endif()
include_directories(
"${PROJECT_SOURCE_DIR}/opensubdiv"
"${CMAKE_CURRENT_BINARY_DIR}"
@ -106,6 +123,6 @@ add_library(examples_common_obj
OBJECT
${EXAMPLES_COMMON_SOURCE_FILES}
${EXAMPLES_COMMON_HEADER_FILES}
${INC_FILES}
${INC_FILES}
)

View File

@ -1,5 +1,5 @@
//
// Copyright 2013 Pixar
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
@ -22,8 +22,7 @@
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_EXAMPLE_CL_INIT_H
#define OSD_EXAMPLE_CL_INIT_H
#include "clDeviceContext.h"
#if defined(_WIN32)
#include <windows.h>
@ -33,33 +32,44 @@
#include <GL/glx.h>
#endif
#include "osd/opencl.h"
#include <cstdio>
#include <cstring>
#include <string>
static inline bool HAS_CL_VERSION_1_1 () {
#ifdef OPENSUBDIV_HAS_OPENCL
#ifdef OPENSUBDIV_HAS_CLEW
static bool clewInitialized = false;
static bool clewLoadSuccess;
if (not clewInitialized) {
clewInitialized = true;
clewLoadSuccess = clewInit() == CLEW_SUCCESS;
if (not clewLoadSuccess) {
fprintf(stderr, "Loading OpenCL failed.\n");
}
}
return clewLoadSuccess;
#endif
return true;
#else
return false;
#endif
// Starts with null handles; the context and command queue are assigned
// later by Initialize().
CLDeviceContext::CLDeviceContext()
    : _clContext(NULL),
      _clCommandQueue(NULL) {
}
static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
{
// Releases the command queue first and then the context, skipping any
// handle that is still NULL.
CLDeviceContext::~CLDeviceContext() {
    if (_clCommandQueue != NULL) {
        clReleaseCommandQueue(_clCommandQueue);
    }
    if (_clContext != NULL) {
        clReleaseContext(_clContext);
    }
}
/*static*/
bool
CLDeviceContext::HAS_CL_VERSION_1_1 () {
#ifdef OPENSUBDIV_HAS_CLEW
    // clewInit() is attempted only once; its outcome is cached in function
    // statics and returned on every subsequent call.
    static bool loadAttempted = false;
    static bool loadSucceeded;

    if (loadAttempted) {
        return loadSucceeded;
    }

    loadAttempted = true;
    loadSucceeded = (clewInit() == CLEW_SUCCESS);
    if (not loadSucceeded) {
        fprintf(stderr, "Loading OpenCL failed.\n");
    }
    return loadSucceeded;
#else
    // Without CLEW there is nothing to load at run time.
    return true;
#endif
}
bool
CLDeviceContext::Initialize() {
#ifdef OPENSUBDIV_HAS_CLEW
if (!clGetPlatformIDs) {
printf("Error clGetPlatformIDs function not bound.\n");
@ -117,21 +127,21 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
int clDeviceUsed = 0;
#if defined(__APPLE__)
*clContext = clCreateContext(props, 0, NULL, clLogMessagesToStdoutAPPLE, NULL, &ciErrNum);
_clContext = clCreateContext(props, 0, NULL, clLogMessagesToStdoutAPPLE, NULL, &ciErrNum);
if (ciErrNum != CL_SUCCESS) {
printf("Error %d in clCreateContext\n", ciErrNum);
return false;
}
size_t devicesSize = 0;
clGetGLContextInfoAPPLE(*clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, 0, NULL, &devicesSize);
clGetGLContextInfoAPPLE(_clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, 0, NULL, &devicesSize);
int numDevices = int(devicesSize / sizeof(cl_device_id));
if (numDevices == 0) {
printf("No sharable devices.\n");
return false;
}
cl_device_id *clDevices = new cl_device_id[numDevices];
clGetGLContextInfoAPPLE(*clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, numDevices * sizeof(cl_device_id), clDevices, NULL);
clGetGLContextInfoAPPLE(_clContext, kCGLContext, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, numDevices * sizeof(cl_device_id), clDevices, NULL);
#else
// get the number of GPU devices available to the platform
@ -190,7 +200,8 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
return false;
}
*clContext = clCreateContext(props, 1, &clDevices[clDeviceUsed], NULL, NULL, &ciErrNum);
_clContext = clCreateContext(props, 1, &clDevices[clDeviceUsed],
NULL, NULL, &ciErrNum);
if (ciErrNum != CL_SUCCESS) {
printf("Error %d in clCreateContext\n", ciErrNum);
delete[] clDevices;
@ -198,7 +209,8 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
}
#endif
*clQueue = clCreateCommandQueue(*clContext, clDevices[clDeviceUsed], 0, &ciErrNum);
_clCommandQueue = clCreateCommandQueue(_clContext, clDevices[clDeviceUsed],
0, &ciErrNum);
delete[] clDevices;
if (ciErrNum != CL_SUCCESS) {
printf("Error %d in clCreateCommandQueue\n", ciErrNum);
@ -207,10 +219,3 @@ static bool initCL(cl_context *clContext, cl_command_queue *clQueue)
return true;
}
static void uninitCL(cl_context clContext, cl_command_queue clQueue)
{
clReleaseCommandQueue(clQueue);
clReleaseContext(clContext);
}
#endif // OSD_EXAMPLE_CL_INIT_H

View File

@ -0,0 +1,57 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_EXAMPLES_COMMON_CL_DEVICE_CONTEXT_H
#define OSD_EXAMPLES_COMMON_CL_DEVICE_CONTEXT_H
#include "osd/opencl.h"
// Holds the OpenCL context and command queue used by the example programs.
//
// The handles are created by Initialize() and released by the destructor.
// Because the destructor releases them, the class is non-copyable: an
// implicit copy would release the same handles a second time.
class CLDeviceContext {
public:
    // Constructs an uninitialized context (both handles NULL).
    CLDeviceContext();

    // Releases the command queue and the context if they were created.
    ~CLDeviceContext();

    // Returns false when the OpenCL library could not be loaded through
    // CLEW; returns true otherwise (including builds without CLEW).
    static bool HAS_CL_VERSION_1_1 ();

    // Creates the OpenCL context and command queue.
    // Returns true on success, false on failure.
    bool Initialize();

    // True once an OpenCL context has been created.
    bool IsInitialized() const {
        return (_clContext != NULL);
    }

    // Accessors for the raw handles (NULL until Initialize() succeeds).
    // Ownership stays with this object.
    cl_context GetContext() const {
        return _clContext;
    }
    cl_command_queue GetCommandQueue() const {
        return _clCommandQueue;
    }

private:
    // Not copyable: declared but intentionally left undefined.
    CLDeviceContext(CLDeviceContext const &);
    CLDeviceContext & operator=(CLDeviceContext const &);

    cl_context _clContext;
    cl_command_queue _clCommandQueue;
};
#endif // OSD_EXAMPLES_COMMON_CL_DEVICE_CONTEXT_H

View File

@ -0,0 +1,137 @@
//
// Copyright 2015 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#include "cudaDeviceContext.h"
#if defined(_WIN32)
#include <windows.h>
#elif defined(__APPLE__)
#include <OpenGL/OpenGL.h>
#else
#include <X11/Xlib.h>
#include <GL/glx.h>
#endif
#include <cstdio>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#define message(fmt, ...)
//#define message(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
#define error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
// -----------------------------------------------------------------------
#if CUDA_VERSION < 5000
static int _GetCudaDeviceForCurrentGLContext()
{
#if defined(_WIN32)
return 0;
#elif defined(__APPLE__)
return 0;
#else // X11
// If we don't have a current GL context, then choose the device which
// matches the current X11 screen number.
Display * display = glXGetCurrentDisplay();
if (not display) {
display = XOpenDisplay(NULL);
if (display) {
int screen = DefaultScreen(display);
XCloseDisplay(display);
message("CUDA init using device for default screen: %d\n", screen);
return screen;
}
return 0;
}
// We can't use the new interop API, so use the device
// corresponding to the screen number of the current GL context.
int screen = DefaultScreen(display);
message("CUDA init using device for screen: %d\n", screen);
return screen;
#endif // X11
}
#else // CUDA_VERSION >= 5000 ------------------------------------------
// Returns the CUDA device that drives the current GL context, as reported by
// cudaGLGetDevices(). Falls back to device 0 when the query fails or does
// not report exactly one device.
//
// On X11 a diagnostic is printed when the interop device index differs from
// the default screen number of the current GLX display.
static int _GetCudaDeviceForCurrentGLContext()
{
    // Find and use the CUDA device for the current GL context
    unsigned int interopDeviceCount = 0;
    int interopDevices[1];
    cudaError_t status = cudaGLGetDevices(&interopDeviceCount, interopDevices,
                                          1, cudaGLDeviceListCurrentFrame);

    // Treat every failure (not only cudaErrorNoDevice) as "no interop
    // device" so that interopDevices[0] is never read uninitialized.
    if (status != cudaSuccess or interopDeviceCount != 1) {
        message("CUDA no interop devices found.\n");
        return 0;
    }
    int device = interopDevices[0];

#if defined(_WIN32) || defined(__APPLE__)
    return device;
#else  // X11
    Display * display = glXGetCurrentDisplay();
    if (not display) {
        // No current GLX display to compare against; DefaultScreen() would
        // dereference the NULL pointer, so skip the consistency check.
        return device;
    }

    int screen = DefaultScreen(display);
    if (device != screen) {
        error("The CUDA interop device (%d) does not match "
              "the screen used by the current GL context (%d), "
              "which may cause slow performance on systems "
              "with multiple GPU devices.\n", device, screen);
    }
    message("CUDA init using device for current GL context: %d\n", device);
    return device;
#endif
}
#endif // CUDA_VERSION -----------------------------------------------
// Starts out uninitialized; Initialize() sets the flag once a device has
// been selected.
CudaDeviceContext::CudaDeviceContext()
    : _initialized(false) {
}
// Resets the CUDA device, but only if Initialize() succeeded. Instances that
// never selected a device (e.g. a global in a run that uses another kernel)
// must not touch the CUDA runtime during destruction.
CudaDeviceContext::~CudaDeviceContext() {
    if (_initialized) {
        cudaDeviceReset();
    }
}
// Selects the CUDA device used for GL interop.
//
// Returns false when no CUDA device is available or when the device cannot
// be selected; returns true (and marks the context initialized) otherwise.
bool
CudaDeviceContext::Initialize() {

    // see if any cuda device is available.
    int deviceCount = 0;
    cudaError_t status = cudaGetDeviceCount(&deviceCount);
    message("CUDA device count: %d\n", deviceCount);
    if (status != cudaSuccess or deviceCount <= 0) {
        return false;
    }

    status = cudaGLSetGLDevice(_GetCudaDeviceForCurrentGLContext());
    if (status == cudaErrorSetOnActiveProcess) {
        // A device is already active for this process. Keep using it, as the
        // unchecked call did before, and clear the recorded error so that it
        // is not picked up by a later cudaGetLastError() check.
        cudaGetLastError();
    } else if (status != cudaSuccess) {
        error("Error in cudaGLSetGLDevice: %s\n", cudaGetErrorString(status));
        return false;
    }

    _initialized = true;
    return true;
}

View File

@ -0,0 +1,43 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_EXAMPLES_COMMON_CUDA_DEVICE_CONTEXT_H
#define OSD_EXAMPLES_COMMON_CUDA_DEVICE_CONTEXT_H
// Tracks CUDA device selection for the example programs.
//
// The destructor resets the CUDA device, so the class is non-copyable: a
// copy going out of scope would reset the device under the original.
class CudaDeviceContext {
public:
    // Constructs an uninitialized context.
    CudaDeviceContext();

    // Resets the CUDA device.
    ~CudaDeviceContext();

    // Selects the CUDA device to use with the current GL context.
    // Returns false if no CUDA device is available.
    bool Initialize();

    // True once Initialize() has succeeded.
    bool IsInitialized() const {
        return _initialized;
    }

private:
    // Not copyable: declared but intentionally left undefined.
    CudaDeviceContext(CudaDeviceContext const &);
    CudaDeviceContext & operator=(CudaDeviceContext const &);

    bool _initialized;
};
#endif // OSD_EXAMPLES_COMMON_CUDA_DEVICE_CONTEXT_H

View File

@ -1,111 +0,0 @@
//
// Copyright 2013 Pixar
//
// Licensed under the Apache License, Version 2.0 (the "Apache License")
// with the following modification; you may not use this file except in
// compliance with the Apache License and the following modification to it:
// Section 6. Trademarks. is deleted and replaced with:
//
// 6. Trademarks. This License does not grant permission to use the trade
// names, trademarks, service marks, or product names of the Licensor
// and its affiliates, except as required to comply with Section 4(c) of
// the License and to reproduce the content of the NOTICE file.
//
// You may obtain a copy of the Apache License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the Apache License with the above modification is
// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the Apache License for the specific
// language governing permissions and limitations under the Apache License.
//
#ifndef OSD_CUDA_INIT_H
#define OSD_CUDA_INIT_H
#include <algorithm>
#include <cstdio>
// From "NVIDIA GPU Computing SDK 4.2/C/common/inc/cutil_inline_runtime.h":
// Beginning of GPU Architecture definitions
// Maps a compute capability (major.minor) to the number of cores per
// multiprocessor. Prints a diagnostic and returns -1 for versions that are
// not in the table.
inline int _ConvertSMVer2Cores_local(int major, int minor)
{
    // One table row per known SM version.
    struct SMCoreEntry {
        int sm;     // 0xMm (hexadecimal), M = SM major version, m = SM minor version
        int cores;  // cores per multiprocessor
    };

    static const SMCoreEntry kCoresPerSM[] = {
        { 0x10,   8 },  // Tesla generation  (SM 1.0) G80 class
        { 0x11,   8 },  // Tesla generation  (SM 1.1) G8x class
        { 0x12,   8 },  // Tesla generation  (SM 1.2) G9x class
        { 0x13,   8 },  // Tesla generation  (SM 1.3) GT200 class
        { 0x20,  32 },  // Fermi generation  (SM 2.0) GF100 class
        { 0x21,  48 },  // Fermi generation  (SM 2.1) GF10x class
        { 0x30, 192 }   // Kepler generation (SM 3.0) GK10x class
    };
    static const int kNumEntries =
        static_cast<int>(sizeof(kCoresPerSM) / sizeof(kCoresPerSM[0]));

    const int smVersion = (major << 4) + minor;
    for (int i = 0; i < kNumEntries; ++i) {
        if (kCoresPerSM[i].sm == smVersion) {
            return kCoresPerSM[i].cores;
        }
    }

    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
    return -1;
}
// end of GPU Architecture definitions
// This function returns the best GPU (with maximum GFLOPS)
// Returns the index of the CUDA device with the highest estimated compute
// throughput (multiprocessors * cores per multiprocessor * clock rate).
//
// When any device reports an SM major version above 2, only devices of the
// highest major version found are considered. Returns 0 when no device can
// be queried. Devices whose properties cannot be read are skipped.
inline int cutGetMaxGflopsDeviceId()
{
    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess) {
        return 0;
    }

    cudaDeviceProp deviceProp;

    // Find the best major SM Architecture GPU device
    int best_SM_arch = 0;
    for (int device = 0; device < device_count; ++device) {
        if (cudaGetDeviceProperties(&deviceProp, device) != cudaSuccess) {
            continue;
        }
        if (deviceProp.major > 0 && deviceProp.major < 9999) {
            best_SM_arch = std::max(best_SM_arch, deviceProp.major);
        }
    }

    // Find the best CUDA capable GPU device
    int max_perf_device = 0;
    long long max_compute_perf = 0;
    for (int device = 0; device < device_count; ++device) {
        // Skip devices that cannot be queried rather than reusing the
        // properties left over from the previous iteration.
        if (cudaGetDeviceProperties(&deviceProp, device) != cudaSuccess) {
            continue;
        }

        int sm_per_multiproc = 1;
        if (not (deviceProp.major == 9999 && deviceProp.minor == 9999)) {
            sm_per_multiproc =
                _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor);
        }

        // Use 64-bit arithmetic: the product exceeds the range of int on
        // devices with many multiprocessors and high clock rates.
        long long compute_perf =
            static_cast<long long>(deviceProp.multiProcessorCount) *
            sm_per_multiproc * deviceProp.clockRate;

        if (compute_perf <= max_compute_perf) {
            continue;
        }
        // If we find GPU with SM major > 2, search only these
        if (best_SM_arch > 2 && deviceProp.major != best_SM_arch) {
            continue;
        }
        max_compute_perf = compute_perf;
        max_perf_device = device;
    }
    return max_perf_device;
}
#endif //OSD_CUDA_INIT_H

0
examples/common/d3d11_hud.cpp Executable file → Normal file
View File

View File

@ -34,7 +34,6 @@ set(SHADER_FILES
set(SOURCE_FILES
glImaging.cpp
../common/patchColors.cpp
)
set(PLATFORM_LIBRARIES
@ -59,11 +58,23 @@ _stringify("${SHADER_FILES}" INC_FILES)
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
_add_possibly_cuda_executable(glImaging
# optional dependency - enables screenshots
# XXX: this is actually unnecessary for this test since glImaging
# use stb_image_write, however, examples_common_obj has libpng
# dependency so we need to add here. We'll remove the libpng dependency soon.
find_package(PNG)
if (PNG_FOUND)
include_directories("${PNG_INCLUDE_DIRS}")
list(APPEND PLATFORM_LIBRARIES "${PNG_LIBRARIES}")
add_definitions(-DOPENSUBDIV_HAS_PNG)
endif()
_add_glfw_executable(glImaging
"${SOURCE_FILES}"
"${SHADER_FILES}"
"${INC_FILES}"
$<TARGET_OBJECTS:regression_common_obj>
$<TARGET_OBJECTS:examples_common_obj>
)
add_dependencies(glImaging blarg )

View File

@ -66,15 +66,9 @@
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include "../common/clInit.h"
#include "../common/clDeviceContext.h"
struct CLContext {
cl_context GetContext() const { return clContext; }
cl_command_queue GetCommandQueue() const { return clQueue; }
cl_context clContext;
cl_command_queue clQueue;
};
CLContext g_clContext;
CLDeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
#endif
@ -86,7 +80,8 @@
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#include "../common/cudaInit.h"
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
#endif
@ -297,17 +292,18 @@ createOsdMesh(std::string const &kernel,
} else if(kernel == "CL") {
if (not g_clComputeController) {
g_clComputeController = new Osd::CLComputeController(
g_clContext.clContext, g_clContext.clQueue);
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
return new Osd::Mesh<Osd::CLGLVertexBuffer,
Osd::CLComputeController,
Osd::GLDrawContext,
CLContext>(
CLDeviceContext>(
g_clComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, &g_clContext);
level, bits, &g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if(kernel == "CUDA") {
@ -726,15 +722,22 @@ int main(int argc, char ** argv) {
// prep GPU kernel
#ifdef OPENSUBDIV_HAS_OPENCL
if (kernel == "CL") {
if (initCL(&g_clContext.clContext, &g_clContext.clQueue) == false) {
std::cout << "Error in initializing OpenCL\n";
exit(1);
if (g_clDeviceContext.IsInitialized() == false) {
if (g_clDeviceContext.Initialize() == false) {
std::cout << "Error in initializing OpenCL\n";
exit(1);
}
}
}
#endif
#ifdef OPENSUBDIV_HAS_CUDA
if (kernel == "CUDA") {
cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
if (g_cudaDeviceContext.IsInitialized() == false) {
if (g_cudaDeviceContext.Initialize() == false) {
std::cout << "Error in initializing Cuda\n";
exit(1);
}
}
}
#endif
for (size_t i = 0; i < g_shapes.size(); ++i) {
@ -754,12 +757,6 @@ int main(int argc, char ** argv) {
glfwSwapBuffers(window);
}
#ifdef OPENSUBDIV_HAS_OPENCL
if (kernel == "CL") {
uninitCL(g_clContext.clContext, g_clContext.clQueue);
}
#endif
}
return 0;

View File

@ -80,16 +80,9 @@ OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL;
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include "../common/clInit.h"
struct CLContext {
cl_context GetContext() const { return clContext; }
cl_command_queue GetCommandQueue() const { return clQueue; }
cl_context clContext;
cl_command_queue clQueue;
};
CLContext g_clContext;
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController * g_clComputeController = NULL;
#endif
@ -101,9 +94,9 @@ OpenSubdiv::Osd::CpuComputeController * g_cpuComputeController = NULL;
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#include "../common/cudaInit.h"
#include "../common/cudaDeviceContext.h"
bool g_cudaInitialized = false;
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController * g_cudaComputeController = NULL;
#endif
@ -1088,17 +1081,18 @@ createOsdMesh(int level, int kernel) {
} else if (kernel == kCL) {
if (not g_clComputeController) {
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
g_clContext.clContext, g_clContext.clQueue);
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLGLVertexBuffer,
OpenSubdiv::Osd::CLComputeController,
OpenSubdiv::Osd::GLDrawContext,
CLContext>(
CLDeviceContext>(
g_clComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, &g_clContext);
level, bits, &g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (kernel == kCUDA) {
@ -2042,12 +2036,10 @@ void uninitGL() {
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
uninitCL(g_clContext.clContext, g_clContext.clQueue);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
cudaDeviceReset();
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
@ -2092,13 +2084,21 @@ callbackKernel(int k) {
g_kernel = k;
#ifdef OPENSUBDIV_HAS_OPENCL
if (g_kernel == kCL and g_clContext.clContext == NULL) {
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
// Initialize OpenCL
if (initCL(&g_clContext.clContext, &g_clContext.clQueue) == false) {
if (g_clDeviceContext.Initialize() == false) {
printf("Error in initializing OpenCL\n");
exit(1);
}
}
#endif
#ifdef OPENSUBDIV_HAS_CUDA
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
if (g_cudaDeviceContext.Initialize() == false) {
printf("Error in initializing Cuda\n");
exit(1);
}
}
#endif
createOsdMesh(g_level, g_kernel);
}
@ -2474,12 +2474,6 @@ int main(int argc, char ** argv) {
// activate feature adaptive tessellation if OSD supports it
g_adaptive = OpenSubdiv::Osd::GLDrawContext::SupportsAdaptiveTessellation();
#if OPENSUBDIV_HAS_CUDA
// Note: This function randomly crashes with linux 5.0-dev driver.
// cudaGetDeviceProperties overrun stack..?
cudaGLSetGLDevice(cutGetMaxGflopsDeviceId());
#endif
int windowWidth = g_width, windowHeight = g_height;
// window size might not match framebuffer size on a high DPI display
@ -2541,7 +2535,7 @@ int main(int argc, char ** argv) {
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
if (HAS_CL_VERSION_1_1()) {
if (CLDeviceContext::HAS_CL_VERSION_1_1()) {
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
}
#endif

View File

@ -52,48 +52,50 @@ GLFWmonitor* g_primary=0;
#include <osd/cpuGLVertexBuffer.h>
#include <osd/cpuComputeContext.h>
#include <osd/cpuComputeController.h>
OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#ifdef OPENSUBDIV_HAS_OPENMP
#include <osd/ompComputeController.h>
OpenSubdiv::Osd::OmpComputeController *g_ompComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_TBB
#include <osd/tbbComputeController.h>
OpenSubdiv::Osd::TbbComputeController *g_tbbComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clGLVertexBuffer.h>
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
#include "../common/clInit.h"
cl_context g_clContext;
cl_command_queue g_clQueue;
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
#include <osd/cudaGLVertexBuffer.h>
#include <osd/cudaComputeContext.h>
#include <osd/cudaComputeController.h>
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#include "../common/cudaInit.h"
bool g_cudaInitialized = false;
#include "../common/cudaDeviceContext.h"
CudaDeviceContext g_cudaDeviceContext;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
#include <osd/glslTransformFeedbackComputeContext.h>
#include <osd/glslTransformFeedbackComputeController.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLTransformFeedbackComputeController *g_glslXFBComputeController = NULL;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
#include <osd/glslComputeContext.h>
#include <osd/glslComputeController.h>
#include <osd/glVertexBuffer.h>
OpenSubdiv::Osd::GLSLComputeController *g_glslComputeController = NULL;
#endif
@ -155,16 +157,18 @@ private:
int _numVertices; // # of vertices of single instance
};
template <class VERTEX_BUFFER>
template <class VERTEX_BUFFER, class DEVICE_CONTEXT>
class Instances : public InstancesBase {
public:
Instances(int numInstances,
Osd::VertexBufferDescriptor const &vertexDesc,
Osd::VertexBufferDescriptor const &varyingDesc,
bool interleaved,
int numVertices) :
int numVertices,
DEVICE_CONTEXT *deviceContext) :
InstancesBase(vertexDesc, varyingDesc, numVertices),
_vertexBuffer(NULL), _varyingBuffer(NULL), _interleaved(interleaved) {
_vertexBuffer(NULL), _varyingBuffer(NULL), _interleaved(interleaved),
_deviceContext(deviceContext) {
if (interleaved) {
assert(vertexDesc.stride == varyingDesc.stride);
@ -206,11 +210,12 @@ public:
}
VERTEX_BUFFER *createVertexBuffer(int numElements, int numVertices) {
return VERTEX_BUFFER::Create(numElements, numVertices);
return VERTEX_BUFFER::Create(numElements, numVertices, _deviceContext);
}
void updateVertexBuffer(VERTEX_BUFFER *vertexBuffer, const float *src, int startVertex,
void updateVertexBuffer(VERTEX_BUFFER *vertexBuffer,
const float *src, int startVertex,
int numVertices) {
vertexBuffer->UpdateData(src, startVertex, numVertices);
vertexBuffer->UpdateData(src, startVertex, numVertices, _deviceContext);
}
VERTEX_BUFFER *GetVertexBuffer() const { return _vertexBuffer; }
@ -220,6 +225,7 @@ private:
VERTEX_BUFFER *_vertexBuffer;
VERTEX_BUFFER *_varyingBuffer;
bool _interleaved;
DEVICE_CONTEXT *_deviceContext;
};
// ---------------------------------------------------------------------------
@ -282,19 +288,27 @@ private:
std::vector<float> _restPosition;
};
template <class COMPUTE_CONTROLLER, class VERTEX_BUFFER>
template <class COMPUTE_CONTROLLER, class VERTEX_BUFFER,
class DEVICE_CONTEXT=void>
class Topology : public TopologyBase {
public:
typedef COMPUTE_CONTROLLER ComputeController;
typedef typename COMPUTE_CONTROLLER::ComputeContext ComputeContext;
typedef DEVICE_CONTEXT DeviceContext;
Topology(Far::PatchTables const * patchTables,
Far::StencilTables const * vertexStencils,
Far::StencilTables const * varyingStencils)
: TopologyBase(patchTables) {
Topology(ComputeController * computeController,
Far::PatchTables const * patchTables,
Far::StencilTables const * vertexStencils,
Far::StencilTables const * varyingStencils,
DeviceContext * deviceContext = NULL)
: TopologyBase(patchTables),
_computeController(computeController),
_deviceContext(deviceContext) {
_computeContext = ComputeContext::Create(vertexStencils, varyingStencils);
_computeContext = ComputeContext::Create(
vertexStencils, varyingStencils, deviceContext);
_numVertices = vertexStencils->GetNumStencils() +
vertexStencils->GetNumControlVertices();
@ -311,8 +325,8 @@ public:
Osd::VertexBufferDescriptor const &globalVaryingDesc =
instance->GetVaryingDesc();
Instances<VERTEX_BUFFER> *typedInstance =
static_cast<Instances<VERTEX_BUFFER> *>(instance);
Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *typedInstance =
static_cast<Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *>(instance);
for (int i = 0; i < numInstances; ++i) {
@ -326,11 +340,11 @@ public:
globalVaryingDesc.length,
globalVaryingDesc.stride);
_computeController.Compute(_computeContext,
typedInstance->GetVertexBuffer(),
typedInstance->GetVaryingBuffer(),
&vertexDesc,
&varyingDesc);
_computeController->Compute(_computeContext,
typedInstance->GetVertexBuffer(),
typedInstance->GetVaryingBuffer(),
&vertexDesc,
&varyingDesc);
}
}
@ -340,65 +354,29 @@ public:
Osd::VertexBufferDescriptor const &varyingDesc,
bool interleaved) {
return new Instances<VERTEX_BUFFER>(numInstances,
vertexDesc,
varyingDesc,
interleaved,
_numVertices);
return new Instances<VERTEX_BUFFER, DEVICE_CONTEXT>(
numInstances, vertexDesc, varyingDesc,
interleaved, _numVertices, _deviceContext);
}
virtual void Synchronize() {
_computeController.Synchronize();
_computeController->Synchronize();
}
virtual void UpdateVertexTexture(InstancesBase *instances) {
Instances<VERTEX_BUFFER> *typedInstance =
static_cast<Instances<VERTEX_BUFFER> *>(instances);
Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *typedInstance =
static_cast<Instances<VERTEX_BUFFER, DEVICE_CONTEXT> *>(instances);
GetDrawContext()->UpdateVertexTexture(typedInstance->GetVertexBuffer());
updateVertexBufferStride(typedInstance->GetVertexBuffer()->GetNumElements());
}
private:
COMPUTE_CONTROLLER _computeController;
ComputeController *_computeController;
ComputeContext *_computeContext;
DeviceContext *_deviceContext;
};
// ---------------------------------------------------------------------------
// CL specializations
#ifdef OPENSUBDIV_HAS_OPENCL
template<> Osd::CLGLVertexBuffer *
Instances<Osd::CLGLVertexBuffer>::createVertexBuffer(
int numElements, int numVertices) {
return Osd::CLGLVertexBuffer::Create(
numElements, numVertices, g_clContext);
}
template<> void
Instances<Osd::CLGLVertexBuffer>::updateVertexBuffer(
Osd::CLGLVertexBuffer *vertexBuffer,
const float *src, int startVertex, int numVertices) {
vertexBuffer->UpdateData(src, startVertex, numVertices, g_clQueue);
}
template<>
Topology<Osd::CLComputeController, Osd::CLGLVertexBuffer>::
Topology(Far::PatchTables const * patchTables,
Far::StencilTables const * vertexStencils, Far::StencilTables const * varyingStencils) :
TopologyBase(patchTables), _computeController(g_clContext, g_clQueue) {
_computeContext = ComputeContext::Create(vertexStencils, varyingStencils, g_clContext);
_numVertices = vertexStencils->GetNumStencils() +
vertexStencils->GetNumControlVertices();
}
#endif
// ---------------------------------------------------------------------------
TopologyBase *g_topology = NULL;
InstancesBase *g_instances = NULL;
@ -616,7 +594,8 @@ createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark )
bool doAdaptive = (g_adaptive!=0 and scheme==kCatmark);
if (doAdaptive) {
refiner->RefineAdaptive(Far::TopologyRefiner::AdaptiveOptions(level));
Far::TopologyRefiner::AdaptiveOptions options(level);
refiner->RefineAdaptive(options);
} else {
Far::TopologyRefiner::UniformOptions options(level);
options.fullTopologyInLastLevel = true;
@ -638,45 +617,82 @@ createOsdMesh( const std::string &shapeStr, int level, Scheme scheme=kCatmark )
assert(vertexStencils);
}
Far::PatchTables const * patchTables =
Far::PatchTablesFactory::Create(*refiner);
Far::PatchTables const * patchTables = NULL;
{
Far::PatchTablesFactory::Options poptions(level);
poptions.SetEndCapType(
Far::PatchTablesFactory::Options::ENDCAP_LEGACY_GREGORY);
patchTables = Far::PatchTablesFactory::Create(*refiner, poptions);
}
// create partitioned patcharray
TopologyBase *topology = NULL;
if (g_kernel == kCPU) {
if (not g_cpuComputeController)
g_cpuComputeController = new Osd::CpuComputeController();
topology = new Topology<Osd::CpuComputeController,
Osd::CpuGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
Osd::CpuGLVertexBuffer>(g_cpuComputeController,
patchTables,
vertexStencils, varyingStencils);
#ifdef OPENSUBDIV_HAS_OPENMP
} else if (g_kernel == kOPENMP) {
if (not g_ompComputeController)
g_ompComputeController = new Osd::OmpComputeController();
topology = new Topology<Osd::OmpComputeController,
Osd::CpuGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
Osd::CpuGLVertexBuffer>(g_ompComputeController,
patchTables,
vertexStencils, varyingStencils);
#endif
#ifdef OPENSUBDIV_HAS_TBB
} else if (g_kernel == kTBB) {
if (not g_tbbComputeController)
g_tbbComputeController = new Osd::TbbComputeController();
topology = new Topology<Osd::TbbComputeController,
Osd::CpuGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
Osd::CpuGLVertexBuffer>(g_tbbComputeController,
patchTables,
vertexStencils, varyingStencils);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if (g_kernel == kCUDA) {
if (not g_cudaComputeController)
g_cudaComputeController = new Osd::CudaComputeController();
topology = new Topology<Osd::CudaComputeController,
Osd::CudaGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
Osd::CudaGLVertexBuffer>(g_cudaComputeController,
patchTables,
vertexStencils, varyingStencils);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
} else if (g_kernel == kCL) {
if (not g_clComputeController)
g_clComputeController = new Osd::CLComputeController(
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
topology = new Topology<Osd::CLComputeController,
Osd::CLGLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
Osd::CLGLVertexBuffer,
CLDeviceContext>(g_clComputeController,
patchTables,
vertexStencils, varyingStencils,
&g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
} else if (g_kernel == kGLSL) {
if (not g_glslXFBComputeController)
g_glslXFBComputeController = new Osd::GLSLTransformFeedbackComputeController();
topology = new Topology<Osd::GLSLTransformFeedbackComputeController,
Osd::GLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
Osd::GLVertexBuffer>(g_glslXFBComputeController,
patchTables,
vertexStencils, varyingStencils);
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
} else if (g_kernel == kGLSLCompute) {
if (not g_glslComputeController)
g_glslComputeController = new Osd::GLSLComputeController();
topology = new Topology<Osd::GLSLComputeController,
Osd::GLVertexBuffer>(patchTables, vertexStencils, varyingStencils);
Osd::GLVertexBuffer>(g_glslComputeController,
patchTables,
vertexStencils, varyingStencils);
#endif
} else {
}
@ -1257,10 +1273,27 @@ uninitGL() {
if (g_topology)
delete g_topology;
#ifdef OPENSUBDIV_HAS_OPENCL
uninitCL(g_clContext, g_clQueue);
delete g_cpuComputeController;
#ifdef OPENSUBDIV_HAS_OPENMP
delete g_ompComputeController;
#endif
#ifdef OPENSUBDIV_HAS_TBB
delete g_tbbComputeController;
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
delete g_glslXFBComputeController;
#endif
#ifdef OPENSUBDIV_HAS_GLSL_COMPUTE
delete g_glslComputeController;
#endif
}
//------------------------------------------------------------------------------
@ -1363,8 +1396,8 @@ callbackKernel(int k) {
g_kernel = k;
#ifdef OPENSUBDIV_HAS_OPENCL
if (g_kernel == kCL and g_clContext == NULL) {
if (initCL(&g_clContext, &g_clQueue) == false) {
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
if (g_clDeviceContext.Initialize() == false) {
printf("Error in initializing OpenCL\n");
exit(1);
}
@ -1372,9 +1405,11 @@ callbackKernel(int k) {
#endif
#ifdef OPENSUBDIV_HAS_CUDA
if (g_kernel == kCUDA and g_cudaInitialized == false) {
g_cudaInitialized = true;
cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
if (g_cudaDeviceContext.Initialize() == false) {
printf("Error in initializing Cuda\n");
exit(1);
}
}
#endif
@ -1457,7 +1492,7 @@ initHUD() {
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
if (HAS_CL_VERSION_1_1()) {
if (CLDeviceContext::HAS_CL_VERSION_1_1()) {
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
}
#endif

View File

@ -67,16 +67,9 @@ OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include "../common/clInit.h"
// Bundles an OpenCL context handle with its command queue and exposes both
// through GetContext()/GetCommandQueue() accessors. The fields are public and
// are also read and written directly by code in this file.
struct CLContext {
// Returns the stored OpenCL context handle.
cl_context GetContext() const { return clContext; }
// Returns the stored OpenCL command queue handle.
cl_command_queue GetCommandQueue() const { return clQueue; }
// OpenCL context handle returned by GetContext().
cl_context clContext;
// OpenCL command queue handle returned by GetCommandQueue().
cl_command_queue clQueue;
};
CLContext g_clContext;
#include "../common/clDeviceContext.h"
CLDeviceContext g_clDeviceContext;
OpenSubdiv::Osd::CLComputeController *g_clComputeController = NULL;
#endif
@ -88,9 +81,9 @@ OpenSubdiv::Osd::CpuComputeController *g_cpuComputeController = NULL;
#include <cuda_runtime_api.h>
#include <cuda_gl_interop.h>
#include "../common/cudaInit.h"
#include "../common/cudaDeviceContext.h"
bool g_cudaInitialized = false;
CudaDeviceContext g_cudaDeviceContext;
OpenSubdiv::Osd::CudaComputeController *g_cudaComputeController = NULL;
#endif
@ -601,17 +594,18 @@ createOsdMesh(ShapeDesc const & shapeDesc, int level, int kernel, Scheme scheme=
} else if(kernel == kCL) {
if (not g_clComputeController) {
g_clComputeController = new OpenSubdiv::Osd::CLComputeController(
g_clContext.clContext, g_clContext.clQueue);
g_clDeviceContext.GetContext(),
g_clDeviceContext.GetCommandQueue());
}
g_mesh = new OpenSubdiv::Osd::Mesh<OpenSubdiv::Osd::CLGLVertexBuffer,
OpenSubdiv::Osd::CLComputeController,
OpenSubdiv::Osd::GLDrawContext,
CLContext>(
CLDeviceContext>(
g_clComputeController,
refiner,
numVertexElements,
numVaryingElements,
level, bits, &g_clContext);
level, bits, &g_clDeviceContext);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
} else if(kernel == kCUDA) {
@ -1406,11 +1400,9 @@ uninitGL() {
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
delete g_clComputeController;
uninitCL(g_clContext.clContext, g_clContext.clQueue);
#endif
#ifdef OPENSUBDIV_HAS_CUDA
delete g_cudaComputeController;
cudaDeviceReset();
#endif
#ifdef OPENSUBDIV_HAS_GLSL_TRANSFORM_FEEDBACK
delete g_glslTransformFeedbackComputeController;
@ -1495,17 +1487,19 @@ callbackKernel(int k) {
g_kernel = k;
#ifdef OPENSUBDIV_HAS_OPENCL
if (g_kernel == kCL and g_clContext.clContext == NULL) {
if (initCL(&g_clContext.clContext, &g_clContext.clQueue) == false) {
if (g_kernel == kCL and (not g_clDeviceContext.IsInitialized())) {
if (g_clDeviceContext.Initialize() == false) {
printf("Error in initializing OpenCL\n");
exit(1);
}
}
#endif
#ifdef OPENSUBDIV_HAS_CUDA
if (g_kernel == kCUDA and g_cudaInitialized == false) {
g_cudaInitialized = true;
cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
if (g_kernel == kCUDA and (not g_cudaDeviceContext.IsInitialized())) {
if (g_cudaDeviceContext.Initialize() == false) {
printf("Error in initializing Cuda\n");
exit(1);
}
}
#endif
@ -1629,7 +1623,7 @@ initHUD() {
g_hud.AddPullDownButton(compute_pulldown, "CUDA", kCUDA);
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
if (HAS_CL_VERSION_1_1()) {
if (CLDeviceContext::HAS_CL_VERSION_1_1()) {
g_hud.AddPullDownButton(compute_pulldown, "OpenCL", kCL);
}
#endif

0
opensubdiv/osd/clD3D11VertexBuffer.cpp Executable file → Normal file
View File

0
opensubdiv/osd/clD3D11VertexBuffer.h Executable file → Normal file
View File

0
opensubdiv/osd/cpuComputeContext.cpp Executable file → Normal file
View File

0
opensubdiv/osd/cpuD3D11VertexBuffer.h Executable file → Normal file
View File

0
opensubdiv/osd/cpuGLVertexBuffer.cpp Executable file → Normal file
View File

0
opensubdiv/osd/cudaComputeContext.cpp Executable file → Normal file
View File

View File

@ -51,7 +51,7 @@ CudaD3D11VertexBuffer *
CudaD3D11VertexBuffer::Create(int numElements, int numVertices,
ID3D11DeviceContext *deviceContext) {
CudaD3D11VertexBuffer *instance =
new CudaD3D11VertexBuffer(numElements, numVertices, device);
new CudaD3D11VertexBuffer(numElements, numVertices);
ID3D11Device *device;
deviceContext->GetDevice(&device);

0
opensubdiv/osd/cudaGLVertexBuffer.cpp Executable file → Normal file
View File

0
opensubdiv/osd/d3d11DrawContext.h Executable file → Normal file
View File

0
opensubdiv/osd/d3d11Mesh.h Executable file → Normal file
View File

0
opensubdiv/osd/glVertexBuffer.cpp Executable file → Normal file
View File

0
opensubdiv/osd/glslComputeContext.cpp Executable file → Normal file
View File

0
opensubdiv/osd/glslTransformFeedbackComputeContext.cpp Executable file → Normal file
View File

View File

@ -42,15 +42,7 @@ if ( GLEW_FOUND )
list(APPEND PLATFORM_LIBRARIES "${GLEW_LIBRARY}")
endif()
if ( OPENCL_FOUND )
list(APPEND PLATFORM_LIBRARIES
"${OPENCL_LIBRARIES}"
)
include_directories( "${OPENCL_INCLUDE_DIRS}" )
endif()
_add_possibly_cuda_executable(osd_regression
_add_executable(osd_regression
"${SOURCE_FILES}"
$<TARGET_OBJECTS:regression_common_obj>
)

View File

@ -55,19 +55,6 @@ GLFWwindow* g_window=0;
#include <far/stencilTablesFactory.h>
#ifdef OPENSUBDIV_HAS_CUDA
#endif
#ifdef OPENSUBDIV_HAS_OPENCL
#include <osd/clComputeContext.h>
#include <osd/clComputeController.h>
#include <osd/clGLVertexBuffer.h>
static cl_context g_clContext;
static cl_command_queue g_clQueue;
#include "../../examples/common/clInit.h" // XXXX TODO move file out of examples
#endif
#include "../../regression/common/cmp_utils.h"
#include "../../regression/common/hbr_utils.h"
#include "../../regression/common/vtr_utils.h"
@ -91,14 +78,12 @@ using namespace OpenSubdiv;
// Selects which compute backend a regression run exercises. The values are
// used as indices into g_BackendNames.
enum BackendType {
kBackendCPU = 0, // raw CPU
kBackendCPUGL = 1, // CPU with GL-backed buffer
kBackendCL = 2, // OpenCL
kBackendCount // number of backends above; sizes g_BackendNames
};
// Printable name of each backend, indexed by BackendType; the array is sized
// by kBackendCount, so entries must stay in enum order.
static const char* g_BackendNames[kBackendCount] = {
"CPU",
"CPUGL",
"CL",
};
static int g_Backend = -1;
@ -350,54 +335,6 @@ checkMeshCPUGL(FarTopologyRefiner *refiner,
return result;
}
//------------------------------------------------------------------------------
// Refines 'coarseverts' with the OpenCL compute controller and compares the
// result against the reference mesh.
//
//   refiner     - topology refiner used to build the stencil tables and to
//                 size the vertex buffer (GetNumVerticesTotal()).
//   coarseverts - coarse vertex positions; element 0 is dereferenced, so the
//                 vector must not be empty.
//   refmesh     - reference mesh handed to checkVertexBuffer().
//
// Returns the value of checkVertexBuffer() for the data read back from the CL
// buffer. If the read-back itself fails, an error is printed and 1 is returned
// so the run is reported as a failure instead of comparing uninitialized
// memory (NOTE(review): assumes callers treat any non-zero value as a failure
// count, as the `total += checkMesh(...)` accumulation suggests -- confirm).
// When built without OPENSUBDIV_HAS_OPENCL this is a no-op that returns 0.
static int
checkMeshCL( FarTopologyRefiner *refiner,
             const std::vector<xyzVV>& coarseverts,
             xyzmesh * refmesh) {

#ifdef OPENSUBDIV_HAS_OPENCL

    // Created once on first use and shared by every subsequent call.
    static Osd::CLComputeController *controller =
        new Osd::CLComputeController(g_clContext, g_clQueue);

    Far::StencilTables const *vertexStencils;
    Far::StencilTables const *varyingStencils;
    buildStencilTables(*refiner, &vertexStencils, &varyingStencils);

    Osd::CLComputeContext *context = Osd::CLComputeContext::Create(
        vertexStencils, varyingStencils, g_clContext);

    // 3 elements per vertex.
    Osd::CLGLVertexBuffer *vb =
        Osd::CLGLVertexBuffer::Create(3, refiner->GetNumVerticesTotal(),
                                      g_clContext);

    vb->UpdateData( coarseverts[0].GetPos(), 0, (int)coarseverts.size(),
                    g_clQueue );

    controller->Compute( context, vb );

    // read data back from CL buffer
    // Widen before multiplying so the element count is computed in size_t.
    size_t dataSize = (size_t)vb->GetNumVertices() *
                      (size_t)vb->GetNumElements();
    float* data = new float[dataSize];

    // Blocking read (CL_TRUE): 'data' is fully populated on success.
    cl_int err = clEnqueueReadBuffer(g_clQueue, vb->BindCLBuffer(g_clQueue),
                                     CL_TRUE, 0, dataSize * sizeof(float),
                                     data, 0, NULL, NULL);

    int result = 0;
    if (err == CL_SUCCESS) {
        result = checkVertexBuffer(
            *refiner, refmesh, data, vb->GetNumElements());
    } else {
        // 'data' is uninitialized here; comparing it would be meaningless.
        printf("  clEnqueueReadBuffer failed (error %d)\n", (int)err);
        result = 1;
    }

    delete[] data;
    delete context;
    delete vertexStencils;
    delete varyingStencils;
    delete vb;

    return result;
#else
    return 0;
#endif
}
//------------------------------------------------------------------------------
static int
checkMesh( char const * msg, std::string const & shape, int levels, Scheme scheme, int backend ) {
@ -422,9 +359,6 @@ checkMesh( char const * msg, std::string const & shape, int levels, Scheme schem
case kBackendCPUGL:
result = checkMeshCPUGL(refiner, vtrVertexData, refmesh);
break;
case kBackendCL:
result = checkMeshCL(refiner, vtrVertexData, refmesh);
break;
}
delete refmesh;
@ -438,18 +372,6 @@ int checkBackend(int backend, int levels) {
printf("*** checking backend : %s\n", g_BackendNames[backend]);
if (backend == kBackendCL) {
#ifdef OPENSUBDIV_HAS_OPENCL
if (initCL(&g_clContext, &g_clQueue) == false) {
printf(" Cannot initialize OpenCL, skipping...\n");
return 0;
}
#else
printf(" No OpenCL available, skipping...\n");
return 0;
#endif
}
int total = 0;
#define test_catmark_edgeonly
@ -652,13 +574,6 @@ int checkBackend(int backend, int levels) {
total += checkMesh( "test_bilinear_cube", bilinear_cube, levels, kBilinear, backend );
#endif
if (backend == kBackendCL) {
#ifdef OPENSUBDIV_HAS_OPENCL
uninitCL(g_clContext, g_clQueue);
#endif
}
return total;
}