Deduplicate CUDA and OpenCL wranglers
Until now this was mainly about the OpenCL wrangler being duplicated between Cycles and Compositor, but with the OpenSubdiv work those wranglers were going to be duplicated once again. This commit makes Cycles and Compositor use the wranglers from these repositories: - https://github.com/CudaWrangler/cuew - https://github.com/OpenCLWrangler/clew These repositories are based on the wranglers we used before, and they will likely continue to be maintained by us plus some more players in the market. It is a pretty straightforward change, with some tricks in CMake/SCons to make these libs get passed to the linker after all other libraries, so that OpenSubdiv can be linked against those wranglers in the future. For those who are worried about Cycles becoming less standalone: that is not true; it is rather more flexible now, and in the future different wranglers might be used in Cycles. For now it just means those libs need to be put into the Cycles repository together with some other libs from Blender, such as mikkspace. This is mainly a platform maintenance commit; there should not be any changes in user space. Reviewers: juicyfruit, dingto, campbellbarton Reviewed By: juicyfruit, dingto, campbellbarton Differential Revision: https://developer.blender.org/D707
This commit is contained in:
40
extern/cuew/CMakeLists.txt
vendored
Normal file
40
extern/cuew/CMakeLists.txt
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
# ***** BEGIN GPL LICENSE BLOCK *****
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
# The Original Code is Copyright (C) 2006, Blender Foundation
|
||||
# All rights reserved.
|
||||
#
|
||||
# The Original Code is: all of this file.
|
||||
#
|
||||
# Contributor(s): Jacques Beaurain.
|
||||
#
|
||||
# ***** END GPL LICENSE BLOCK *****
|
||||
|
||||
# Include directories used when compiling the wrangler itself.
set(INC . include)

# Intentionally empty: no system include directories are listed.
set(INC_SYS)

# Public header plus the single implementation file.
set(SRC include/cuew.h src/cuew.c)

blender_add_lib(extern_cuew "${SRC}" "${INC}" "${INC_SYS}")
|
174
extern/cuew/LICENSE
vendored
Normal file
174
extern/cuew/LICENSE
vendored
Normal file
@@ -0,0 +1,174 @@
|
||||
|
||||
Modified Apache 2.0 License
|
||||
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor
|
||||
and its affiliates, except as required to comply with Section 4(c) of
|
||||
the License and to reproduce the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
12
extern/cuew/README
vendored
Normal file
12
extern/cuew/README
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
The CUDA Extension Wrangler Library (CUEW) is a cross-platform open-source
|
||||
C/C++ extension loading library. CUEW provides efficient run-time mechanisms
|
||||
for determining which CUDA functions and extensions are supported
|
||||
on the target platform.
|
||||
|
||||
CUDA core and extension functionality is exposed in a single header file.
|
||||
CUEW has been tested on a variety of operating systems, including Windows,
|
||||
Linux, Mac OS X.
|
||||
|
||||
LICENSE
|
||||
|
||||
CUEW library is released under the Apache 2.0 license.
|
35
extern/cuew/SConscript
vendored
Normal file
35
extern/cuew/SConscript
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# ***** BEGIN GPL LICENSE BLOCK *****
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation,
|
||||
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
# The Original Code is Copyright (C) 2006, Blender Foundation
|
||||
# All rights reserved.
|
||||
#
|
||||
# The Original Code is: all of this file.
|
||||
#
|
||||
# Contributor(s): Nathan Letwory.
|
||||
#
|
||||
# ***** END GPL LICENSE BLOCK *****
|
||||
|
||||
Import('env')

# Single translation unit; headers are found through the 'include' directory.
cuew_sources = env.Glob('src/cuew.c')
cuew_includes = Split('include')
cuew_defines = []

env.BlenderLib('extern_cuew',
               cuew_sources,
               cuew_includes,
               defines=cuew_defines,
               libtype=['system'],
               priority=[0])
|
35
extern/cuew/auto/cuda_errors.py
vendored
Normal file
35
extern/cuew/auto/cuda_errors.py
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
# Maps each CUresult enumerator name to a short human-readable description.
CUDA_ERRORS = dict((
    ("CUDA_SUCCESS", "No errors"),
    ("CUDA_ERROR_INVALID_VALUE", "Invalid value"),
    ("CUDA_ERROR_OUT_OF_MEMORY", "Out of memory"),
    ("CUDA_ERROR_NOT_INITIALIZED", "Driver not initialized"),
    ("CUDA_ERROR_DEINITIALIZED", "Driver deinitialized"),
    ("CUDA_ERROR_NO_DEVICE", "No CUDA-capable device available"),
    ("CUDA_ERROR_INVALID_DEVICE", "Invalid device"),
    ("CUDA_ERROR_INVALID_IMAGE", "Invalid kernel image"),
    ("CUDA_ERROR_INVALID_CONTEXT", "Invalid context"),
    ("CUDA_ERROR_CONTEXT_ALREADY_CURRENT", "Context already current"),
    ("CUDA_ERROR_MAP_FAILED", "Map failed"),
    ("CUDA_ERROR_UNMAP_FAILED", "Unmap failed"),
    ("CUDA_ERROR_ARRAY_IS_MAPPED", "Array is mapped"),
    ("CUDA_ERROR_ALREADY_MAPPED", "Already mapped"),
    ("CUDA_ERROR_NO_BINARY_FOR_GPU", "No binary for GPU"),
    ("CUDA_ERROR_ALREADY_ACQUIRED", "Already acquired"),
    ("CUDA_ERROR_NOT_MAPPED", "Not mapped"),
    ("CUDA_ERROR_NOT_MAPPED_AS_ARRAY",
     "Mapped resource not available for access as an array"),
    ("CUDA_ERROR_NOT_MAPPED_AS_POINTER",
     "Mapped resource not available for access as a pointer"),
    ("CUDA_ERROR_ECC_UNCORRECTABLE", "Uncorrectable ECC error detected"),
    ("CUDA_ERROR_UNSUPPORTED_LIMIT", "CUlimit not supported by device"),
    ("CUDA_ERROR_INVALID_SOURCE", "Invalid source"),
    ("CUDA_ERROR_FILE_NOT_FOUND", "File not found"),
    ("CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND",
     "Link to a shared object failed to resolve"),
    ("CUDA_ERROR_SHARED_OBJECT_INIT_FAILED",
     "Shared object initialization failed"),
    ("CUDA_ERROR_INVALID_HANDLE", "Invalid handle"),
    ("CUDA_ERROR_NOT_FOUND", "Not found"),
    ("CUDA_ERROR_NOT_READY", "CUDA not ready"),
    ("CUDA_ERROR_LAUNCH_FAILED", "Launch failed"),
    ("CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES", "Launch exceeded resources"),
    ("CUDA_ERROR_LAUNCH_TIMEOUT", "Launch exceeded timeout"),
    ("CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING",
     "Launch with incompatible texturing"),
    ("CUDA_ERROR_UNKNOWN", "Unknown error"),
))
|
125
extern/cuew/auto/cuda_extra.py
vendored
Normal file
125
extern/cuew/auto/cuda_extra.py
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
# Hand-written C helpers kept as a Python string. Backslashes are doubled so
# that the C text contains a single one after Python processes the literal.
# NOTE(review): presumably spliced into the generated implementation file by
# the generator script -- confirm against the caller.
#
# The C code provides:
#   - path_join():           join two path components with the platform
#                            separator, always NUL-terminating the result.
#   - path_exists():         1 when stat() succeeds on the path, else 0.
#   - cuewCompilerPath():    path to nvcc taken from $CUDA_BIN_PATH, a list of
#                            default install directories, or (non-Windows)
#                            "nvcc" when `which nvcc` prints anything;
#                            NULL otherwise.
#   - cuewCompilerVersion(): runs "<nvcc> --version" and returns
#                            10 * major + minor, or 0 on any failure.
extra_code = """
static void path_join(const char *path1,
                      const char *path2,
                      int maxlen,
                      char *result) {
#if defined(WIN32) || defined(_WIN32)
  const char separator = '\\\\';
#else
  const char separator = '/';
#endif
  int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
  if (n != -1 && n < maxlen) {
    result[n] = '\\0';
  }
  else {
    /* Truncated or failed: force termination at the end of the buffer. */
    result[maxlen - 1] = '\\0';
  }
}

static int path_exists(const char *path) {
  struct stat st;
  if (stat(path, &st)) {
    return 0;
  }
  return 1;
}

const char *cuewCompilerPath(void) {
#ifdef _WIN32
  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
  const char *executable = "nvcc.exe";
#else
  const char *defaultpaths[] = {
    "/Developer/NVIDIA/CUDA-5.0/bin",
    "/usr/local/cuda-5.0/bin",
    "/usr/local/cuda/bin",
    "/Developer/NVIDIA/CUDA-6.0/bin",
    "/usr/local/cuda-6.0/bin",
    "/Developer/NVIDIA/CUDA-5.5/bin",
    "/usr/local/cuda-5.5/bin",
    NULL};
  const char *executable = "nvcc";
#endif
  int i;

  const char *binpath = getenv("CUDA_BIN_PATH");

  /* Static so the returned pointer stays valid after return. */
  static char nvcc[65536];

  if (binpath) {
    path_join(binpath, executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc))
      return nvcc;
  }

  for (i = 0; defaultpaths[i]; ++i) {
    path_join(defaultpaths[i], executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc))
      return nvcc;
  }

#ifndef _WIN32
  {
    FILE *handle = popen("which nvcc", "r");
    if (handle) {
      char buffer[4096] = {0};
      /* fread() returns size_t; at most sizeof(buffer) - 1 bytes are read. */
      size_t len = fread(buffer, 1, sizeof(buffer) - 1, handle);
      buffer[len] = '\\0';
      pclose(handle);

      if (buffer[0])
        return "nvcc";
    }
  }
#endif

  return NULL;
}

int cuewCompilerVersion(void) {
  const char *path = cuewCompilerPath();
  const char *marker = "Cuda compilation tools, release ";
  FILE *pipe;
  int major, minor;
  char *versionstr;
  char buf[128];
  char output[65536] = "\\0";
  char command[65536] = "\\0";

  if (path == NULL)
    return 0;

  /* get --version output */
  /* Both buffers start zero-filled; copying/appending at most size - 1
   * characters guarantees they stay NUL-terminated. */
  strncpy(command, path, sizeof(command) - 1);
  strncat(command, " --version", sizeof(command) - strlen(command) - 1);
  pipe = popen(command, "r");
  if (!pipe) {
    fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
    return 0;
  }

  /* Stop on EOF as well as on read errors. */
  while (fgets(buf, sizeof(buf), pipe) != NULL) {
    strncat(output, buf, sizeof(output) - strlen(output) - 1);
  }

  pclose(pipe);

  /* parse version number */
  versionstr = strstr(output, marker);
  if (versionstr == NULL) {
    fprintf(stderr, "CUDA: failed to find version number in:\\n\\n%s\\n", output);
    return 0;
  }
  versionstr += strlen(marker);

  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
    fprintf(stderr, "CUDA: failed to parse version number from:\\n\\n%s\\n", output);
    return 0;
  }

  return 10 * major + minor;
}
"""
|
591
extern/cuew/auto/cuew_gen.py
vendored
Normal file
591
extern/cuew/auto/cuew_gen.py
vendored
Normal file
@@ -0,0 +1,591 @@
|
||||
#!/usr/bin/env python3
|
||||
#
|
||||
# Copyright 2014 Blender Foundation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License
|
||||
|
||||
# This script generates either header or implementation file from
|
||||
# CUDA header files.
|
||||
#
|
||||
# Usage: cuew hdr|impl [/path/to/cuda/includes]
|
||||
# - hdr means header file will be generated and printed to stdout.
|
||||
# - impl means implementation file will be generated and printed to stdout.
|
||||
# - /path/to/cuda/includes is a path to a folder with cuda.h and cudaGL.h
|
||||
# for which wrangler will be generated.
|
||||
|
||||
import os
|
||||
import sys
|
||||
from cuda_errors import CUDA_ERRORS
|
||||
from pycparser import c_parser, c_ast, parse_file
|
||||
from subprocess import Popen, PIPE
|
||||
|
||||
INCLUDE_DIR = "/usr/include"
|
||||
LIB = "CUEW"
|
||||
REAL_LIB = "CUDA"
|
||||
VERSION_MAJOR = "1"
|
||||
VERSION_MINOR = "2"
|
||||
COPYRIGHT = """/*
|
||||
* Copyright 2011-2014 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License
|
||||
*/"""
|
||||
FILES = ["cuda.h", "cudaGL.h"]
|
||||
|
||||
TYPEDEFS = []
|
||||
FUNC_TYPEDEFS = []
|
||||
SYMBOLS = []
|
||||
DEFINES = []
|
||||
DEFINES_V2 = []
|
||||
ERRORS = []
|
||||
|
||||
|
||||
class FuncDefVisitor(c_ast.NodeVisitor):
    """Collect typedefs and function prototypes from a parsed C header.

    Visiting an AST appends to the module-level lists:
      - TYPEDEFS:      one "typedef ...;" string per Typedef node that is
                       not listed in ``dummy_typedefs``.
      - FUNC_TYPEDEFS: one "typedef <ret> CUDAAPI t<name>(<params>);" string
                       per function declaration.
      - SYMBOLS:       the name of each function declaration.
      - ERRORS:        every enumerator whose name starts with "CUDA_ERROR_".
    """

    # Current nesting depth used when indenting struct/union/enum members.
    indent = 0
    # Whether the previously emitted typedef was a struct or an enum; used to
    # insert a separating blank line between "complex" and simple typedefs.
    prev_complex = False
    # Default typedef names which are never emitted.
    dummy_typedefs = ['size_t', 'CUdeviceptr']

    def __init__(self):
        super().__init__()
        # Give every visitor its own list: callers append file-specific
        # names, which must not leak into the shared class attribute.
        self.dummy_typedefs = list(type(self).dummy_typedefs)

    def _get_quals_string(self, node):
        """Return the node's qualifiers joined by spaces, with a trailing
        space, or an empty string when there are none."""
        if node.quals:
            return ' '.join(node.quals) + ' '
        return ''

    def _get_ident_type(self, node):
        """Return the C spelling of the type described by ``node``."""
        if isinstance(node, c_ast.PtrDecl):
            pointee = node.type
            if isinstance(pointee, c_ast.FuncDecl):
                # Function pointers keep being rendered as
                # "<return type>*".
                pointee = pointee.type
            # Recurse on the direct pointee so that every level of a
            # pointer-to-pointer contributes its own '*'.
            return self._get_ident_type(pointee) + '*'
        elif isinstance(node, c_ast.ArrayDecl):
            return self._get_ident_type(node.type)
        elif isinstance(node, c_ast.Struct):
            if node.name:
                return 'struct ' + node.name
            self.indent += 1
            struct = self._stringify_struct(node)
            self.indent -= 1
            return "struct {\n" + struct + (" " * self.indent) + "}"
        elif isinstance(node, c_ast.Union):
            self.indent += 1
            union = self._stringify_struct(node)
            self.indent -= 1
            return "union {\n" + union + (" " * self.indent) + "}"
        elif isinstance(node, c_ast.Enum):
            # Anonymous enums have no name to append.
            if node.name:
                return 'enum ' + node.name
            return 'enum'
        elif isinstance(node, c_ast.TypeDecl):
            return self._get_ident_type(node.type)
        else:
            return node.names[0]

    def _stringify_param(self, param):
        """Return "<quals><type> <name>[<dim>]" for a parameter or a
        struct/union member."""
        param_type = param.type
        result = self._get_quals_string(param)
        result += self._get_ident_type(param_type)
        if param.name:
            result += ' ' + param.name
        if isinstance(param_type, c_ast.ArrayDecl):
            # An unsized array ("int a[]") has no dimension node.
            dim_node = param_type.dim
            dim = dim_node.value if dim_node is not None else ''
            # TODO(sergey): Workaround to deal with the
            # preprocessed file where array size got
            # substituted.
            if param.name == "reserved" and dim == "64":
                dim = "CU_IPC_HANDLE_SIZE"
            result += '[' + dim + ']'
        return result

    def _stringify_params(self, params):
        """Return the comma-separated rendering of ``params``."""
        return ', '.join(self._stringify_param(param) for param in params)

    def _stringify_struct(self, node):
        """Return one indented "<member>;" line per child of a struct or
        union node."""
        result = ""
        for child in node.children():
            member = self._stringify_param(child[1])
            result += (" " * self.indent) + member + ";\n"
        return result

    def _stringify_enum(self, node):
        """Return one indented "<name>[ = <value>]," line per enumerator and
        record CUDA_ERROR_* enumerator names in ERRORS."""
        result = ""
        for child in node.children():
            if not isinstance(child[1], c_ast.EnumeratorList):
                continue
            for enumerator in child[1].enumerators:
                result += (" " * self.indent) + enumerator.name
                if enumerator.value:
                    result += " = " + enumerator.value.value
                result += ",\n"
                if enumerator.name.startswith("CUDA_ERROR_"):
                    ERRORS.append(enumerator.name)
        return result

    def visit_Decl(self, node):
        """Record a function typedef and symbol for function declarations;
        all other declarations are ignored."""
        func_decl = node.type
        if not isinstance(func_decl, c_ast.FuncDecl):
            return

        func_decl_type = func_decl.type

        typedef = 'typedef '
        symbol_name = None

        if isinstance(func_decl_type, c_ast.TypeDecl):
            symbol_name = func_decl_type.declname
            typedef += self._get_quals_string(func_decl_type)
            typedef += self._get_ident_type(func_decl_type.type)
            typedef += ' CUDAAPI'
            typedef += ' t' + symbol_name
        elif isinstance(func_decl_type, c_ast.PtrDecl):
            # The declared name and qualifiers live on the innermost
            # node below any number of pointer levels.
            ptr_type = func_decl_type.type
            while isinstance(ptr_type, c_ast.PtrDecl):
                ptr_type = ptr_type.type
            symbol_name = ptr_type.declname
            typedef += self._get_quals_string(ptr_type)
            typedef += self._get_ident_type(func_decl_type)
            typedef += ' CUDAAPI'
            typedef += ' t' + symbol_name

        # A declaration with an empty parameter list ("f()") has no args.
        params = func_decl.args.params if func_decl.args else []
        typedef += '(' + self._stringify_params(params) + ');'

        SYMBOLS.append(symbol_name)
        FUNC_TYPEDEFS.append(typedef)

    def visit_Typedef(self, node):
        """Record a "typedef ...;" string unless the name is a dummy one."""
        if node.name in self.dummy_typedefs:
            return

        is_complex = False
        type_str = self._get_ident_type(node.type)
        quals = self._get_quals_string(node)

        if isinstance(node.type.type, c_ast.Struct):
            self.indent += 1
            struct = self._stringify_struct(node.type.type)
            self.indent -= 1
            typedef = quals + type_str + " {\n" + struct + "} " + node.name
            is_complex = True
        elif isinstance(node.type.type, c_ast.Enum):
            self.indent += 1
            enum = self._stringify_enum(node.type.type)
            self.indent -= 1
            typedef = quals + type_str + " {\n" + enum + "} " + node.name
            is_complex = True
        else:
            typedef = quals + type_str + " " + node.name

        # Separate struct/enum typedefs from their neighbours with a
        # blank line.
        if is_complex or self.prev_complex:
            typedef = "\ntypedef " + typedef + ";"
        else:
            typedef = "typedef " + typedef + ";"

        TYPEDEFS.append(typedef)

        self.prev_complex = is_complex
|
||||
|
||||
|
||||
def get_latest_cpp():
    """Return the path of a C preprocessor executable, or None.

    The newest ``/usr/bin/cpp-4.x`` (x from 9 down to 4) is preferred.
    When none of those exists, fall back to whatever ``cpp`` is found on
    PATH instead of returning None straight away, since a None path can
    not be executed by the caller.
    """
    # Local import: only this function needs it.
    import shutil

    path_prefix = "/usr/bin"
    for cpp_version in ["9", "8", "7", "6", "5", "4"]:
        test_cpp = os.path.join(path_prefix, "cpp-4." + cpp_version)
        if os.path.exists(test_cpp):
            return test_cpp
    # shutil.which() itself returns None when nothing is found.
    return shutil.which("cpp")
|
||||
|
||||
|
||||
def preprocess_file(filename, cpp_path):
    """Run the C preprocessor on ``filename`` and return its stdout as text.

    ``cpp_path`` is the preprocessor executable. Files whose name ends with
    "GL.h" are preprocessed with ``CUDAAPI`` defined on the command line.

    Raises RuntimeError when no preprocessor path is given or when the
    preprocessor can not be started.
    """
    if not cpp_path:
        # Without this check Popen() fails with a TypeError, which would
        # bypass the RuntimeError reporting below.
        raise RuntimeError("Unable to invoke 'cpp'. " +
                           'Make sure its path was passed correctly\n' +
                           'Original error: no preprocessor path given')

    args = [cpp_path, "-I./"]
    if filename.endswith("GL.h"):
        args.append("-DCUDAAPI= ")
    args.append(filename)

    try:
        pipe = Popen(args,
                     stdout=PIPE,
                     universal_newlines=True)
        text = pipe.communicate()[0]
    except OSError as e:
        raise RuntimeError("Unable to invoke 'cpp'. " +
                           'Make sure its path was passed correctly\n' +
                           ('Original error: %s' % e))

    return text
|
||||
|
||||
|
||||
def _define_tokens(line):
    """Return the whitespace-separated tokens that follow ``#define``."""
    return line.lstrip()[len("#define"):].split()


def parse_files():
    """Parse every header in FILES and fill the module-level result lists.

    For each file found under INCLUDE_DIR this:
      - preprocesses it and parses the result with pycparser,
      - records top-level ``#define`` lines in DEFINES,
      - records indented ``#define name name_v2`` lines in DEFINES_V2,
      - runs FuncDefVisitor over the AST,
      - appends an empty entry to FUNC_TYPEDEFS and SYMBOLS as a separator
        between files.
    """
    parser = c_parser.CParser()
    cpp_path = get_latest_cpp()

    # Macros which are never copied into DEFINES.
    skipped_defines = ("__cuda_cuda_h__", "CUDA_CB", "CUDAAPI")

    for filename in FILES:
        filepath = os.path.join(INCLUDE_DIR, filename)
        dummy_typedefs = {}
        text = preprocess_file(filepath, cpp_path)

        if filepath.endswith("GL.h"):
            # Stand-ins for types this header uses but does not declare.
            dummy_typedefs = {
                "CUresult": "int",
                "CUgraphicsResource": "void *",
                "CUdevice": "void *",
                "CUcontext": "void *",
                "CUdeviceptr": "void *",
                "CUstream": "void *"
                }

            text = "typedef int GLint;\n" + text
            text = "typedef unsigned int GLuint;\n" + text
            text = "typedef unsigned int GLenum;\n" + text
            text = "typedef long size_t;\n" + text

        for typedef in sorted(dummy_typedefs):
            text = "typedef " + dummy_typedefs[typedef] + " " + \
                typedef + ";\n" + text

        ast = parser.parse(text, filepath)

        with open(filepath) as f:
            lines = f.readlines()

        for line in lines:
            if line.startswith("#define"):
                token = _define_tokens(line)
                # A bare "#define" yields no tokens at all.
                if token and token[0] not in skipped_defines:
                    DEFINES.append(token)

        for line in lines:
            # TODO(sergey): Use better matching rule for _v2 symbols.
            if line[0].isspace() and line.lstrip().startswith("#define"):
                # Tokenise after stripping, so any indentation width and a
                # final line without a newline are handled.
                token = _define_tokens(line)
                if len(token) == 2 and token[1].endswith("_v2"):
                    DEFINES_V2.append(token)

        v = FuncDefVisitor()
        # Rebind to a fresh list rather than appending in place, so the
        # visitor class' shared default list is left untouched.
        v.dummy_typedefs = list(v.dummy_typedefs) + list(dummy_typedefs)
        v.visit(ast)

        FUNC_TYPEDEFS.append('')
        SYMBOLS.append('')
|
||||
|
||||
|
||||
def print_copyright():
    """Write the license banner followed by one empty line to stdout."""
    print(COPYRIGHT, "", sep="\n")
|
||||
|
||||
|
||||
def open_header_guard():
    """Write the opening include guard and the ``extern "C"`` opener."""
    guard = "__%s_H__" % (LIB)
    opening = (
        "#ifndef " + guard,
        "#define " + guard,
        "",
        "#ifdef __cplusplus",
        "extern \"C\" {",
        "#endif",
        "",
    )
    for text in opening:
        print(text)
|
||||
|
||||
|
||||
def close_header_guard():
    """Write the ``extern "C"`` closer and the closing include guard."""
    closing = (
        "",
        "#ifdef __cplusplus",
        "}",
        "#endif",
        "",
        "#endif /* __%s_H__ */" % (LIB),
    )
    for text in closing:
        print(text)
|
||||
|
||||
|
||||
def print_header():
    """Write the complete wrangler header file to stdout.

    Sections are emitted in this order: license banner, include guard,
    defines, types, calling-convention macros, function types, function
    pointer declarations, and the wrangler's own API.
    """
    print_copyright()
    open_header_guard()

    # stdlib.h is included for size_t.
    print("#include <stdlib.h>")
    print("")

    print("/* Defines. */")
    for part, number in (("MAJOR", VERSION_MAJOR), ("MINOR", VERSION_MINOR)):
        print("#define %s_VERSION_%s %s" % (LIB, part, number))
    print("")
    for tokens in DEFINES:
        print('#define ' + ' '.join(tokens))
    print("")

    print("""/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
* the cuda library has both the old ones for compatibility and new
* ones with _v2 postfix,
*/""")
    for tokens in DEFINES_V2:
        print('#define ' + ' '.join(tokens))
    print("")

    print("/* Types. */")

    # CUdeviceptr is written by hand: the parsed header has already been
    # preprocessed, so its platform conditional is gone.
    print("""#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif
""")

    for declaration in TYPEDEFS:
        print(declaration)

    # TODO(sergey): This is only specific to CUDA wrapper.
    print("""
#ifdef _WIN32
# define CUDAAPI __stdcall
# define CUDA_CB __stdcall
#else
# define CUDAAPI
# define CUDA_CB
#endif
""")

    print("/* Function types. */")
    for declaration in FUNC_TYPEDEFS:
        print(declaration)
    print("")

    print("/* Function declarations. */")
    for name in SYMBOLS:
        # Empty names are separators and become blank lines.
        print('extern t%s *%s;' % (name, name) if name else "")

    print("")
    for text in ("enum {",
                 " CUEW_SUCCESS = 0,",
                 " CUEW_ERROR_OPEN_FAILED = -1,",
                 " CUEW_ERROR_ATEXIT_FAILED = -2,",
                 "};",
                 ""):
        print(text)

    prefix = LIB.lower()
    print("int %sInit(void);" % (prefix))
    # TODO(sergey): Get rid of hardcoded CUresult.
    print("const char *%sErrorString(CUresult result);" % (prefix))
    print("const char *cuewCompilerPath(void);")
    print("int cuewCompilerVersion(void);")

    close_header_guard()
|
||||
|
||||
|
||||
def print_dl_wrapper():
    """Write the platform abstraction for dynamic loading.

    The emitted C defines the DynamicLibrary handle type and the
    dynamic_library_open/close/find macros on top of the Win32 loader API
    or <dlfcn.h>, selected by _WIN32.
    """
    # LoadLibraryA is spelled out on purpose: the generated code passes a
    # narrow `const char *` path, while the generic LoadLibrary name expands
    # to the wide-character LoadLibraryW when UNICODE is defined.
    print("""#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>

/* Utility macros. */

typedef HMODULE DynamicLibrary;

# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>

typedef void* DynamicLibrary;

# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
""")
|
||||
|
||||
|
||||
def print_dl_helper_macro():
    """Write the symbol-lookup macros and the static library handle.

    Two macros are emitted, both prefixed with REAL_LIB:

    * <REAL_LIB>_LIBRARY_FIND_CHECKED(name) only performs the lookup, so the
      caller can test the resulting pointer itself.
    * <REAL_LIB>_LIBRARY_FIND(name) performs the lookup and asserts that the
      symbol was found.

    Each macro expands to a single do { ... } while (0) statement without a
    trailing semicolon, so an invocation followed by `;` is exactly one
    statement and stays correct inside an unbraced if/else.
    """
    print("""#define %s_LIBRARY_FIND_CHECKED(name) \\
  do { \\
    name = (t##name *)dynamic_library_find(lib, #name); \\
  } while (0)

#define %s_LIBRARY_FIND(name) \\
  do { \\
    name = (t##name *)dynamic_library_find(lib, #name); \\
    assert(name); \\
  } while (0)

static DynamicLibrary lib;""" % (REAL_LIB, REAL_LIB))
    print("")
|
||||
|
||||
|
||||
def print_dl_close():
    """Write the static <lib>Exit() routine that unloads the library.

    The function name is derived from LIB in lower case. The emitted code
    closes the handle only when one is open and then resets it to NULL.
    """
    exit_name = "%sExit" % (LIB.lower())
    body = """  if(lib != NULL) {
    /* Ignore errors. */
    dynamic_library_close(lib);
    lib = NULL;
  }
}"""
    print("static void " + exit_name + "(void) {\n" + body)
    print("")
|
||||
|
||||
|
||||
def print_lib_path():
    """Write the per-platform declaration of `path`, the driver library name."""
    # TODO(sergey): get rid of hardcoded libraries.
    declaration_lines = (
        "#ifdef _WIN32",
        "  /* Expected in c:/windows/system or similar, no path needed. */",
        "  const char *path = \"nvcuda.dll\";",
        "#elif defined(__APPLE__)",
        "  /* Default installation path. */",
        "  const char *path = \"/usr/local/cuda/lib/libcuda.dylib\";",
        "#else",
        "  const char *path = \"libcuda.so\";",
        "#endif",
    )
    print("\n".join(declaration_lines))
|
||||
|
||||
|
||||
def print_init_guard():
    """Write the first part of the init function body.

    The emitted C code
      * returns the cached result when initialization already ran,
      * registers the <lib>Exit() handler with atexit(),
      * opens the library named by `path`, and
      * reports CUEW_ERROR_ATEXIT_FAILED / CUEW_ERROR_OPEN_FAILED on failure.

    The exit handler name is derived from LIB in lower case, the same way
    print_dl_close() names the function it emits, so the two cannot drift
    apart.
    """
    # On platforms other than Windows and macOS the unversioned "libcuda.so"
    # may be missing when only the runtime part of the driver is installed;
    # the versioned soname is tried before giving up.
    print("""  static int initialized = 0;
  static int result = 0;
  int error, driver_version;

  if (initialized) {
    return result;
  }

  initialized = 1;

  error = atexit(%sExit);
  if (error) {
    result = CUEW_ERROR_ATEXIT_FAILED;
    return result;
  }

  /* Load library. */
  lib = dynamic_library_open(path);
#if !defined(_WIN32) && !defined(__APPLE__)
  if (lib == NULL) {
    /* Fall back to the versioned soname. */
    lib = dynamic_library_open("libcuda.so.1");
  }
#endif

  if (lib == NULL) {
    result = CUEW_ERROR_OPEN_FAILED;
    return result;
  }""" % (LIB.lower()))
    print("")
|
||||
|
||||
|
||||
def print_driver_version_guard():
    """Write the driver version check of the init function.

    The emitted code starts from a default version of 1000, overwrites it via
    cuDriverGetVersion when that symbol can be resolved, and fails with
    CUEW_ERROR_OPEN_FAILED for anything below 4000.
    """
    # TODO(sergey): Currently it's hardcoded for CUDA only.
    template = """  /* Detect driver version. */
  driver_version = 1000;

  %(prefix)s_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
  if (cuDriverGetVersion) {
    cuDriverGetVersion(&driver_version);
  }

  /* We require version 4.0. */
  if (driver_version < 4000) {
    result = CUEW_ERROR_OPEN_FAILED;
    return result;
  }"""
    print(template % {"prefix": REAL_LIB})
|
||||
|
||||
|
||||
def print_dl_init():
    """Write the complete <lib>Init() function.

    The body is assembled from print_lib_path(), print_init_guard() and
    print_driver_version_guard(), followed by one <REAL_LIB>_LIBRARY_FIND
    line per entry of SYMBOLS (an empty entry becomes a blank line) and the
    success return.
    """
    init_name = LIB.lower() + "Init"
    print("int %s(void) {" % (init_name))

    print("  /* Library paths. */")
    print_lib_path()
    print_init_guard()
    print_driver_version_guard()

    print("  /* Fetch all function pointers. */")
    fetch_lines = [
        "  %s_LIBRARY_FIND(%s);" % (REAL_LIB, symbol) if symbol else ""
        for symbol in SYMBOLS
    ]
    for text in fetch_lines:
        print(text)

    for text in ("", "  result = CUEW_SUCCESS;", "  return result;", "}"):
        print(text)
|
||||
|
||||
|
||||
def print_implementation():
    """Write the complete implementation file to stdout.

    Output order: license banner, MSVC compatibility macros, includes, the
    dynamic-loading wrapper and lookup macros, one function-pointer
    definition per entry of SYMBOLS, the exit and init routines, the
    <lib>ErrorString() switch built from ERRORS, and finally the
    hand-written code imported from cuda_extra.
    """
    print_copyright()

    # TODO(sergey): Get rid of hardcoded header.
    print("""#ifdef _MSC_VER
# define snprintf _snprintf
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
""")
    for header in ("cuew.h", "assert.h", "stdio.h", "string.h", "sys/stat.h"):
        print("#include <%s>" % (header))
    print("")

    print_dl_wrapper()
    print_dl_helper_macro()

    print("/* Function definitions. */")
    for symbol in SYMBOLS:
        # An empty entry separates groups of symbols with a blank line.
        print('t%s *%s;' % (symbol, symbol) if symbol else "")
    print("")

    print_dl_close()

    print("/* Implementation function. */")
    print_dl_init()

    print("")
    # TODO(sergey): Get rid of hardcoded CUresult.
    print("const char *%sErrorString(CUresult result) {" % (LIB.lower()))
    print("  switch(result) {")
    print("    case CUDA_SUCCESS: return \"No errors\";")

    # Errors without a hand-written message fall back to their enumerator
    # name with the common prefix removed.
    prefix_length = len("CUDA_ERROR_")
    for error in ERRORS:
        message = CUDA_ERRORS[error] if error in CUDA_ERRORS else error[prefix_length:]
        print("    case %s: return \"%s\";" % (error, message))

    print("    default: return \"Unknown CUDA error value\";")
    print("  }")
    print("}")

    from cuda_extra import extra_code
    print(extra_code)
|
||||
|
||||
if __name__ == "__main__":
    # Command line: <script> hdr|impl [/path/to/cuda/toolkit/include]
    #
    # The generated file goes to stdout, so diagnostics are written to
    # stderr: otherwise they would end up inside the redirected output file.
    # sys.exit() is used rather than the bare exit(), which is only injected
    # by the `site` module and is not available in every interpreter setup.
    if len(sys.argv) != 2 and len(sys.argv) != 3:
        sys.stderr.write("Usage: %s hdr|impl [/path/to/cuda/toolkit/include]\n" %
                         (sys.argv[0]))
        sys.exit(1)

    # Optional second argument overrides the toolkit include directory.
    if len(sys.argv) == 3:
        INCLUDE_DIR = sys.argv[2]

    parse_files()

    if sys.argv[1] == "hdr":
        print_header()
    elif sys.argv[1] == "impl":
        print_implementation()
    else:
        sys.stderr.write("Unknown command %s\n" % (sys.argv[1]))
        sys.exit(1)
|
10
extern/cuew/auto/cuew_gen.sh
vendored
Executable file
10
extern/cuew/auto/cuew_gen.sh
vendored
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/sh

# This script invokes cuew_gen.py and updates the
# header and source files in the repository.
#
# All arguments are forwarded unchanged to the generator. Every expansion is
# quoted so that a checkout path or an argument containing whitespace is not
# split into several words.

SCRIPT=$(realpath -s "$0")
DIR=$(dirname "$SCRIPT")

python "${DIR}/cuew_gen.py" hdr "$@" > "${DIR}/../include/cuew.h"
python "${DIR}/cuew_gen.py" impl "$@" > "${DIR}/../src/cuew.c"
|
3
extern/cuew/auto/stdlib.h
vendored
Normal file
3
extern/cuew/auto/stdlib.h
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
/* This file is needed to work around an issue with parsing system headers. */
|
||||
|
||||
/* Stand-in for the system definition. size_t is an unsigned type, so the
 * stub is declared unsigned as well. */
typedef unsigned long size_t;
|
1138
extern/cuew/include/cuew.h
vendored
Normal file
1138
extern/cuew/include/cuew.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
710
extern/cuew/src/cuew.c
vendored
Normal file
710
extern/cuew/src/cuew.c
vendored
Normal file
@@ -0,0 +1,710 @@
|
||||
/*
|
||||
* Copyright 2011-2014 Blender Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define snprintf _snprintf
|
||||
# define popen _popen
|
||||
# define pclose _pclose
|
||||
# define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#include <cuew.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#ifdef _WIN32
#  define WIN32_LEAN_AND_MEAN
#  define VC_EXTRALEAN
#  include <windows.h>

/* Utility macros. */

typedef HMODULE DynamicLibrary;

/* The library path is a narrow (char) string, so the ANSI loader is named
 * explicitly: the generic LoadLibrary expands to the wide-character
 * LoadLibraryW when UNICODE is defined. */
#  define dynamic_library_open(path)         LoadLibraryA(path)
#  define dynamic_library_close(lib)         FreeLibrary(lib)
#  define dynamic_library_find(lib, symbol)  GetProcAddress(lib, symbol)
#else
#  include <dlfcn.h>

typedef void* DynamicLibrary;

#  define dynamic_library_open(path)         dlopen(path, RTLD_NOW)
#  define dynamic_library_close(lib)         dlclose(lib)
#  define dynamic_library_find(lib, symbol)  dlsym(lib, symbol)
#endif

/* Both lookup macros resolve `name` in `lib` and store the result in the
 * global pointer of the same name. They expand to one do/while(0) statement
 * without a trailing semicolon, so `MACRO(x);` is exactly one statement and
 * remains valid inside an unbraced if/else. */

/* Lookup only; the caller inspects the pointer afterwards. */
#define CUDA_LIBRARY_FIND_CHECKED(name) \
  do { \
    name = (t##name *)dynamic_library_find(lib, #name); \
  } while (0)

/* Lookup followed by an assertion that the symbol exists. */
#define CUDA_LIBRARY_FIND(name) \
  do { \
    name = (t##name *)dynamic_library_find(lib, #name); \
    assert(name); \
  } while (0)

/* Handle of the loaded driver library, NULL while none is open. */
static DynamicLibrary lib;
|
||||
|
||||
/* Function definitions. */
|
||||
/* One global pointer per wrapped driver entry point. cuewInit() assigns each
 * of them by looking the symbol of the same name up in the loaded library. */
tcuGetErrorString *cuGetErrorString;
|
||||
tcuGetErrorName *cuGetErrorName;
|
||||
tcuInit *cuInit;
|
||||
tcuDriverGetVersion *cuDriverGetVersion;
|
||||
tcuDeviceGet *cuDeviceGet;
|
||||
tcuDeviceGetCount *cuDeviceGetCount;
|
||||
tcuDeviceGetName *cuDeviceGetName;
|
||||
tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
|
||||
tcuDeviceGetAttribute *cuDeviceGetAttribute;
|
||||
tcuDeviceGetProperties *cuDeviceGetProperties;
|
||||
tcuDeviceComputeCapability *cuDeviceComputeCapability;
|
||||
tcuCtxCreate_v2 *cuCtxCreate_v2;
|
||||
tcuCtxDestroy_v2 *cuCtxDestroy_v2;
|
||||
tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
|
||||
tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
|
||||
tcuCtxSetCurrent *cuCtxSetCurrent;
|
||||
tcuCtxGetCurrent *cuCtxGetCurrent;
|
||||
tcuCtxGetDevice *cuCtxGetDevice;
|
||||
tcuCtxSynchronize *cuCtxSynchronize;
|
||||
tcuCtxSetLimit *cuCtxSetLimit;
|
||||
tcuCtxGetLimit *cuCtxGetLimit;
|
||||
tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
|
||||
tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
|
||||
tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
|
||||
tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
|
||||
tcuCtxGetApiVersion *cuCtxGetApiVersion;
|
||||
tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
|
||||
tcuCtxAttach *cuCtxAttach;
|
||||
tcuCtxDetach *cuCtxDetach;
|
||||
tcuModuleLoad *cuModuleLoad;
|
||||
tcuModuleLoadData *cuModuleLoadData;
|
||||
tcuModuleLoadDataEx *cuModuleLoadDataEx;
|
||||
tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
|
||||
tcuModuleUnload *cuModuleUnload;
|
||||
tcuModuleGetFunction *cuModuleGetFunction;
|
||||
tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
|
||||
tcuModuleGetTexRef *cuModuleGetTexRef;
|
||||
tcuModuleGetSurfRef *cuModuleGetSurfRef;
|
||||
tcuLinkCreate *cuLinkCreate;
|
||||
tcuLinkAddData *cuLinkAddData;
|
||||
tcuLinkAddFile *cuLinkAddFile;
|
||||
tcuLinkComplete *cuLinkComplete;
|
||||
tcuLinkDestroy *cuLinkDestroy;
|
||||
tcuMemGetInfo_v2 *cuMemGetInfo_v2;
|
||||
tcuMemAlloc_v2 *cuMemAlloc_v2;
|
||||
tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
|
||||
tcuMemFree_v2 *cuMemFree_v2;
|
||||
tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
|
||||
tcuMemAllocHost_v2 *cuMemAllocHost_v2;
|
||||
tcuMemFreeHost *cuMemFreeHost;
|
||||
tcuMemHostAlloc *cuMemHostAlloc;
|
||||
tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
|
||||
tcuMemHostGetFlags *cuMemHostGetFlags;
|
||||
tcuMemAllocManaged *cuMemAllocManaged;
|
||||
tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
|
||||
tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
|
||||
tcuIpcGetEventHandle *cuIpcGetEventHandle;
|
||||
tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
|
||||
tcuIpcGetMemHandle *cuIpcGetMemHandle;
|
||||
tcuIpcOpenMemHandle *cuIpcOpenMemHandle;
|
||||
tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
|
||||
tcuMemHostRegister *cuMemHostRegister;
|
||||
tcuMemHostUnregister *cuMemHostUnregister;
|
||||
tcuMemcpy *cuMemcpy;
|
||||
tcuMemcpyPeer *cuMemcpyPeer;
|
||||
tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
|
||||
tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
|
||||
tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
|
||||
tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
|
||||
tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
|
||||
tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
|
||||
tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
|
||||
tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
|
||||
tcuMemcpy2D_v2 *cuMemcpy2D_v2;
|
||||
tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
|
||||
tcuMemcpy3D_v2 *cuMemcpy3D_v2;
|
||||
tcuMemcpy3DPeer *cuMemcpy3DPeer;
|
||||
tcuMemcpyAsync *cuMemcpyAsync;
|
||||
tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
|
||||
tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
|
||||
tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
|
||||
tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
|
||||
tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
|
||||
tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
|
||||
tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
|
||||
tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
|
||||
tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
|
||||
tcuMemsetD8_v2 *cuMemsetD8_v2;
|
||||
tcuMemsetD16_v2 *cuMemsetD16_v2;
|
||||
tcuMemsetD32_v2 *cuMemsetD32_v2;
|
||||
tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
|
||||
tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
|
||||
tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
|
||||
tcuMemsetD8Async *cuMemsetD8Async;
|
||||
tcuMemsetD16Async *cuMemsetD16Async;
|
||||
tcuMemsetD32Async *cuMemsetD32Async;
|
||||
tcuMemsetD2D8Async *cuMemsetD2D8Async;
|
||||
tcuMemsetD2D16Async *cuMemsetD2D16Async;
|
||||
tcuMemsetD2D32Async *cuMemsetD2D32Async;
|
||||
tcuArrayCreate_v2 *cuArrayCreate_v2;
|
||||
tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
|
||||
tcuArrayDestroy *cuArrayDestroy;
|
||||
tcuArray3DCreate_v2 *cuArray3DCreate_v2;
|
||||
tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
|
||||
tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
|
||||
tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
|
||||
tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
|
||||
tcuPointerGetAttribute *cuPointerGetAttribute;
|
||||
tcuPointerSetAttribute *cuPointerSetAttribute;
|
||||
tcuStreamCreate *cuStreamCreate;
|
||||
tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
|
||||
tcuStreamGetPriority *cuStreamGetPriority;
|
||||
tcuStreamGetFlags *cuStreamGetFlags;
|
||||
tcuStreamWaitEvent *cuStreamWaitEvent;
|
||||
tcuStreamAddCallback *cuStreamAddCallback;
|
||||
tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
|
||||
tcuStreamQuery *cuStreamQuery;
|
||||
tcuStreamSynchronize *cuStreamSynchronize;
|
||||
tcuStreamDestroy_v2 *cuStreamDestroy_v2;
|
||||
tcuEventCreate *cuEventCreate;
|
||||
tcuEventRecord *cuEventRecord;
|
||||
tcuEventQuery *cuEventQuery;
|
||||
tcuEventSynchronize *cuEventSynchronize;
|
||||
tcuEventDestroy_v2 *cuEventDestroy_v2;
|
||||
tcuEventElapsedTime *cuEventElapsedTime;
|
||||
tcuFuncGetAttribute *cuFuncGetAttribute;
|
||||
tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
|
||||
tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
|
||||
tcuLaunchKernel *cuLaunchKernel;
|
||||
tcuFuncSetBlockShape *cuFuncSetBlockShape;
|
||||
tcuFuncSetSharedSize *cuFuncSetSharedSize;
|
||||
tcuParamSetSize *cuParamSetSize;
|
||||
tcuParamSeti *cuParamSeti;
|
||||
tcuParamSetf *cuParamSetf;
|
||||
tcuParamSetv *cuParamSetv;
|
||||
tcuLaunch *cuLaunch;
|
||||
tcuLaunchGrid *cuLaunchGrid;
|
||||
tcuLaunchGridAsync *cuLaunchGridAsync;
|
||||
tcuParamSetTexRef *cuParamSetTexRef;
|
||||
tcuTexRefSetArray *cuTexRefSetArray;
|
||||
tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
|
||||
tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
|
||||
tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
|
||||
tcuTexRefSetFormat *cuTexRefSetFormat;
|
||||
tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
|
||||
tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
|
||||
tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
|
||||
tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
|
||||
tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
|
||||
tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
|
||||
tcuTexRefSetFlags *cuTexRefSetFlags;
|
||||
tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
|
||||
tcuTexRefGetArray *cuTexRefGetArray;
|
||||
tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
|
||||
tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
|
||||
tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
|
||||
tcuTexRefGetFormat *cuTexRefGetFormat;
|
||||
tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
|
||||
tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
|
||||
tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
|
||||
tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
|
||||
tcuTexRefGetFlags *cuTexRefGetFlags;
|
||||
tcuTexRefCreate *cuTexRefCreate;
|
||||
tcuTexRefDestroy *cuTexRefDestroy;
|
||||
tcuSurfRefSetArray *cuSurfRefSetArray;
|
||||
tcuSurfRefGetArray *cuSurfRefGetArray;
|
||||
tcuTexObjectCreate *cuTexObjectCreate;
|
||||
tcuTexObjectDestroy *cuTexObjectDestroy;
|
||||
tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
|
||||
tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
|
||||
tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
|
||||
tcuSurfObjectCreate *cuSurfObjectCreate;
|
||||
tcuSurfObjectDestroy *cuSurfObjectDestroy;
|
||||
tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
|
||||
tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
|
||||
tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
|
||||
tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
|
||||
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
|
||||
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
|
||||
tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
|
||||
tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
|
||||
tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags;
|
||||
tcuGraphicsMapResources *cuGraphicsMapResources;
|
||||
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
|
||||
tcuGetExportTable *cuGetExportTable;
|
||||
|
||||
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
|
||||
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
|
||||
tcuGLGetDevices *cuGLGetDevices;
|
||||
tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
|
||||
tcuGLInit *cuGLInit;
|
||||
tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
|
||||
tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
|
||||
tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
|
||||
tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
|
||||
tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
|
||||
tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
|
||||
tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
|
||||
|
||||
|
||||
static void cuewExit(void) {
|
||||
if(lib != NULL) {
|
||||
/* Ignore errors. */
|
||||
dynamic_library_close(lib);
|
||||
lib = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Implementation function. */
|
||||
int cuewInit(void) {
|
||||
/* Library paths. */
|
||||
#ifdef _WIN32
|
||||
/* Expected in c:/windows/system or similar, no path needed. */
|
||||
const char *path = "nvcuda.dll";
|
||||
#elif defined(__APPLE__)
|
||||
/* Default installation path. */
|
||||
const char *path = "/usr/local/cuda/lib/libcuda.dylib";
|
||||
#else
|
||||
const char *path = "libcuda.so";
|
||||
#endif
|
||||
static int initialized = 0;
|
||||
static int result = 0;
|
||||
int error, driver_version;
|
||||
|
||||
if (initialized) {
|
||||
return result;
|
||||
}
|
||||
|
||||
initialized = 1;
|
||||
|
||||
error = atexit(cuewExit);
|
||||
if (error) {
|
||||
result = CUEW_ERROR_ATEXIT_FAILED;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Load library. */
|
||||
lib = dynamic_library_open(path);
|
||||
|
||||
if (lib == NULL) {
|
||||
result = CUEW_ERROR_OPEN_FAILED;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Detect driver version. */
|
||||
driver_version = 1000;
|
||||
|
||||
CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
|
||||
if (cuDriverGetVersion) {
|
||||
cuDriverGetVersion(&driver_version);
|
||||
}
|
||||
|
||||
/* We require version 4.0. */
|
||||
if (driver_version < 4000) {
|
||||
result = CUEW_ERROR_OPEN_FAILED;
|
||||
return result;
|
||||
}
|
||||
/* Fetch all function pointers. */
|
||||
CUDA_LIBRARY_FIND(cuGetErrorString);
|
||||
CUDA_LIBRARY_FIND(cuGetErrorName);
|
||||
CUDA_LIBRARY_FIND(cuInit);
|
||||
CUDA_LIBRARY_FIND(cuDriverGetVersion);
|
||||
CUDA_LIBRARY_FIND(cuDeviceGet);
|
||||
CUDA_LIBRARY_FIND(cuDeviceGetCount);
|
||||
CUDA_LIBRARY_FIND(cuDeviceGetName);
|
||||
CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
|
||||
CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
|
||||
CUDA_LIBRARY_FIND(cuDeviceGetProperties);
|
||||
CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
|
||||
CUDA_LIBRARY_FIND(cuCtxCreate_v2);
|
||||
CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
|
||||
CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
|
||||
CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
|
||||
CUDA_LIBRARY_FIND(cuCtxSetCurrent);
|
||||
CUDA_LIBRARY_FIND(cuCtxGetCurrent);
|
||||
CUDA_LIBRARY_FIND(cuCtxGetDevice);
|
||||
CUDA_LIBRARY_FIND(cuCtxSynchronize);
|
||||
CUDA_LIBRARY_FIND(cuCtxSetLimit);
|
||||
CUDA_LIBRARY_FIND(cuCtxGetLimit);
|
||||
CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
|
||||
CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
|
||||
CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
|
||||
CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
|
||||
CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
|
||||
CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
|
||||
CUDA_LIBRARY_FIND(cuCtxAttach);
|
||||
CUDA_LIBRARY_FIND(cuCtxDetach);
|
||||
CUDA_LIBRARY_FIND(cuModuleLoad);
|
||||
CUDA_LIBRARY_FIND(cuModuleLoadData);
|
||||
CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
|
||||
CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
|
||||
CUDA_LIBRARY_FIND(cuModuleUnload);
|
||||
CUDA_LIBRARY_FIND(cuModuleGetFunction);
|
||||
CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
|
||||
CUDA_LIBRARY_FIND(cuModuleGetTexRef);
|
||||
CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
|
||||
CUDA_LIBRARY_FIND(cuLinkCreate);
|
||||
CUDA_LIBRARY_FIND(cuLinkAddData);
|
||||
CUDA_LIBRARY_FIND(cuLinkAddFile);
|
||||
CUDA_LIBRARY_FIND(cuLinkComplete);
|
||||
CUDA_LIBRARY_FIND(cuLinkDestroy);
|
||||
CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemAlloc_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemFree_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemFreeHost);
|
||||
CUDA_LIBRARY_FIND(cuMemHostAlloc);
|
||||
CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemHostGetFlags);
|
||||
CUDA_LIBRARY_FIND(cuMemAllocManaged);
|
||||
CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
|
||||
CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
|
||||
CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
|
||||
CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
|
||||
CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
|
||||
CUDA_LIBRARY_FIND(cuIpcOpenMemHandle);
|
||||
CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
|
||||
CUDA_LIBRARY_FIND(cuMemHostRegister);
|
||||
CUDA_LIBRARY_FIND(cuMemHostUnregister);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyPeer);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyAsync);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD8_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD16_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD32_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD8Async);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD16Async);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD32Async);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
|
||||
CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
|
||||
CUDA_LIBRARY_FIND(cuArrayCreate_v2);
|
||||
CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
|
||||
CUDA_LIBRARY_FIND(cuArrayDestroy);
|
||||
CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
|
||||
CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
|
||||
CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
|
||||
CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
|
||||
CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
|
||||
CUDA_LIBRARY_FIND(cuPointerGetAttribute);
|
||||
CUDA_LIBRARY_FIND(cuPointerSetAttribute);
|
||||
CUDA_LIBRARY_FIND(cuStreamCreate);
|
||||
CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
|
||||
CUDA_LIBRARY_FIND(cuStreamGetPriority);
|
||||
CUDA_LIBRARY_FIND(cuStreamGetFlags);
|
||||
CUDA_LIBRARY_FIND(cuStreamWaitEvent);
|
||||
CUDA_LIBRARY_FIND(cuStreamAddCallback);
|
||||
CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
|
||||
CUDA_LIBRARY_FIND(cuStreamQuery);
|
||||
CUDA_LIBRARY_FIND(cuStreamSynchronize);
|
||||
CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
|
||||
CUDA_LIBRARY_FIND(cuEventCreate);
|
||||
CUDA_LIBRARY_FIND(cuEventRecord);
|
||||
CUDA_LIBRARY_FIND(cuEventQuery);
|
||||
CUDA_LIBRARY_FIND(cuEventSynchronize);
|
||||
CUDA_LIBRARY_FIND(cuEventDestroy_v2);
|
||||
CUDA_LIBRARY_FIND(cuEventElapsedTime);
|
||||
CUDA_LIBRARY_FIND(cuFuncGetAttribute);
|
||||
CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
|
||||
CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
|
||||
CUDA_LIBRARY_FIND(cuLaunchKernel);
|
||||
CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
|
||||
CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
|
||||
CUDA_LIBRARY_FIND(cuParamSetSize);
|
||||
CUDA_LIBRARY_FIND(cuParamSeti);
|
||||
CUDA_LIBRARY_FIND(cuParamSetf);
|
||||
CUDA_LIBRARY_FIND(cuParamSetv);
|
||||
CUDA_LIBRARY_FIND(cuLaunch);
|
||||
CUDA_LIBRARY_FIND(cuLaunchGrid);
|
||||
CUDA_LIBRARY_FIND(cuLaunchGridAsync);
|
||||
CUDA_LIBRARY_FIND(cuParamSetTexRef);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetArray);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetFormat);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
|
||||
CUDA_LIBRARY_FIND(cuTexRefSetFlags);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetArray);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetFormat);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
|
||||
CUDA_LIBRARY_FIND(cuTexRefGetFlags);
|
||||
CUDA_LIBRARY_FIND(cuTexRefCreate);
|
||||
CUDA_LIBRARY_FIND(cuTexRefDestroy);
|
||||
CUDA_LIBRARY_FIND(cuSurfRefSetArray);
|
||||
CUDA_LIBRARY_FIND(cuSurfRefGetArray);
|
||||
CUDA_LIBRARY_FIND(cuTexObjectCreate);
|
||||
CUDA_LIBRARY_FIND(cuTexObjectDestroy);
|
||||
CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
|
||||
CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
|
||||
CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
|
||||
CUDA_LIBRARY_FIND(cuSurfObjectCreate);
|
||||
CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
|
||||
CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
|
||||
CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
|
||||
CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
|
||||
CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsMapResources);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
|
||||
CUDA_LIBRARY_FIND(cuGetExportTable);
|
||||
|
||||
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
|
||||
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
|
||||
CUDA_LIBRARY_FIND(cuGLGetDevices);
|
||||
CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
|
||||
CUDA_LIBRARY_FIND(cuGLInit);
|
||||
CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
|
||||
CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
|
||||
CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
|
||||
CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
|
||||
CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
|
||||
CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
|
||||
CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
|
||||
|
||||
|
||||
result = CUEW_SUCCESS;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Translate a CUDA driver API result code into a human-readable,
 * statically allocated string.
 *
 * Codes without a dedicated description are reported by their symbolic
 * name (without the CUDA_ERROR_ prefix). Values that match no known code
 * yield "Unknown CUDA error value". The returned pointer always refers to
 * a string literal and must not be freed or modified.
 */
const char *cuewErrorString(CUresult result) {
  const char *message;

  switch (result) {
    case CUDA_SUCCESS:
      message = "No errors";
      break;
    case CUDA_ERROR_INVALID_VALUE:
      message = "Invalid value";
      break;
    case CUDA_ERROR_OUT_OF_MEMORY:
      message = "Out of memory";
      break;
    case CUDA_ERROR_NOT_INITIALIZED:
      message = "Driver not initialized";
      break;
    case CUDA_ERROR_DEINITIALIZED:
      message = "Driver deinitialized";
      break;
    case CUDA_ERROR_PROFILER_DISABLED:
      message = "PROFILER_DISABLED";
      break;
    case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
      message = "PROFILER_NOT_INITIALIZED";
      break;
    case CUDA_ERROR_PROFILER_ALREADY_STARTED:
      message = "PROFILER_ALREADY_STARTED";
      break;
    case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
      message = "PROFILER_ALREADY_STOPPED";
      break;
    case CUDA_ERROR_NO_DEVICE:
      message = "No CUDA-capable device available";
      break;
    case CUDA_ERROR_INVALID_DEVICE:
      message = "Invalid device";
      break;
    case CUDA_ERROR_INVALID_IMAGE:
      message = "Invalid kernel image";
      break;
    case CUDA_ERROR_INVALID_CONTEXT:
      message = "Invalid context";
      break;
    case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
      message = "Context already current";
      break;
    case CUDA_ERROR_MAP_FAILED:
      message = "Map failed";
      break;
    case CUDA_ERROR_UNMAP_FAILED:
      message = "Unmap failed";
      break;
    case CUDA_ERROR_ARRAY_IS_MAPPED:
      message = "Array is mapped";
      break;
    case CUDA_ERROR_ALREADY_MAPPED:
      message = "Already mapped";
      break;
    case CUDA_ERROR_NO_BINARY_FOR_GPU:
      message = "No binary for GPU";
      break;
    case CUDA_ERROR_ALREADY_ACQUIRED:
      message = "Already acquired";
      break;
    case CUDA_ERROR_NOT_MAPPED:
      message = "Not mapped";
      break;
    case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
      message = "Mapped resource not available for access as an array";
      break;
    case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
      message = "Mapped resource not available for access as a pointer";
      break;
    case CUDA_ERROR_ECC_UNCORRECTABLE:
      message = "Uncorrectable ECC error detected";
      break;
    case CUDA_ERROR_UNSUPPORTED_LIMIT:
      message = "CUlimit not supported by device";
      break;
    case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
      message = "CONTEXT_ALREADY_IN_USE";
      break;
    case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
      message = "PEER_ACCESS_UNSUPPORTED";
      break;
    case CUDA_ERROR_INVALID_PTX:
      message = "INVALID_PTX";
      break;
    case CUDA_ERROR_INVALID_SOURCE:
      message = "Invalid source";
      break;
    case CUDA_ERROR_FILE_NOT_FOUND:
      message = "File not found";
      break;
    case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
      message = "Link to a shared object failed to resolve";
      break;
    case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
      message = "Shared object initialization failed";
      break;
    case CUDA_ERROR_OPERATING_SYSTEM:
      message = "OPERATING_SYSTEM";
      break;
    case CUDA_ERROR_INVALID_HANDLE:
      message = "Invalid handle";
      break;
    case CUDA_ERROR_NOT_FOUND:
      message = "Not found";
      break;
    case CUDA_ERROR_NOT_READY:
      message = "CUDA not ready";
      break;
    case CUDA_ERROR_ILLEGAL_ADDRESS:
      message = "ILLEGAL_ADDRESS";
      break;
    case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
      message = "Launch exceeded resources";
      break;
    case CUDA_ERROR_LAUNCH_TIMEOUT:
      message = "Launch exceeded timeout";
      break;
    case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
      message = "Launch with incompatible texturing";
      break;
    case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
      message = "PEER_ACCESS_ALREADY_ENABLED";
      break;
    case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
      message = "PEER_ACCESS_NOT_ENABLED";
      break;
    case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
      message = "PRIMARY_CONTEXT_ACTIVE";
      break;
    case CUDA_ERROR_CONTEXT_IS_DESTROYED:
      message = "CONTEXT_IS_DESTROYED";
      break;
    case CUDA_ERROR_ASSERT:
      message = "ASSERT";
      break;
    case CUDA_ERROR_TOO_MANY_PEERS:
      message = "TOO_MANY_PEERS";
      break;
    case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
      message = "HOST_MEMORY_ALREADY_REGISTERED";
      break;
    case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
      message = "HOST_MEMORY_NOT_REGISTERED";
      break;
    case CUDA_ERROR_HARDWARE_STACK_ERROR:
      message = "HARDWARE_STACK_ERROR";
      break;
    case CUDA_ERROR_ILLEGAL_INSTRUCTION:
      message = "ILLEGAL_INSTRUCTION";
      break;
    case CUDA_ERROR_MISALIGNED_ADDRESS:
      message = "MISALIGNED_ADDRESS";
      break;
    case CUDA_ERROR_INVALID_ADDRESS_SPACE:
      message = "INVALID_ADDRESS_SPACE";
      break;
    case CUDA_ERROR_INVALID_PC:
      message = "INVALID_PC";
      break;
    case CUDA_ERROR_LAUNCH_FAILED:
      message = "Launch failed";
      break;
    case CUDA_ERROR_NOT_PERMITTED:
      message = "NOT_PERMITTED";
      break;
    case CUDA_ERROR_NOT_SUPPORTED:
      message = "NOT_SUPPORTED";
      break;
    case CUDA_ERROR_UNKNOWN:
      message = "Unknown error";
      break;
    default:
      message = "Unknown CUDA error value";
      break;
  }

  return message;
}
|
||||
|
||||
/* Join two path components with the platform path separator.
 *
 * path1, path2: NUL-terminated components to concatenate.
 * maxlen:       capacity of `result` in bytes, terminator included.
 * result:       destination buffer; always NUL-terminated on return when
 *               maxlen is positive. Output that does not fit is truncated.
 *
 * When maxlen is not positive there is no room even for the terminator,
 * so `result` is left untouched.
 */
static void path_join(const char *path1,
                      const char *path2,
                      int maxlen,
                      char *result) {
#if defined(WIN32) || defined(_WIN32)
  const char separator = '\\';
#else
  const char separator = '/';
#endif
  int n;

  if (maxlen <= 0) {
    return;
  }

  n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);

  /* Any negative return is a formatting error (some runtimes also use it
   * to signal truncation); n >= maxlen is standard truncation. In both
   * cases terminate at the last byte of the buffer. */
  if (n < 0 || n >= maxlen) {
    n = maxlen - 1;
  }
  result[n] = '\0';
}
|
||||
|
||||
/* Report whether `path` refers to something stat() can inspect.
 * Returns 1 when stat() succeeds and 0 when it fails for any reason. */
static int path_exists(const char *path) {
  struct stat info;

  return (stat(path, &info) == 0) ? 1 : 0;
}
|
||||
|
||||
/* Locate the CUDA compiler executable.
 *
 * Candidates are tried in this order, and the first one that exists wins:
 *   1. the directory named by the CUDA_BIN_PATH environment variable;
 *   2. a built-in list of default installation directories;
 *   3. (non-Windows only) whatever `which nvcc` reports, in which case the
 *      bare command name "nvcc" is returned.
 *
 * Returns NULL when no compiler is found. For cases 1 and 2 the returned
 * pointer refers to a function-local static buffer that is overwritten by
 * the next call.
 */
const char *cuewCompilerPath(void) {
#ifdef _WIN32
  const char *executable = "nvcc.exe";
  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
#else
  const char *executable = "nvcc";
  const char *defaultpaths[] = {
    "/Developer/NVIDIA/CUDA-5.0/bin",
    "/usr/local/cuda-5.0/bin",
    "/usr/local/cuda/bin",
    "/Developer/NVIDIA/CUDA-6.0/bin",
    "/usr/local/cuda-6.0/bin",
    "/Developer/NVIDIA/CUDA-5.5/bin",
    "/usr/local/cuda-5.5/bin",
    NULL
  };
#endif
  static char nvcc[65536];
  const char **candidate;
  const char *env_dir = getenv("CUDA_BIN_PATH");

  /* An explicit override from the environment takes precedence. */
  if (env_dir != NULL) {
    path_join(env_dir, executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc)) {
      return nvcc;
    }
  }

  /* Walk the NULL-terminated list of default install directories. */
  for (candidate = defaultpaths; *candidate != NULL; ++candidate) {
    path_join(*candidate, executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc)) {
      return nvcc;
    }
  }

#ifndef _WIN32
  {
    /* Last resort: ask the shell whether nvcc can be found at all. */
    FILE *which = popen("which nvcc", "r");
    if (which != NULL) {
      char found[4096] = {0};
      int count = fread(found, 1, sizeof(found) - 1, which);
      found[count] = '\0';
      pclose(which);

      /* Any output counts as "found"; the command name itself is returned. */
      if (found[0] != '\0') {
        return "nvcc";
      }
    }
  }
#endif

  return NULL;
}
|
||||
|
||||
/* Query the version of the CUDA compiler found by cuewCompilerPath().
 *
 * Runs "<compiler> --version", searches its output for the
 * "Cuda compilation tools, release " marker and parses the "major.minor"
 * number that follows.
 *
 * Returns 10 * major + minor, or 0 when no compiler is available, the
 * command cannot be run, or its output cannot be parsed. Failures after
 * the compiler has been located are reported on stderr.
 */
int cuewCompilerVersion(void) {
  const char *path = cuewCompilerPath();
  const char *marker = "Cuda compilation tools, release ";
  FILE *proc;
  int major, minor;
  int written;
  size_t used = 0;
  char *versionstr;
  char buf[128];
  char output[65536] = "\0";
  char command[65536] = "\0";

  if (path == NULL)
    return 0;

  /* get --version output */
  written = snprintf(command, sizeof(command), "%s --version", path);
  if (written < 0 || (size_t)written >= sizeof(command)) {
    /* Never run a truncated command line. */
    fprintf(stderr, "CUDA: compiler path is too long\n");
    return 0;
  }

  proc = popen(command, "r");
  if (!proc) {
    fprintf(stderr, "CUDA: failed to run compiler to retrieve version\n");
    return 0;
  }

  /* Loop on fgets() itself so that both EOF and read errors end the loop.
   * Output beyond the buffer capacity is read and discarded. */
  while (fgets(buf, sizeof(buf), proc) != NULL) {
    size_t chunk = strlen(buf);
    size_t room = sizeof(output) - 1 - used;
    if (chunk > room) {
      chunk = room;
    }
    memcpy(output + used, buf, chunk);
    used += chunk;
    output[used] = '\0';
  }

  pclose(proc);

  /* parse version number */
  versionstr = strstr(output, marker);
  if (versionstr == NULL) {
    fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
    return 0;
  }
  versionstr += strlen(marker);

  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
    fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
    return 0;
  }

  return 10 * major + minor;
}
|
||||
|
Reference in New Issue
Block a user