summaryrefslogtreecommitdiffstats
path: root/Tests/CudaOnly
diff options
context:
space:
mode:
Diffstat (limited to 'Tests/CudaOnly')
-rw-r--r--Tests/CudaOnly/CMakeLists.txt12
-rw-r--r--Tests/CudaOnly/EnableStandard/CMakeLists.txt26
-rw-r--r--Tests/CudaOnly/EnableStandard/main.cu23
-rw-r--r--Tests/CudaOnly/EnableStandard/shared.cu15
-rw-r--r--Tests/CudaOnly/EnableStandard/static.cu9
-rw-r--r--Tests/CudaOnly/ExportPTX/CMakeLists.txt81
-rw-r--r--Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake19
-rw-r--r--Tests/CudaOnly/ExportPTX/kernelA.cu7
-rw-r--r--Tests/CudaOnly/ExportPTX/kernelB.cu8
-rw-r--r--Tests/CudaOnly/ExportPTX/main.cu28
-rw-r--r--Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt23
-rw-r--r--Tests/CudaOnly/GPUDebugFlag/main.cu71
-rw-r--r--Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt15
-rw-r--r--Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu77
-rw-r--r--Tests/CudaOnly/PDB/CMakeLists.txt19
-rw-r--r--Tests/CudaOnly/PDB/check_pdbs.cmake10
-rw-r--r--Tests/CudaOnly/PDB/main.cu4
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt55
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file1.cu10
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file1.h7
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file2.cu25
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file2.h10
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/main.cu76
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake14
-rw-r--r--Tests/CudaOnly/SeparateCompilation/CMakeLists.txt61
-rw-r--r--Tests/CudaOnly/SeparateCompilation/file1.cu10
-rw-r--r--Tests/CudaOnly/SeparateCompilation/file1.h7
-rw-r--r--Tests/CudaOnly/SeparateCompilation/file2.cu16
-rw-r--r--Tests/CudaOnly/SeparateCompilation/file2.h10
-rw-r--r--Tests/CudaOnly/SeparateCompilation/file3.cu22
-rw-r--r--Tests/CudaOnly/SeparateCompilation/file4.cu23
-rw-r--r--Tests/CudaOnly/SeparateCompilation/file5.cu23
-rw-r--r--Tests/CudaOnly/SeparateCompilation/main.cu68
-rw-r--r--Tests/CudaOnly/WithDefs/CMakeLists.txt50
-rw-r--r--Tests/CudaOnly/WithDefs/inc_cuda/inc_cuda.h1
-rw-r--r--Tests/CudaOnly/WithDefs/main.notcu86
36 files changed, 1021 insertions, 0 deletions
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
new file mode 100644
index 0000000..59f3e84
--- /dev/null
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -0,0 +1,12 @@
+
+ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
+ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
+ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
+ADD_TEST_MACRO(CudaOnly.LinkSystemDeviceLibraries CudaOnlyLinkSystemDeviceLibraries)
+ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
+ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
+
+if(MSVC)
+ ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
+endif()
diff --git a/Tests/CudaOnly/EnableStandard/CMakeLists.txt b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
new file mode 100644
index 0000000..35a1deb
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
@@ -0,0 +1,26 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyEnableStandard CUDA)
+
+#Goal for this example:
+#build cuda sources that require C++11 to be enabled.
+
+add_library(CUDAStatic11 STATIC static.cu)
+add_library(CUDADynamic11 SHARED shared.cu)
+
+add_executable(CudaOnlyEnableStandard main.cu)
+target_link_libraries(CudaOnlyEnableStandard PRIVATE CUDAStatic11 CUDADynamic11)
+
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD 11)
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
+
+#Verify CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
+foreach(dir ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+ if(NOT IS_DIRECTORY "${dir}")
+ message(FATAL_ERROR
+ "CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES entry\n"
+ " ${dir}\n"
+ "is not an existing directory."
+ )
+ endif()
+endforeach()
diff --git a/Tests/CudaOnly/EnableStandard/main.cu b/Tests/CudaOnly/EnableStandard/main.cu
new file mode 100644
index 0000000..740c832
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/main.cu
@@ -0,0 +1,23 @@
+
+#include <iostream>
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+#else
+# define IMPORT
+#endif
+
+int static_cuda11_func(int);
+IMPORT int shared_cuda11_func(int);
+
+void test_functions()
+{
+ static_cuda11_func(int(42));
+ shared_cuda11_func(int(42));
+}
+
+int main(int argc, char** argv)
+{
+ test_functions();
+ return 0;
+}
diff --git a/Tests/CudaOnly/EnableStandard/shared.cu b/Tests/CudaOnly/EnableStandard/shared.cu
new file mode 100644
index 0000000..004cb83
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/shared.cu
@@ -0,0 +1,15 @@
+
+#include <type_traits>
+
+#ifdef _WIN32
+# define EXPORT __declspec(dllexport)
+#else
+# define EXPORT
+#endif
+
+using tt = std::true_type;
+using ft = std::false_type;
+EXPORT int __host__ shared_cuda11_func(int x)
+{
+ return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/CudaOnly/EnableStandard/static.cu b/Tests/CudaOnly/EnableStandard/static.cu
new file mode 100644
index 0000000..73e43a8
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/static.cu
@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+ return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
new file mode 100644
index 0000000..65d5243
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
@@ -0,0 +1,81 @@
+cmake_minimum_required(VERSION 3.8)
+project (CudaOnlyExportPTX CUDA)
+
+#Goal for this example:
+# How to generate PTX files instead of OBJECT files
+# How to reference PTX files for custom commands
+# How to install PTX files
+
+add_library(CudaPTX OBJECT kernelA.cu kernelB.cu)
+set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
+
+#Test ObjectFiles with file(GENERATE)
+file(GENERATE
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/gen_$<LOWER_CASE:$<CONFIG>/>path_to_objs.h
+ CONTENT [[
+
+#include <vector>
+#include <string>
+
+#ifndef path_to_objs
+#define path_to_objs
+
+static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>";
+
+#endif
+
+]]
+)
+#We are going to need a wrapper around bin2c for multiple reasons
+# 1. bin2c only converts a single file at a time
+# 2. bin2c has only standard out support, so we have to manually
+# redirect to a cmake buffer
+# 3. We want to pack everything into a single output file, so we
+# need to also pass the --name option
+set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h)
+
+get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+find_program(bin_to_c
+ NAMES bin2c
+ PATHS ${cuda_compiler_bin}
+ )
+if(NOT bin_to_c)
+ message(FATAL_ERROR
+ "bin2c not found:\n"
+ " CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n"
+ " cuda_compiler_bin='${cuda_compiler_bin}'\n"
+ )
+endif()
+
+add_custom_command(
+ OUTPUT "${output_file}"
+ COMMAND ${CMAKE_COMMAND}
+ "-DBIN_TO_C_COMMAND=${bin_to_c}"
+ "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>"
+ "-DOUTPUT=${output_file}"
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake
+ VERBATIM
+ DEPENDS $<TARGET_OBJECTS:CudaPTX>
+ COMMENT "Converting Object files to a C header"
+ )
+
+add_executable(CudaOnlyExportPTX main.cu ${output_file})
+add_dependencies(CudaOnlyExportPTX CudaPTX)
+target_include_directories(CudaOnlyExportPTX PRIVATE
+ ${CMAKE_CURRENT_BINARY_DIR} )
+target_compile_definitions(CudaOnlyExportPTX PRIVATE
+ "CONFIG_TYPE=gen_$<LOWER_CASE:$<CONFIG>>")
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyExportPTX PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
+
+#Verify that we can install object targets properly
+install(TARGETS CudaPTX CudaOnlyExportPTX
+ EXPORT cudaPTX
+ RUNTIME DESTINATION bin
+ LIBRARY DESTINATION lib
+ OBJECTS DESTINATION objs
+ )
+install(EXPORT cudaPTX DESTINATION lib/cudaPTX)
diff --git a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake
new file mode 100644
index 0000000..0baf934
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake
@@ -0,0 +1,19 @@
+
+set(file_contents)
+foreach(obj ${OBJECTS})
+ get_filename_component(obj_ext ${obj} EXT)
+ get_filename_component(obj_name ${obj} NAME_WE)
+ get_filename_component(obj_dir ${obj} DIRECTORY)
+
+ if(obj_ext MATCHES ".ptx")
+ set(args --name ${obj_name} ${obj})
+ execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args}
+ WORKING_DIRECTORY ${obj_dir}
+ RESULT_VARIABLE result
+ OUTPUT_VARIABLE output
+ ERROR_VARIABLE error_var
+ )
+ set(file_contents "${file_contents} \n${output}")
+ endif()
+endforeach()
+file(WRITE "${OUTPUT}" "${file_contents}")
diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu
new file mode 100644
index 0000000..fbe0d26
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/kernelA.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
+{
+ for (int i = threadIdx.x; i < size; i += blockDim.x) {
+ r[i] = x[i] * y[i] + z[i];
+ }
+}
diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu
new file mode 100644
index 0000000..11872e4
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/kernelB.cu
@@ -0,0 +1,8 @@
+
+
+__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
+{
+ for (int i = threadIdx.x; i < size; i += blockDim.x) {
+ r[i] = x[i] * y[i] + z[i];
+ }
+}
diff --git a/Tests/CudaOnly/ExportPTX/main.cu b/Tests/CudaOnly/ExportPTX/main.cu
new file mode 100644
index 0000000..132377c
--- /dev/null
+++ b/Tests/CudaOnly/ExportPTX/main.cu
@@ -0,0 +1,28 @@
+
+#include <iostream>
+
+/*
+ Define GENERATED_HEADER macro to allow c++ files to include headers
+ generated based on different configuration types.
+*/
+
+/* clang-format off */
+#define GENERATED_HEADER(x) GENERATED_HEADER0(CONFIG_TYPE/x)
+/* clang-format on */
+#define GENERATED_HEADER0(x) GENERATED_HEADER1(x)
+#define GENERATED_HEADER1(x) <x>
+
+#include GENERATED_HEADER(path_to_objs.h)
+
+#include "embedded_objs.h"
+
+int main(int argc, char** argv)
+{
+ (void)argc;
+ (void)argv;
+
+ unsigned char* ka = kernelA;
+ unsigned char* kb = kernelB;
+
+ return (ka != NULL && kb != NULL) ? 0 : 1;
+}
diff --git a/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt
new file mode 100644
index 0000000..5b96906
--- /dev/null
+++ b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt
@@ -0,0 +1,23 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlGPUDebugFlag CUDA)
+
+#Goal for this example:
+#verify that -G enables gpu debug flags
+string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_30,code=compute_30")
+string(APPEND CMAKE_CUDA_FLAGS " -G")
+set(CMAKE_CUDA_STANDARD 11)
+
+add_executable(CudaOnlyGPUDebugFlag main.cu)
+
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.0.0)
+ #CUDA's __CUDACC_DEBUG__ define was added in 9.0
+ #so if we are below 9.0.0 we will manually add the define so that the test
+ #passes
+ target_compile_definitions(CudaOnlyGPUDebugFlag PRIVATE "__CUDACC_DEBUG__")
+endif()
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyGPUDebugFlag PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/GPUDebugFlag/main.cu b/Tests/CudaOnly/GPUDebugFlag/main.cu
new file mode 100644
index 0000000..1f3fc12
--- /dev/null
+++ b/Tests/CudaOnly/GPUDebugFlag/main.cu
@@ -0,0 +1,71 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+static __global__ void debug_kernel(bool* has_debug)
+{
+// Verify using the return code if we have GPU debug flag enabled
+#if defined(__CUDACC__) && defined(__CUDACC_DEBUG__)
+ *has_debug = true;
+#else
+ *has_debug = false;
+#endif
+}
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ if (prop.major >= 3) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+ << std::endl;
+
+ return 1;
+}
+
+int main(int argc, char** argv)
+{
+ bool* has_debug;
+ cudaError_t err = cudaMallocManaged(&has_debug, sizeof(bool));
+ if (err != cudaSuccess) {
+ std::cerr << "cudaMallocManaged failed:\n"
+ << " " << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ debug_kernel<<<1, 1>>>(has_debug);
+ err = cudaDeviceSynchronize();
+ if (err != cudaSuccess) {
+ std::cerr << "debug_kernel: kernel launch shouldn't have failed\n"
+ << "reason:\t" << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+ if (*has_debug == false) {
+ std::cerr << "debug_kernel: kernel not compiled with device debug"
+ << std::endl;
+ return 1;
+ }
+ return 0;
+}
diff --git a/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt b/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt
new file mode 100644
index 0000000..62be1e6
--- /dev/null
+++ b/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.8)
+project(CudaOnlyLinkSystemDeviceLibraries CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_35,code=compute_35 -gencode arch=compute_35,code=sm_35")
+set(CMAKE_CUDA_STANDARD 11)
+
+add_executable(CudaOnlyLinkSystemDeviceLibraries main.cu)
+set_target_properties( CudaOnlyLinkSystemDeviceLibraries
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+target_link_libraries( CudaOnlyLinkSystemDeviceLibraries PRIVATE cublas_device)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyLinkSystemDeviceLibraries PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu b/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu
new file mode 100644
index 0000000..7eecec1
--- /dev/null
+++ b/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu
@@ -0,0 +1,77 @@
+
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+__global__ void deviceCublasSgemm(int n, float alpha, float beta,
+ const float* d_A, const float* d_B,
+ float* d_C)
+{
+ cublasHandle_t cnpHandle;
+ cublasStatus_t status = cublasCreate(&cnpHandle);
+
+ if (status != CUBLAS_STATUS_SUCCESS) {
+ return;
+ }
+
+ // Call function defined in the cublas_device system static library.
+ // This way we can verify that we properly pass system libraries to the
+ // device link line
+ status = cublasSgemm(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha,
+ d_A, n, d_B, n, &beta, d_C, n);
+
+ cublasDestroy(cnpHandle);
+}
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+
+ if (prop.major > 3 || (prop.major == 3 && prop.minor >= 5)) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.5"
+ << std::endl;
+ return 1;
+}
+
+int main(int argc, char** argv)
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ // initial values that will make sure that the cublasSgemm won't actually
+ // do any work
+ int n = 0;
+ float alpha = 1;
+ float beta = 1;
+ float* d_A = nullptr;
+ float* d_B = nullptr;
+ float* d_C = nullptr;
+ deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C);
+
+ return 0;
+}
diff --git a/Tests/CudaOnly/PDB/CMakeLists.txt b/Tests/CudaOnly/PDB/CMakeLists.txt
new file mode 100644
index 0000000..34e1e5c
--- /dev/null
+++ b/Tests/CudaOnly/PDB/CMakeLists.txt
@@ -0,0 +1,19 @@
+cmake_minimum_required(VERSION 3.11)
+project (CudaOnlyPDB CUDA)
+
+add_executable(CudaOnlyPDB main.cu)
+set_target_properties(CudaOnlyPDB PROPERTIES
+ PDB_NAME LinkPDBName
+ PDB_OUTPUT_DIRECTORY LinkPDBDir
+ COMPILE_PDB_NAME CompPDBName
+ COMPILE_PDB_OUTPUT_DIRECTORY CompPDBDir
+ )
+
+set(pdbs
+ ${CMAKE_CURRENT_BINARY_DIR}/CompPDBDir/${CMAKE_CFG_INTDIR}/CompPDBName.pdb
+ ${CMAKE_CURRENT_BINARY_DIR}/LinkPDBDir/${CMAKE_CFG_INTDIR}/LinkPDBName.pdb
+ )
+add_custom_command(TARGET CudaOnlyPDB POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -Dconfig=$<CONFIG> "-Dpdbs=${pdbs}"
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/check_pdbs.cmake
+ )
diff --git a/Tests/CudaOnly/PDB/check_pdbs.cmake b/Tests/CudaOnly/PDB/check_pdbs.cmake
new file mode 100644
index 0000000..5e01ca7
--- /dev/null
+++ b/Tests/CudaOnly/PDB/check_pdbs.cmake
@@ -0,0 +1,10 @@
+if(NOT "${config}" MATCHES "[Dd][Ee][Bb]")
+ return()
+endif()
+foreach(pdb ${pdbs})
+ if(EXISTS "${pdb}")
+ message(STATUS "PDB Exists: ${pdb}")
+ else()
+ message(SEND_ERROR "PDB MISSING:\n ${pdb}")
+ endif()
+endforeach()
diff --git a/Tests/CudaOnly/PDB/main.cu b/Tests/CudaOnly/PDB/main.cu
new file mode 100644
index 0000000..f8b643a
--- /dev/null
+++ b/Tests/CudaOnly/PDB/main.cu
@@ -0,0 +1,4 @@
+int main()
+{
+ return 0;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
new file mode 100644
index 0000000..0c453a9
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
@@ -0,0 +1,55 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyResolveDeviceSymbols CUDA)
+
+# Find nm and dumpbin
+if(CMAKE_NM)
+ set(dump_command ${CMAKE_NM})
+ set(dump_args -g)
+else()
+ include(GetPrerequisites)
+ message(STATUS "calling list_prerequisites to find dumpbin")
+ list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
+ if(gp_dumpbin)
+ set(dump_command ${gp_dumpbin})
+ set(dump_args /ARCHIVEMEMBERS)
+ endif()
+endif()
+
+#Goal for this example:
+# Build a static library that defines multiple methods and kernels that
+# use each other.
+# Resolve the device symbols into that static library
+# Verify that we can't use those device symbols from anything that links
+# to the static library
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+
+add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
+set_target_properties(CUDAResolveDeviceLib
+ PROPERTIES
+ CUDA_SEPARABLE_COMPILATION ON
+ CUDA_RESOLVE_DEVICE_SYMBOLS ON
+ POSITION_INDEPENDENT_CODE ON)
+
+if(dump_command)
+add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
+ COMMAND ${CMAKE_COMMAND}
+ -DDUMP_COMMAND=${dump_command}
+ -DDUMP_ARGS=${dump_args}
+ -DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib>
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
+ )
+endif()
+
+add_executable(CudaOnlyResolveDeviceSymbols main.cu)
+set_target_properties(CudaOnlyResolveDeviceSymbols
+ PROPERTIES
+ CUDA_SEPARABLE_COMPILATION ON)
+
+target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyResolveDeviceSymbols PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
new file mode 100644
index 0000000..1ce63bf
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+ result_type r;
+ r.input = x;
+ r.sum = x * x;
+ return r;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+ int input;
+ int sum;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
new file mode 100644
index 0000000..278fd6c
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
@@ -0,0 +1,25 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+ const result_type r = file1_func(x);
+ const result_type_dynamic rd{ r.input, r.sum, true };
+ return rd;
+}
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x)
+{
+ // call static_func which is a method that is defined in the
+ // static library that is always out of date
+ r = file2_func(x);
+}
+
+int file2_launch_kernel(int x)
+{
+ result_type_dynamic r;
+ file2_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
new file mode 100644
index 0000000..d2dbaa4
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+ int input;
+ int sum;
+ bool from_static;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
new file mode 100644
index 0000000..d464f96
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
@@ -0,0 +1,76 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+int file2_launch_kernel(int x);
+
+result_type_dynamic __device__ file2_func(int x);
+static __global__ void main_kernel(result_type_dynamic& r, int x)
+{
+ // call function that was not device linked to us, this will cause
+ // a runtime failure of "invalid device function"
+ r = file2_func(x);
+}
+
+int main_launch_kernel(int x)
+{
+ result_type_dynamic r;
+ main_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ std::cout << "prop.major: " << prop.major << std::endl;
+ if (prop.major >= 3) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+ << std::endl;
+
+ return 1;
+}
+
+int main(int argc, char** argv)
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ main_launch_kernel(1);
+ cudaError_t err = cudaGetLastError();
+ if (err == cudaSuccess) {
+ // This kernel launch should fail as the file2_func was device linked
+ // into the static library and is not usable by the executable
+ std::cerr << "main_launch_kernel: kernel launch should have failed"
+ << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
new file mode 100644
index 0000000..94d388b
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
@@ -0,0 +1,14 @@
+execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
+ RESULT_VARIABLE RESULT
+ OUTPUT_VARIABLE OUTPUT
+ ERROR_VARIABLE ERROR
+)
+
+if(NOT "${RESULT}" STREQUAL "0")
+ message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
+endif()
+
+if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)")
+ message(FATAL_ERROR
+ "No cuda device objects found, device linking did not occur")
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
new file mode 100644
index 0000000..c934c51
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
@@ -0,0 +1,61 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlySeparateCompilation CUDA)
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#After that confirm that we can call those methods from dynamic libraries
+#and executables.
+#We complicate the matter by also testing that multiple static libraries
+#all containing cuda separable compilation code links properly
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=\\\"compute_30,sm_30,sm_35\\\"")
+string(APPEND CMAKE_CUDA_FLAGS " --generate-code=arch=compute_50,code=[compute_50,sm_50,sm_52]")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu)
+get_property(sep_comp TARGET CUDASeparateLibA PROPERTY CUDA_SEPARABLE_COMPILATION)
+if(NOT sep_comp)
+ message(FATAL_ERROR "CUDA_SEPARABLE_COMPILATION not initialized")
+endif()
+unset(CMAKE_CUDA_SEPARABLE_COMPILATION)
+
+if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC")
+ # Test adding a flag that is not in our CUDA flag table for VS.
+ if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 8)
+ string(APPEND CMAKE_CUDA_FLAGS " --ftemplate-depth 50")
+ endif()
+ # Test adding a flag that nvcc should pass to the host compiler.
+ target_compile_options(CUDASeparateLibA PRIVATE -Xcompiler=-bigobj)
+endif()
+
+#Having file4/file5 in a shared library causes serious problems
+#with the nvcc linker and it will generate bad entries that will
+#cause a segv when trying to run the executable
+#
+add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
+target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
+
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation
+ PRIVATE CUDASeparateLibB)
+
+set_target_properties(CUDASeparateLibA
+ CUDASeparateLibB
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON
+ POSITION_INDEPENDENT_CODE ON)
+
+if (CMAKE_GENERATOR MATCHES "^Visual Studio")
+ #Visual Studio CUDA integration will not perform device linking
+ #on a target that itself does not have GenerateRelocatableDeviceCode
+ #enabled.
+ set_target_properties(CudaOnlySeparateCompilation
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/file1.cu b/Tests/CudaOnly/SeparateCompilation/file1.cu
new file mode 100644
index 0000000..1ce63bf
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+ result_type r;
+ r.input = x;
+ r.sum = x * x;
+ return r;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file1.h b/Tests/CudaOnly/SeparateCompilation/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+ int input;
+ int sum;
+};
diff --git a/Tests/CudaOnly/SeparateCompilation/file2.cu b/Tests/CudaOnly/SeparateCompilation/file2.cu
new file mode 100644
index 0000000..74f3558
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file2.cu
@@ -0,0 +1,16 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+ if (x != 42) {
+ const result_type r = file1_func(x);
+ const result_type_dynamic rd{ r.input, r.sum, true };
+ return rd;
+ } else {
+ const result_type_dynamic rd{ x, x * x * x, false };
+ return rd;
+ }
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file2.h b/Tests/CudaOnly/SeparateCompilation/file2.h
new file mode 100644
index 0000000..d2dbaa4
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+ int input;
+ int sum;
+ bool from_static;
+};
diff --git a/Tests/CudaOnly/SeparateCompilation/file3.cu b/Tests/CudaOnly/SeparateCompilation/file3.cu
new file mode 100644
index 0000000..155b513
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file3.cu
@@ -0,0 +1,22 @@
+
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static __global__ void file3_kernel(result_type& r, int x)
+{
+ // call static_func which is a method that is defined in the
+ // static library that is always out of date
+ r = file1_func(x);
+ result_type_dynamic rd = file2_func(x);
+}
+
+result_type file3_launch_kernel(int x)
+{
+ result_type r;
+ file3_kernel<<<1, 1>>>(r, x);
+ return r;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file4.cu b/Tests/CudaOnly/SeparateCompilation/file4.cu
new file mode 100644
index 0000000..2e3e01e
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file4.cu
@@ -0,0 +1,23 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static __global__ void file4_kernel(result_type& r, int x)
+{
+ // call static_func which is a method that is defined in the
+ // static library that is always out of date
+ r = file1_func(x);
+ result_type_dynamic rd = file2_func(x);
+}
+
+int file4_launch_kernel(int x)
+{
+ result_type r;
+ file4_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file5.cu b/Tests/CudaOnly/SeparateCompilation/file5.cu
new file mode 100644
index 0000000..fee8e9e
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file5.cu
@@ -0,0 +1,23 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static __global__ void file5_kernel(result_type& r, int x)
+{
+ // call static_func which is a method that is defined in the
+ // static library that is always out of date
+ r = file1_func(x);
+ result_type_dynamic rd = file2_func(x);
+}
+
+int file5_launch_kernel(int x)
+{
+ result_type r;
+ file5_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu
new file mode 100644
index 0000000..40dbe5d
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/main.cu
@@ -0,0 +1,68 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+int file4_launch_kernel(int x);
+int file5_launch_kernel(int x);
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ if (prop.major >= 3) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+ << std::endl;
+
+ return 1;
+}
+
+int main(int argc, char** argv)
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
+ file4_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file4_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ file5_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file5_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/CudaOnly/WithDefs/CMakeLists.txt b/Tests/CudaOnly/WithDefs/CMakeLists.txt
new file mode 100644
index 0000000..926d9ed
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/CMakeLists.txt
@@ -0,0 +1,50 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyWithDefs CUDA)
+
+#verify that we can pass explicit cuda arch flags
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 9)
+ set(debug_compile_flags --generate-code arch=compute_32,code=sm_32)
+else()
+ set(debug_compile_flags --generate-code arch=compute_20,code=sm_20)
+endif()
+if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC")
+ list(APPEND debug_compile_flags -Xcompiler=-WX)
+else()
+ list(APPEND debug_compile_flags -Xcompiler=-Werror)
+endif()
+set(release_compile_defs DEFREL)
+
+#Goal for this example:
+#build a executable that needs to be passed a complex define through add_defintions
+#this verifies we can pass things such as '_','(' to nvcc
+add_definitions("-DPACKED_DEFINE=__attribute__((packed))")
+
+add_executable(CudaOnlyWithDefs main.notcu)
+set_source_files_properties(main.notcu PROPERTIES LANGUAGE CUDA)
+
+target_compile_options(CudaOnlyWithDefs
+ PRIVATE
+ -DFLAG_COMPILE_LANG_$<COMPILE_LANGUAGE>
+ -DFLAG_LANG_IS_CUDA=$<COMPILE_LANGUAGE:CUDA>
+ -Xcompiler=-DHOST_DEFINE
+ $<$<CONFIG:DEBUG>:$<BUILD_INTERFACE:${debug_compile_flags}>>
+ )
+
+target_compile_definitions(CudaOnlyWithDefs
+ PRIVATE
+ $<$<CONFIG:RELEASE>:$<BUILD_INTERFACE:${release_compile_defs}>>
+ -DDEF_COMPILE_LANG_$<COMPILE_LANGUAGE>
+ -DDEF_LANG_IS_CUDA=$<COMPILE_LANGUAGE:CUDA>
+ )
+
+target_include_directories(CudaOnlyWithDefs
+ PRIVATE
+ $<$<COMPILE_LANGUAGE:CUDA>:${CMAKE_CURRENT_SOURCE_DIR}/inc_cuda>
+)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlyWithDefs PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/WithDefs/inc_cuda/inc_cuda.h b/Tests/CudaOnly/WithDefs/inc_cuda/inc_cuda.h
new file mode 100644
index 0000000..e228b58
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/inc_cuda/inc_cuda.h
@@ -0,0 +1 @@
+#define INC_CUDA
diff --git a/Tests/CudaOnly/WithDefs/main.notcu b/Tests/CudaOnly/WithDefs/main.notcu
new file mode 100644
index 0000000..98f73ce
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/main.notcu
@@ -0,0 +1,86 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+#include <inc_cuda.h>
+#ifndef INC_CUDA
+# error "INC_CUDA not defined!"
+#endif
+
+#ifndef HOST_DEFINE
+# error "HOST_DEFINE not defined!"
+#endif
+
+#ifndef PACKED_DEFINE
+# error "PACKED_DEFINE not defined!"
+#endif
+
+#ifndef FLAG_COMPILE_LANG_CUDA
+# error "FLAG_COMPILE_LANG_CUDA not defined!"
+#endif
+
+#ifndef FLAG_LANG_IS_CUDA
+# error "FLAG_LANG_IS_CUDA not defined!"
+#endif
+
+#if !FLAG_LANG_IS_CUDA
+# error "Expected FLAG_LANG_IS_CUDA"
+#endif
+
+#ifndef DEF_COMPILE_LANG_CUDA
+# error "DEF_COMPILE_LANG_CUDA not defined!"
+#endif
+
+#ifndef DEF_LANG_IS_CUDA
+# error "DEF_LANG_IS_CUDA not defined!"
+#endif
+
+#if !DEF_LANG_IS_CUDA
+# error "Expected DEF_LANG_IS_CUDA"
+#endif
+
+static __global__ void DetermineIfValidCudaDevice()
+{
+}
+
+#ifdef _MSC_VER
+# pragma pack(push, 1)
+# undef PACKED_DEFINE
+# define PACKED_DEFINE
+#endif
+struct PACKED_DEFINE result_type
+{
+ bool valid;
+ int value;
+#if defined(NDEBUG) && !defined(DEFREL)
+# error missing DEFREL flag
+#endif
+};
+#ifdef _MSC_VER
+# pragma pack(pop)
+#endif
+
+result_type can_launch_kernel()
+{
+ result_type r;
+ DetermineIfValidCudaDevice<<<1, 1>>>();
+ r.valid = (cudaSuccess == cudaGetLastError());
+ if (r.valid) {
+ r.value = 1;
+ } else {
+ r.value = -1;
+ }
+ return r;
+}
+
+int main(int argc, char** argv)
+{
+ cudaError_t err;
+ int nDevices = 0;
+ err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+ return 0;
+}