summaryrefslogtreecommitdiffstats
path: root/Tests/Cuda
diff options
context:
space:
mode:
Diffstat (limited to 'Tests/Cuda')
-rw-r--r--Tests/Cuda/CMakeLists.txt18
-rw-r--r--Tests/Cuda/CXXStandardSetTwice/CMakeLists.txt (renamed from Tests/Cuda/MixedStandardLevels/CMakeLists.txt)8
-rw-r--r--Tests/Cuda/CXXStandardSetTwice/main.cu (renamed from Tests/Cuda/MixedStandardLevels/main.cu)0
-rw-r--r--Tests/Cuda/Complex/CMakeLists.txt29
-rw-r--r--Tests/Cuda/Complex/dynamic.cu11
-rw-r--r--Tests/Cuda/Complex/main.cpp1
-rw-r--r--Tests/Cuda/Complex/mixed.cu6
-rw-r--r--Tests/Cuda/IncludePathNoToolkit/CMakeLists.txt (renamed from Tests/Cuda/ToolkitInclude/CMakeLists.txt)6
-rw-r--r--Tests/Cuda/IncludePathNoToolkit/main.cpp (renamed from Tests/Cuda/ToolkitInclude/main.cpp)0
-rw-r--r--Tests/Cuda/MixedStandardLevels1/CMakeLists.txt14
-rw-r--r--Tests/Cuda/MixedStandardLevels1/lib.cpp7
-rw-r--r--Tests/Cuda/MixedStandardLevels1/main.cu9
-rw-r--r--Tests/Cuda/MixedStandardLevels2/CMakeLists.txt14
-rw-r--r--Tests/Cuda/MixedStandardLevels2/lib.cpp7
-rw-r--r--Tests/Cuda/MixedStandardLevels2/main.cu11
-rw-r--r--Tests/Cuda/MixedStandardLevels3/CMakeLists.txt12
-rw-r--r--Tests/Cuda/MixedStandardLevels3/lib.cpp7
-rw-r--r--Tests/Cuda/MixedStandardLevels3/main.cu5
-rw-r--r--Tests/Cuda/MixedStandardLevels4/CMakeLists.txt14
-rw-r--r--Tests/Cuda/MixedStandardLevels4/lib.cpp16
-rw-r--r--Tests/Cuda/MixedStandardLevels4/main.cu5
-rw-r--r--Tests/Cuda/MixedStandardLevels5/CMakeLists.txt13
-rw-r--r--Tests/Cuda/MixedStandardLevels5/lib.cpp13
-rw-r--r--Tests/Cuda/MixedStandardLevels5/main.cu8
-rw-r--r--Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt35
-rw-r--r--Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp65
-rw-r--r--Tests/Cuda/SharedRuntimePlusToolkit/main.cpp23
-rw-r--r--Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp16
-rw-r--r--Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp92
-rw-r--r--Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp16
-rw-r--r--Tests/Cuda/SharedRuntimePlusToolkit/static.cpp16
-rw-r--r--Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt29
-rw-r--r--Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp59
-rw-r--r--Tests/Cuda/StaticRuntimePlusToolkit/main.cpp11
-rw-r--r--Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp8
-rw-r--r--Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp86
-rw-r--r--Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp8
-rw-r--r--Tests/Cuda/StaticRuntimePlusToolkit/static.cpp8
-rw-r--r--Tests/Cuda/Toolkit/CMakeLists.txt38
-rw-r--r--Tests/Cuda/Toolkit/main.cpp8
40 files changed, 729 insertions, 23 deletions
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt
index 44c6005..58b9b03 100644
--- a/Tests/Cuda/CMakeLists.txt
+++ b/Tests/Cuda/CMakeLists.txt
@@ -1,11 +1,25 @@
ADD_TEST_MACRO(Cuda.Complex CudaComplex)
ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
+ADD_TEST_MACRO(Cuda.CXXStandardSetTwice CXXStandardSetTwice)
ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
-ADD_TEST_MACRO(Cuda.MixedStandardLevels MixedStandardLevels)
+ADD_TEST_MACRO(Cuda.MixedStandardLevels1 MixedStandardLevels1)
+ADD_TEST_MACRO(Cuda.MixedStandardLevels2 MixedStandardLevels2)
+ADD_TEST_MACRO(Cuda.MixedStandardLevels3 MixedStandardLevels3)
+ADD_TEST_MACRO(Cuda.MixedStandardLevels4 MixedStandardLevels4)
+ADD_TEST_MACRO(Cuda.MixedStandardLevels5 MixedStandardLevels5)
ADD_TEST_MACRO(Cuda.NotEnabled CudaNotEnabled)
ADD_TEST_MACRO(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly)
-ADD_TEST_MACRO(Cuda.ToolkitInclude CudaToolkitInclude)
+ADD_TEST_MACRO(Cuda.Toolkit Toolkit)
+ADD_TEST_MACRO(Cuda.IncludePathNoToolkit IncludePathNoToolkit)
ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
+ADD_TEST_MACRO(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit)
+
+# The CUDA only ships the shared version of the toolkit libraries
+# on windows
+if(NOT WIN32)
+ ADD_TEST_MACRO(Cuda.StaticRuntimePlusToolkit StaticRuntimePlusToolkit)
+endif()
+
ADD_TEST_MACRO(Cuda.WithC CudaWithC)
diff --git a/Tests/Cuda/MixedStandardLevels/CMakeLists.txt b/Tests/Cuda/CXXStandardSetTwice/CMakeLists.txt
index b399662..1941c49 100644
--- a/Tests/Cuda/MixedStandardLevels/CMakeLists.txt
+++ b/Tests/Cuda/CXXStandardSetTwice/CMakeLists.txt
@@ -1,14 +1,14 @@
cmake_minimum_required(VERSION 3.7)
-project(MixedStandardLevels CXX CUDA)
+project(CXXStandardSetTwice CXX CUDA)
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
set(CMAKE_CXX_STANDARD 11)
-add_executable(MixedStandardLevels main.cu)
-target_compile_features(MixedStandardLevels PUBLIC cxx_std_11)
+add_executable(CXXStandardSetTwice main.cu)
+target_compile_features(CXXStandardSetTwice PUBLIC cxx_std_11)
if(APPLE)
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
- set_property(TARGET MixedStandardLevels PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+ set_property(TARGET CXXStandardSetTwice PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif()
diff --git a/Tests/Cuda/MixedStandardLevels/main.cu b/Tests/Cuda/CXXStandardSetTwice/main.cu
index d57c05a..d57c05a 100644
--- a/Tests/Cuda/MixedStandardLevels/main.cu
+++ b/Tests/Cuda/CXXStandardSetTwice/main.cu
diff --git a/Tests/Cuda/Complex/CMakeLists.txt b/Tests/Cuda/Complex/CMakeLists.txt
index d3d4b7c..08d1e16 100644
--- a/Tests/Cuda/Complex/CMakeLists.txt
+++ b/Tests/Cuda/Complex/CMakeLists.txt
@@ -22,18 +22,11 @@ set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
add_library(CudaComplexCppBase SHARED dynamic.cpp)
-add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
-set_target_properties(CudaComplexSeperableLib
- PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-set_target_properties( CudaComplexSeperableLib
- PROPERTIES POSITION_INDEPENDENT_CODE ON)
-
add_library(CudaComplexSharedLib SHARED dynamic.cu)
target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
+add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
-set_target_properties(CudaComplexMixedLib
- PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(CudaComplexMixedLib
PUBLIC CudaComplexSharedLib
PRIVATE CudaComplexSeperableLib)
@@ -41,7 +34,27 @@ target_link_libraries(CudaComplexMixedLib
add_executable(CudaComplex main.cpp)
target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)
+
+set_target_properties(CudaComplexMixedLib
+ CudaComplexSeperableLib
+ PROPERTIES
+ POSITION_INDEPENDENT_CODE ON
+ CUDA_SEPARABLE_COMPILATION ON
+ )
+set_target_properties(CudaComplexMixedLib
+ CudaComplexSharedLib
+ PROPERTIES
+ CUDA_RUNTIME_LIBRARY shared
+ )
+
+
if(APPLE)
# Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
set_property(TARGET CudaComplex PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif()
+
+if(UNIX)
+ # Help the shared cuda runtime find libcudart as it is not located
+ # in a default system searched location
+ set_property(TARGET CudaComplexMixedLib PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu
index 9da8853..7f2f2b5 100644
--- a/Tests/Cuda/Complex/dynamic.cu
+++ b/Tests/Cuda/Complex/dynamic.cu
@@ -54,17 +54,20 @@ EXPORT int choose_cuda_device()
return 1;
}
-EXPORT void cuda_dynamic_lib_func()
+EXPORT bool cuda_dynamic_lib_func()
{
- DetermineIfValidCudaDevice<<<1, 1>>>();
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
- std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: "
+ std::cerr << "DetermineIfValidCudaDevice [Per Launch] failed: "
<< cudaGetErrorString(err) << std::endl;
+ return false;
}
+ DetermineIfValidCudaDevice<<<1, 1>>>();
err = cudaDeviceSynchronize();
if (err != cudaSuccess) {
- std::cerr << "DetermineIfValidCudaDevice [ASYNC] failed: "
+ std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: "
<< cudaGetErrorString(cudaGetLastError()) << std::endl;
+ return false;
}
+ return true;
}
diff --git a/Tests/Cuda/Complex/main.cpp b/Tests/Cuda/Complex/main.cpp
index 6ca5952..da09b44 100644
--- a/Tests/Cuda/Complex/main.cpp
+++ b/Tests/Cuda/Complex/main.cpp
@@ -22,5 +22,6 @@ int main(int argc, char** argv)
int r1 = call_cuda_seperable_code(42);
int r2 = mixed_launch_kernel(42);
+
return (r1 == 42 || r2 == 42) ? 1 : 0;
}
diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu
index 5b85aec..76119ad 100644
--- a/Tests/Cuda/Complex/mixed.cu
+++ b/Tests/Cuda/Complex/mixed.cu
@@ -15,7 +15,7 @@
result_type __device__ file1_func(int x);
result_type_dynamic __device__ file2_func(int x);
-IMPORT void __host__ cuda_dynamic_lib_func();
+IMPORT bool __host__ cuda_dynamic_lib_func();
static __global__ void mixed_kernel(result_type* r, int x)
{
@@ -25,7 +25,9 @@ static __global__ void mixed_kernel(result_type* r, int x)
EXPORT int mixed_launch_kernel(int x)
{
- cuda_dynamic_lib_func();
+ if (!cuda_dynamic_lib_func()) {
+ return x;
+ }
result_type* r;
cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
diff --git a/Tests/Cuda/ToolkitInclude/CMakeLists.txt b/Tests/Cuda/IncludePathNoToolkit/CMakeLists.txt
index f246b54..7be1561 100644
--- a/Tests/Cuda/ToolkitInclude/CMakeLists.txt
+++ b/Tests/Cuda/IncludePathNoToolkit/CMakeLists.txt
@@ -1,11 +1,11 @@
cmake_minimum_required(VERSION 3.8)
-project (ToolkitInclude CXX CUDA)
+project (IncludePathNoToolkit CXX CUDA)
#Goal for this example:
# Validate that between the CXX implicit include directories and the
# CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES directories we can find
# the cuda runtime headers
-add_executable(CudaToolkitInclude main.cpp)
-target_include_directories(CudaToolkitInclude PRIVATE
+add_executable(IncludePathNoToolkit main.cpp)
+target_include_directories(IncludePathNoToolkit PRIVATE
${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
diff --git a/Tests/Cuda/ToolkitInclude/main.cpp b/Tests/Cuda/IncludePathNoToolkit/main.cpp
index c8d5c6b..c8d5c6b 100644
--- a/Tests/Cuda/ToolkitInclude/main.cpp
+++ b/Tests/Cuda/IncludePathNoToolkit/main.cpp
diff --git a/Tests/Cuda/MixedStandardLevels1/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels1/CMakeLists.txt
new file mode 100644
index 0000000..b03e51e
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels1/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 3.7)
+project(MixedStandardLevels1 CXX CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CUDA_STANDARD 11)
+
+add_executable(MixedStandardLevels1 main.cu lib.cpp)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET MixedStandardLevels1 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/MixedStandardLevels1/lib.cpp b/Tests/Cuda/MixedStandardLevels1/lib.cpp
new file mode 100644
index 0000000..cabbacb
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels1/lib.cpp
@@ -0,0 +1,7 @@
+
+int func(int A, int B)
+{
+ // Verify that we have at least c++14
+ auto mult_func = [](auto a, auto b) { return a * b; };
+ return mult_func(A, B);
+}
diff --git a/Tests/Cuda/MixedStandardLevels1/main.cu b/Tests/Cuda/MixedStandardLevels1/main.cu
new file mode 100644
index 0000000..bc02c6d
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels1/main.cu
@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+int main(int argc, char** argv)
+{
+ // Verify that we have at least c++11
+ using returnv = std::integral_constant<int, 0>;
+ return returnv::value;
+}
diff --git a/Tests/Cuda/MixedStandardLevels2/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels2/CMakeLists.txt
new file mode 100644
index 0000000..12dd328
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels2/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 3.7)
+project(MixedStandardLevels2 CXX CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+
+set(CMAKE_CXX_STANDARD 17) #this can decay
+
+add_executable(MixedStandardLevels2 main.cu lib.cpp)
+target_compile_features(MixedStandardLevels2 PUBLIC cuda_std_11)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET MixedStandardLevels2 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/MixedStandardLevels2/lib.cpp b/Tests/Cuda/MixedStandardLevels2/lib.cpp
new file mode 100644
index 0000000..cabbacb
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels2/lib.cpp
@@ -0,0 +1,7 @@
+
+int func(int A, int B)
+{
+ // Verify that we have at least c++14
+ auto mult_func = [](auto a, auto b) { return a * b; };
+ return mult_func(A, B);
+}
diff --git a/Tests/Cuda/MixedStandardLevels2/main.cu b/Tests/Cuda/MixedStandardLevels2/main.cu
new file mode 100644
index 0000000..a97a41e
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels2/main.cu
@@ -0,0 +1,11 @@
+
+#if __cplusplus < 201103L && !defined(_MSC_VER)
+# error "invalid standard value"
+#endif
+#include <type_traits>
+
+int main(int argc, char** argv)
+{
+ using returnv = std::integral_constant<int, 0>;
+ return returnv::value;
+}
diff --git a/Tests/Cuda/MixedStandardLevels3/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels3/CMakeLists.txt
new file mode 100644
index 0000000..2b611be
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels3/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.7)
+project(MixedStandardLevels3 CXX CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+
+add_executable(MixedStandardLevels3 main.cu lib.cpp)
+target_compile_features(MixedStandardLevels3 PUBLIC cuda_std_03 cxx_std_14)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET MixedStandardLevels3 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/MixedStandardLevels3/lib.cpp b/Tests/Cuda/MixedStandardLevels3/lib.cpp
new file mode 100644
index 0000000..cabbacb
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels3/lib.cpp
@@ -0,0 +1,7 @@
+
+int func(int A, int B)
+{
+ // Verify that we have at least c++14
+ auto mult_func = [](auto a, auto b) { return a * b; };
+ return mult_func(A, B);
+}
diff --git a/Tests/Cuda/MixedStandardLevels3/main.cu b/Tests/Cuda/MixedStandardLevels3/main.cu
new file mode 100644
index 0000000..1c19e8d
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels3/main.cu
@@ -0,0 +1,5 @@
+
+int main(int argc, char** argv)
+{
+ return 0;
+}
diff --git a/Tests/Cuda/MixedStandardLevels4/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels4/CMakeLists.txt
new file mode 100644
index 0000000..faf6869
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels4/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 3.7)
+project(MixedStandardLevels4 CXX CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+
+set(CMAKE_CUDA_STANDARD 03)
+
+add_executable(MixedStandardLevels4 main.cu lib.cpp)
+target_compile_features(MixedStandardLevels4 PUBLIC cxx_std_14)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET MixedStandardLevels4 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/MixedStandardLevels4/lib.cpp b/Tests/Cuda/MixedStandardLevels4/lib.cpp
new file mode 100644
index 0000000..ef6fc20
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels4/lib.cpp
@@ -0,0 +1,16 @@
+
+
+constexpr int func(int A, int B)
+{
+#if defined(_MSC_VER) && _MSC_VER < 1913
+ // no suppport for extended constexpr
+ return B * A;
+#else
+ // Verify that we have at least c++14
+ if (A < B) {
+ return A + B;
+ } else {
+ return B * A;
+ }
+#endif
+}
diff --git a/Tests/Cuda/MixedStandardLevels4/main.cu b/Tests/Cuda/MixedStandardLevels4/main.cu
new file mode 100644
index 0000000..1c19e8d
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels4/main.cu
@@ -0,0 +1,5 @@
+
+int main(int argc, char** argv)
+{
+ return 0;
+}
diff --git a/Tests/Cuda/MixedStandardLevels5/CMakeLists.txt b/Tests/Cuda/MixedStandardLevels5/CMakeLists.txt
new file mode 100644
index 0000000..7209f60
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels5/CMakeLists.txt
@@ -0,0 +1,13 @@
+cmake_minimum_required(VERSION 3.7)
+project(MixedStandardLevels5 CXX CUDA)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+
+set(CMAKE_CXX_STANDARD 98)
+
+add_executable(MixedStandardLevels5 main.cu lib.cpp)
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET MixedStandardLevels5 PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/MixedStandardLevels5/lib.cpp b/Tests/Cuda/MixedStandardLevels5/lib.cpp
new file mode 100644
index 0000000..dd7b31b
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels5/lib.cpp
@@ -0,0 +1,13 @@
+
+#if __cplusplus >= 201103L
+# error "invalid standard value"
+#endif
+int func(int A, int B)
+{
+ // Verify that we have at least c++14
+ if (A < B) {
+ return A + B;
+ } else {
+ return B * A;
+ }
+}
diff --git a/Tests/Cuda/MixedStandardLevels5/main.cu b/Tests/Cuda/MixedStandardLevels5/main.cu
new file mode 100644
index 0000000..c79afd6
--- /dev/null
+++ b/Tests/Cuda/MixedStandardLevels5/main.cu
@@ -0,0 +1,8 @@
+
+#if __cplusplus >= 201103L
+# error "invalid standard value"
+#endif
+int main(int argc, char** argv)
+{
+ return 0;
+}
diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt b/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt
new file mode 100644
index 0000000..48df558
--- /dev/null
+++ b/Tests/Cuda/SharedRuntimePlusToolkit/CMakeLists.txt
@@ -0,0 +1,35 @@
+cmake_minimum_required(VERSION 3.15)
+project(SharedRuntimePlusToolkit CXX)
+
+#Goal for this example:
+# Validate that with c++ we can use some components of the CUDA toolkit, and
+# specify the cuda runtime
+find_package(CUDAToolkit REQUIRED)
+
+add_library(Common OBJECT curand.cpp nppif.cpp)
+target_link_libraries(Common PRIVATE CUDA::toolkit)
+set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+#shared runtime with shared toolkit libraries
+add_library(SharedToolkit SHARED shared.cpp)
+target_link_libraries(SharedToolkit PRIVATE Common PUBLIC CUDA::curand CUDA::nppif)
+target_link_libraries(SharedToolkit PUBLIC CUDA::cudart)
+
+# The CUDA only ships the shared version of the toolkit libraries
+# on windows
+if(NOT WIN32)
+ #shared runtime with static toolkit libraries
+ add_library(StaticToolkit SHARED static.cpp)
+ target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static)
+ target_link_libraries(StaticToolkit PUBLIC CUDA::cudart)
+
+ #static runtime with mixed toolkit libraries
+ add_library(MixedToolkit SHARED mixed.cpp)
+ target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand_static CUDA::nppif)
+ target_link_libraries(MixedToolkit PUBLIC CUDA::cudart)
+endif()
+
+add_executable(SharedRuntimePlusToolkit main.cpp)
+target_link_libraries(SharedRuntimePlusToolkit PRIVATE SharedToolkit
+ $<TARGET_NAME_IF_EXISTS:StaticToolkit>
+ $<TARGET_NAME_IF_EXISTS:MixedToolkit>)
diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp
new file mode 100644
index 0000000..fdd7b53
--- /dev/null
+++ b/Tests/Cuda/SharedRuntimePlusToolkit/curand.cpp
@@ -0,0 +1,65 @@
+// Comes from:
+// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example
+
+#ifdef _WIN32
+# define EXPORT __declspec(dllexport)
+#else
+# define EXPORT
+#endif
+
+/*
+ * This program uses the host CURAND API to generate 100
+ * pseudorandom floats.
+ */
+#include <cuda.h>
+#include <curand.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define CUDA_CALL(x) \
+ do { \
+ if ((x) != cudaSuccess) { \
+ printf("Error at %s:%d\n", __FILE__, __LINE__); \
+ return EXIT_FAILURE; \
+ } \
+ } while (0)
+#define CURAND_CALL(x) \
+ do { \
+ if ((x) != CURAND_STATUS_SUCCESS) { \
+ printf("Error at %s:%d\n", __FILE__, __LINE__); \
+ return EXIT_FAILURE; \
+ } \
+ } while (0)
+
+EXPORT int curand_main()
+{
+ size_t n = 100;
+ size_t i;
+ curandGenerator_t gen;
+ float *devData, *hostData;
+
+ /* Allocate n floats on host */
+ hostData = (float*)calloc(n, sizeof(float));
+
+ /* Allocate n floats on device */
+ CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float)));
+
+ /* Create pseudo-random number generator */
+ CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
+
+ /* Set seed */
+ CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL));
+
+ /* Generate n floats on device */
+ CURAND_CALL(curandGenerateUniform(gen, devData, n));
+
+ /* Copy device memory to host */
+ CUDA_CALL(
+ cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost));
+
+ /* Cleanup */
+ CURAND_CALL(curandDestroyGenerator(gen));
+ CUDA_CALL(cudaFree(devData));
+ free(hostData);
+ return EXIT_SUCCESS;
+}
diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp
new file mode 100644
index 0000000..2a4da22
--- /dev/null
+++ b/Tests/Cuda/SharedRuntimePlusToolkit/main.cpp
@@ -0,0 +1,23 @@
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+IMPORT int shared_version();
+int static_version()
+{
+ return 0;
+}
+int mixed_version()
+{
+ return 0;
+}
+#else
+int shared_version();
+int static_version();
+int mixed_version();
+#endif
+
+int main()
+{
+ return mixed_version() == 0 && shared_version() == 0 &&
+ static_version() == 0;
+}
diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp
new file mode 100644
index 0000000..6de6886
--- /dev/null
+++ b/Tests/Cuda/SharedRuntimePlusToolkit/mixed.cpp
@@ -0,0 +1,16 @@
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+# define EXPORT __declspec(dllexport)
+#else
+# define IMPORT
+# define EXPORT
+#endif
+
+IMPORT int curand_main();
+IMPORT int nppif_main();
+
+EXPORT int mixed_version()
+{
+ return curand_main() == 0 && nppif_main() == 0;
+}
diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp
new file mode 100644
index 0000000..ac5341c
--- /dev/null
+++ b/Tests/Cuda/SharedRuntimePlusToolkit/nppif.cpp
@@ -0,0 +1,92 @@
+// Comes from
+// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
+
+#ifdef _WIN32
+# define EXPORT __declspec(dllexport)
+#else
+# define EXPORT
+#endif
+
+#include <cstdio>
+#include <iostream>
+
+#include <assert.h>
+#include <cuda_runtime_api.h>
+#include <nppi_filtering_functions.h>
+
+EXPORT int nppif_main()
+{
+ /**
+ * 8-bit unsigned single-channel 1D row convolution.
+ */
+ const int simgrows = 32;
+ const int simgcols = 32;
+ Npp8u *d_pSrc, *d_pDst;
+ const int nMaskSize = 3;
+ NppiSize oROI;
+ oROI.width = simgcols - nMaskSize;
+ oROI.height = simgrows;
+ const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
+ const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
+ const int simgpix = simgrows * simgcols;
+ const int dimgpix = oROI.width * oROI.height;
+ const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
+ const int nDstStep = oROI.width * sizeof(d_pDst[0]);
+ const int pixval = 1;
+ const int nDivisor = 1;
+ const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
+ Npp32s* d_pKernel;
+ const Npp32s nAnchor = 2;
+ cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMalloc((void**)&d_pDst, dimgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ // set image to pixval initially
+ err = cudaMemset(d_pSrc, pixval, simgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMemset(d_pDst, 0, dimgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
+ cudaMemcpyHostToDevice);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ // copy src to dst
+ NppStatus ret =
+ nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
+ nMaskSize, nAnchor, nDivisor);
+ assert(ret == NPP_NO_ERROR);
+ Npp8u* h_imgres = new Npp8u[dimgpix];
+ err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ // test for filtering
+ for (int i = 0; i < dimgpix; i++) {
+ if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
+ fprintf(stderr, "h_imgres at index %d failed to match\n", i);
+ return 1;
+ }
+ }
+
+ return 0;
+}
diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp
new file mode 100644
index 0000000..f3c3dbc
--- /dev/null
+++ b/Tests/Cuda/SharedRuntimePlusToolkit/shared.cpp
@@ -0,0 +1,16 @@
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+# define EXPORT __declspec(dllexport)
+#else
+# define IMPORT
+# define EXPORT
+#endif
+
+int curand_main();
+int nppif_main();
+
+EXPORT int shared_version()
+{
+ return curand_main() == 0 && nppif_main() == 0;
+}
diff --git a/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp b/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp
new file mode 100644
index 0000000..6932fa3
--- /dev/null
+++ b/Tests/Cuda/SharedRuntimePlusToolkit/static.cpp
@@ -0,0 +1,16 @@
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+# define EXPORT __declspec(dllexport)
+#else
+# define IMPORT
+# define EXPORT
+#endif
+
+IMPORT int curand_main();
+IMPORT int nppif_main();
+
+EXPORT int static_version()
+{
+ return curand_main() == 0 && nppif_main() == 0;
+}
diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt b/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt
new file mode 100644
index 0000000..df6c392
--- /dev/null
+++ b/Tests/Cuda/StaticRuntimePlusToolkit/CMakeLists.txt
@@ -0,0 +1,29 @@
+cmake_minimum_required(VERSION 3.15)
+project(StaticRuntimePlusToolkit CXX)
+
+#Goal for this example:
+# Validate that with c++ we can use some components of the CUDA toolkit, and
+# specify the cuda runtime
+find_package(CUDAToolkit REQUIRED)
+
+add_library(Common OBJECT curand.cpp nppif.cpp)
+target_link_libraries(Common PRIVATE CUDA::toolkit)
+set_target_properties(Common PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+#static runtime with shared toolkit libraries
+add_library(SharedToolkit SHARED shared.cpp)
+target_link_libraries(SharedToolkit PRIVATE Common PUBLIC CUDA::curand CUDA::nppif)
+target_link_libraries(SharedToolkit PUBLIC CUDA::cudart_static)
+
+#static runtime with static toolkit libraries
+add_library(StaticToolkit SHARED static.cpp)
+target_link_libraries(StaticToolkit PRIVATE Common CUDA::curand_static CUDA::nppif_static)
+target_link_libraries(StaticToolkit PUBLIC CUDA::cudart_static)
+
+#static runtime with mixed toolkit libraries
+add_library(MixedToolkit SHARED mixed.cpp)
+target_link_libraries(MixedToolkit PRIVATE Common CUDA::curand CUDA::nppif_static)
+target_link_libraries(MixedToolkit PUBLIC CUDA::cudart_static)
+
+add_executable(StaticRuntimePlusToolkit main.cpp)
+target_link_libraries(StaticRuntimePlusToolkit PRIVATE SharedToolkit StaticToolkit MixedToolkit)
diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp
new file mode 100644
index 0000000..95872f0
--- /dev/null
+++ b/Tests/Cuda/StaticRuntimePlusToolkit/curand.cpp
@@ -0,0 +1,59 @@
+// Comes from:
+// https://docs.nvidia.com/cuda/curand/host-api-overview.html#host-api-example
+
+/*
+ * This program uses the host CURAND API to generate 100
+ * pseudorandom floats.
+ */
+#include <cuda.h>
+#include <curand.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define CUDA_CALL(x) \
+ do { \
+ if ((x) != cudaSuccess) { \
+ printf("Error at %s:%d\n", __FILE__, __LINE__); \
+ return EXIT_FAILURE; \
+ } \
+ } while (0)
+#define CURAND_CALL(x) \
+ do { \
+ if ((x) != CURAND_STATUS_SUCCESS) { \
+ printf("Error at %s:%d\n", __FILE__, __LINE__); \
+ return EXIT_FAILURE; \
+ } \
+ } while (0)
+
+int curand_main()
+{
+ size_t n = 100;
+ size_t i;
+ curandGenerator_t gen;
+ float *devData, *hostData;
+
+ /* Allocate n floats on host */
+ hostData = (float*)calloc(n, sizeof(float));
+
+ /* Allocate n floats on device */
+ CUDA_CALL(cudaMalloc((void**)&devData, n * sizeof(float)));
+
+ /* Create pseudo-random number generator */
+ CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
+
+ /* Set seed */
+ CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, 1234ULL));
+
+ /* Generate n floats on device */
+ CURAND_CALL(curandGenerateUniform(gen, devData, n));
+
+ /* Copy device memory to host */
+ CUDA_CALL(
+ cudaMemcpy(hostData, devData, n * sizeof(float), cudaMemcpyDeviceToHost));
+
+ /* Cleanup */
+ CURAND_CALL(curandDestroyGenerator(gen));
+ CUDA_CALL(cudaFree(devData));
+ free(hostData);
+ return EXIT_SUCCESS;
+}
diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp
new file mode 100644
index 0000000..5a09f8e
--- /dev/null
+++ b/Tests/Cuda/StaticRuntimePlusToolkit/main.cpp
@@ -0,0 +1,11 @@
+
+
+int shared_version();
+int static_version();
+int mixed_version();
+
+int main()
+{
+ return mixed_version() == 0 && shared_version() == 0 &&
+ static_version() == 0;
+}
diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp
new file mode 100644
index 0000000..a05140d
--- /dev/null
+++ b/Tests/Cuda/StaticRuntimePlusToolkit/mixed.cpp
@@ -0,0 +1,8 @@
+
+int curand_main();
+int nppif_main();
+
+int mixed_version()
+{
+ return curand_main() == 0 && nppif_main() == 0;
+}
diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp
new file mode 100644
index 0000000..2871090
--- /dev/null
+++ b/Tests/Cuda/StaticRuntimePlusToolkit/nppif.cpp
@@ -0,0 +1,86 @@
+// Comes from
+// https://devtalk.nvidia.com/default/topic/1037482/gpu-accelerated-libraries/help-me-help-you-with-modern-cmake-and-cuda-mwe-for-npp/post/5271066/#5271066
+
+#include <cstdio>
+#include <iostream>
+
+#include <assert.h>
+#include <cuda_runtime_api.h>
+#include <nppi_filtering_functions.h>
+
+int nppif_main()
+{
+ /**
+ * 8-bit unsigned single-channel 1D row convolution.
+ */
+ const int simgrows = 32;
+ const int simgcols = 32;
+ Npp8u *d_pSrc, *d_pDst;
+ const int nMaskSize = 3;
+ NppiSize oROI;
+ oROI.width = simgcols - nMaskSize;
+ oROI.height = simgrows;
+ const int simgsize = simgrows * simgcols * sizeof(d_pSrc[0]);
+ const int dimgsize = oROI.width * oROI.height * sizeof(d_pSrc[0]);
+ const int simgpix = simgrows * simgcols;
+ const int dimgpix = oROI.width * oROI.height;
+ const int nSrcStep = simgcols * sizeof(d_pSrc[0]);
+ const int nDstStep = oROI.width * sizeof(d_pDst[0]);
+ const int pixval = 1;
+ const int nDivisor = 1;
+ const Npp32s h_pKernel[nMaskSize] = { pixval, pixval, pixval };
+ Npp32s* d_pKernel;
+ const Npp32s nAnchor = 2;
+ cudaError_t err = cudaMalloc((void**)&d_pSrc, simgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMalloc((void**)&d_pDst, dimgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMalloc((void**)&d_pKernel, nMaskSize * sizeof(d_pKernel[0]));
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ // set image to pixval initially
+ err = cudaMemset(d_pSrc, pixval, simgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMemset(d_pDst, 0, dimgsize);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ err = cudaMemcpy(d_pKernel, h_pKernel, nMaskSize * sizeof(d_pKernel[0]),
+ cudaMemcpyHostToDevice);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ // copy src to dst
+ NppStatus ret =
+ nppiFilterRow_8u_C1R(d_pSrc, nSrcStep, d_pDst, nDstStep, oROI, d_pKernel,
+ nMaskSize, nAnchor, nDivisor);
+ assert(ret == NPP_NO_ERROR);
+ Npp8u* h_imgres = new Npp8u[dimgpix];
+ err = cudaMemcpy(h_imgres, d_pDst, dimgsize, cudaMemcpyDeviceToHost);
+ if (err != cudaSuccess) {
+ fprintf(stderr, "Cuda error %d\n", __LINE__);
+ return 1;
+ }
+ // test for filtering
+ for (int i = 0; i < dimgpix; i++) {
+ if (h_imgres[i] != (pixval * pixval * nMaskSize)) {
+ fprintf(stderr, "h_imgres at index %d failed to match\n", i);
+ return 1;
+ }
+ }
+
+ return 0;
+}
diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp
new file mode 100644
index 0000000..9967b66
--- /dev/null
+++ b/Tests/Cuda/StaticRuntimePlusToolkit/shared.cpp
@@ -0,0 +1,8 @@
+
+int curand_main();
+int nppif_main();
+
+int shared_version()
+{
+ return curand_main() == 0 && nppif_main() == 0;
+}
diff --git a/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp b/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp
new file mode 100644
index 0000000..ca7eb4c
--- /dev/null
+++ b/Tests/Cuda/StaticRuntimePlusToolkit/static.cpp
@@ -0,0 +1,8 @@
+
+int curand_main();
+int nppif_main();
+
+int static_version()
+{
+ return curand_main() == 0 && nppif_main() == 0;
+}
diff --git a/Tests/Cuda/Toolkit/CMakeLists.txt b/Tests/Cuda/Toolkit/CMakeLists.txt
new file mode 100644
index 0000000..86b4652
--- /dev/null
+++ b/Tests/Cuda/Toolkit/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required(VERSION 3.15)
+project(Toolkit CXX)
+
+#Goal for this example:
+# Validate that we can use CUDAToolkit to find cuda include paths
+find_package(CUDAToolkit REQUIRED)
+
+message(STATUS "CUDAToolkit_VERSION: ${CUDAToolkit_VERSION}")
+message(STATUS "CUDAToolkit_VERSION_MAJOR: ${CUDAToolkit_VERSION_MAJOR}")
+message(STATUS "CUDAToolkit_VERSION_MINOR: ${CUDAToolkit_VERSION_MINOR}")
+message(STATUS "CUDAToolkit_VERSION_PATCH: ${CUDAToolkit_VERSION_PATCH}")
+message(STATUS "CUDAToolkit_BIN_DIR: ${CUDAToolkit_BIN_DIR}")
+message(STATUS "CUDAToolkit_INCLUDE_DIRS: ${CUDAToolkit_INCLUDE_DIRS}")
+message(STATUS "CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}")
+message(STATUS "CUDAToolkit_NVCC_EXECUTABLE ${CUDAToolkit_NVCC_EXECUTABLE}")
+
+# Verify that all the CUDA:: targets exist even when the CUDA language isn't enabled
+
+foreach (cuda_lib cudart cuda_driver cublas cufft cufftw curand cusolver cusparse nvgraph)
+ if(NOT TARGET CUDA::${cuda_lib})
+ message(FATAL_ERROR "The CUDA::${cuda_lib} target was expected but couldn't be found")
+ endif()
+endforeach()
+
+foreach (cuda_lib nppc nppial nppicc nppidei nppif nppig nppim nppist nppitc npps nppicom nppisu)
+ if(NOT TARGET CUDA::${cuda_lib})
+ message(FATAL_ERROR "The CUDA::${cuda_lib} target was expected but couldn't be found")
+ endif()
+endforeach()
+
+foreach (cuda_lib nvrtc nvToolsExt OpenCL)
+ if(NOT TARGET CUDA::${cuda_lib})
+ message(FATAL_ERROR "The CUDA::${cuda_lib} target was expected but couldn't be found")
+ endif()
+endforeach()
+
+add_executable(Toolkit main.cpp)
+target_link_libraries(Toolkit PRIVATE CUDA::toolkit)
diff --git a/Tests/Cuda/Toolkit/main.cpp b/Tests/Cuda/Toolkit/main.cpp
new file mode 100644
index 0000000..c8d5c6b
--- /dev/null
+++ b/Tests/Cuda/Toolkit/main.cpp
@@ -0,0 +1,8 @@
+// Only thing we care about is that these headers are found
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+int main()
+{
+ return 0;
+}