29 files changed, 563 insertions, 0 deletions
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt
new file mode 100644
index 0000000..de48501
--- /dev/null
+++ b/Tests/Cuda/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+ADD_TEST_MACRO(Cuda.Complex CudaComplex)
+ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
+ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
+ADD_TEST_MACRO(Cuda.ToolkitInclude CudaToolkitInclude)
+ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
+ADD_TEST_MACRO(Cuda.WithC CudaWithC)
diff --git a/Tests/Cuda/Complex/CMakeLists.txt b/Tests/Cuda/Complex/CMakeLists.txt
new file mode 100644
index 0000000..a7137e3
--- /dev/null
+++ b/Tests/Cuda/Complex/CMakeLists.txt
@@ -0,0 +1,47 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaComplex CXX CUDA)
+#Goal for this example:
+
+#build a cpp dynamic library base
+#build a cuda static library base that uses separable compilation
+
+#build a cuda dynamic library that uses the first dynamic library
+#build a mixed cpp & cuda dynamic library uses all 3 previous libraries
+
+#lastly build a cpp executable that uses this last cuda dynamic library
+
+#this tests that we can properly handle linking cuda and cpp together
+#and also bulding cpp targets that need cuda implicit libraries
+
+#verify that we can pass explicit cuda arch flags
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+
+add_library(CudaComplexCppBase SHARED dynamic.cpp)
+add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
+set_target_properties(CudaComplexSeperableLib
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+set_target_properties( CudaComplexSeperableLib
+                       PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+add_library(CudaComplexSharedLib SHARED dynamic.cu)
+target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
+
+add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
+set_target_properties(CudaComplexMixedLib
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+target_link_libraries(CudaComplexMixedLib
+                      PUBLIC CudaComplexSharedLib
+                      PRIVATE CudaComplexSeperableLib)
+
+add_executable(CudaComplex main.cpp)
+target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+  set_property(TARGET CudaComplex PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/Complex/dynamic.cpp b/Tests/Cuda/Complex/dynamic.cpp
new file mode 100644
index 0000000..3848ce7
--- /dev/null
+++ b/Tests/Cuda/Complex/dynamic.cpp
@@ -0,0 +1,11 @@
+
+#ifdef _WIN32
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+EXPORT int dynamic_base_func(int x)
+{
+  return x * x;
+}
diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu
new file mode 100644
index 0000000..a76973d
--- /dev/null
+++ b/Tests/Cuda/Complex/dynamic.cu
@@ -0,0 +1,69 @@
+
+#include <cuda.h>
+#include <iostream>
+#include <string>
+
+#ifdef _WIN32
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+int dynamic_base_func(int);
+
+EXPORT int __host__ cuda_dynamic_host_func(int x)
+{
+  return dynamic_base_func(x);
+}
+
+static __global__ void DetermineIfValidCudaDevice()
+{
+}
+
+EXPORT int choose_cuda_device()
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+  for (int i = 0; i < nDevices; ++i) {
+    cudaDeviceProp prop;
+    cudaError_t err = cudaGetDeviceProperties(&prop, i);
+    if (err != cudaSuccess) {
+      std::cerr << "Could not retrieve properties from CUDA device " << i
+                << std::endl;
+      return 1;
+    }
+    if (prop.major >= 3) {
+      err = cudaSetDevice(i);
+      if (err != cudaSuccess) {
+        std::cout << "Could not select CUDA device " << i << std::endl;
+      } else {
+        return 0;
+      }
+    }
+  }
+
+  std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+            << std::endl;
+
+  return 1;
+}
+
+EXPORT void cuda_dynamic_lib_func()
+{
+  DetermineIfValidCudaDevice<<<1, 1>>>();
+  cudaError_t err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "DetermineIfValidCudaDevice [SYNC] failed: "
+              << cudaGetErrorString(err) << std::endl;
+  }
+  err = cudaDeviceSynchronize();
+  if (err != cudaSuccess) {
+    std::cerr << "DetermineIfValidCudaDevice [ASYNC] failed: "
+              << cudaGetErrorString(cudaGetLastError()) << std::endl;
+  }
+}
diff --git a/Tests/Cuda/Complex/file1.cu b/Tests/Cuda/Complex/file1.cu
new file mode 100644
index 0000000..1ce63bf
--- /dev/null
+++ b/Tests/Cuda/Complex/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x * x;
+  return r;
+}
diff --git a/Tests/Cuda/Complex/file1.h b/Tests/Cuda/Complex/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/Cuda/Complex/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};
diff --git a/Tests/Cuda/Complex/file2.cu b/Tests/Cuda/Complex/file2.cu
new file mode 100644
index 0000000..74f3558
--- /dev/null
+++ b/Tests/Cuda/Complex/file2.cu
@@ -0,0 +1,16 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if (x != 42) {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd{ r.input, r.sum, true };
+    return rd;
+  } else {
+    const result_type_dynamic rd{ x, x * x * x, false };
+    return rd;
+  }
+}
diff --git a/Tests/Cuda/Complex/file2.h b/Tests/Cuda/Complex/file2.h
new file mode 100644
index 0000000..d2dbaa4
--- /dev/null
+++ b/Tests/Cuda/Complex/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};
diff --git a/Tests/Cuda/Complex/file3.cu b/Tests/Cuda/Complex/file3.cu
new file mode 100644
index 0000000..d055b42
--- /dev/null
+++ b/Tests/Cuda/Complex/file3.cu
@@ -0,0 +1,48 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static __global__ void file3_kernel(result_type* r, int x)
+{
+  *r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file3_launch_kernel(int x)
+{
+  result_type* r;
+  cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+  if (err != cudaSuccess) {
+    std::cerr << "file3_launch_kernel: cudaMallocManaged failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
+  file3_kernel<<<1, 1>>>(r, x);
+  err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "file3_kernel [SYNC] failed: " << cudaGetErrorString(err)
+              << std::endl;
+    return x;
+  }
+  err = cudaDeviceSynchronize();
+  if (err != cudaSuccess) {
+    std::cerr << "file3_kernel [ASYNC] failed: "
+              << cudaGetErrorString(cudaGetLastError()) << std::endl;
+    return x;
+  }
+  int result = r->sum;
+  err = cudaFree(r);
+  if (err != cudaSuccess) {
+    std::cerr << "file3_launch_kernel: cudaFree failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
+  return result;
+}
diff --git a/Tests/Cuda/Complex/main.cpp b/Tests/Cuda/Complex/main.cpp
new file mode 100644
index 0000000..2498235
--- /dev/null
+++ b/Tests/Cuda/Complex/main.cpp
@@ -0,0 +1,26 @@
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+#ifdef _WIN32
+#define IMPORT __declspec(dllimport)
+#else
+#define IMPORT
+#endif
+
+IMPORT int choose_cuda_device();
+IMPORT int call_cuda_seperable_code(int x);
+IMPORT int mixed_launch_kernel(int x);
+
+int main(int argc, char** argv)
+{
+  int ret = choose_cuda_device();
+  if (ret) {
+    return 0;
+  }
+
+  int r1 = call_cuda_seperable_code(42);
+  int r2 = mixed_launch_kernel(42);
+  return (r1 == 42 || r2 == 42) ? 1 : 0;
+}
diff --git a/Tests/Cuda/Complex/mixed.cpp b/Tests/Cuda/Complex/mixed.cpp
new file mode 100644
index 0000000..bd32e51
--- /dev/null
+++ b/Tests/Cuda/Complex/mixed.cpp
@@ -0,0 +1,22 @@
+
+#ifdef _WIN32
+#define EXPORT __declspec(dllexport)
+#define IMPORT __declspec(dllimport)
+#else
+#define EXPORT
+#define IMPORT
+#endif
+
+int dynamic_base_func(int);
+IMPORT int cuda_dynamic_host_func(int);
+int file3_launch_kernel(int);
+
+int dynamic_final_func(int x)
+{
+  return cuda_dynamic_host_func(dynamic_base_func(x));
+}
+
+EXPORT int call_cuda_seperable_code(int x)
+{
+  return file3_launch_kernel(x);
+}
diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu
new file mode 100644
index 0000000..a81ccb7
--- /dev/null
+++ b/Tests/Cuda/Complex/mixed.cu
@@ -0,0 +1,61 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+#ifdef _WIN32
+#define EXPORT __declspec(dllexport)
+#define IMPORT __declspec(dllimport)
+#else
+#define EXPORT
+#define IMPORT
+#endif
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+IMPORT void __host__ cuda_dynamic_lib_func();
+
+static __global__ void mixed_kernel(result_type* r, int x)
+{
+  *r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+EXPORT int mixed_launch_kernel(int x)
+{
+  cuda_dynamic_lib_func();
+
+  result_type* r;
+  cudaError_t err = cudaMallocManaged(&r, sizeof(result_type));
+  if (err != cudaSuccess) {
+    std::cerr << "mixed_launch_kernel: cudaMallocManaged failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
+  mixed_kernel<<<1, 1>>>(r, x);
+  err = cudaGetLastError();
+  if (err != cudaSuccess) {
+    std::cerr << "mixed_kernel [SYNC] failed: " << cudaGetErrorString(err)
+              << std::endl;
+    return x;
+  }
+  err = cudaDeviceSynchronize();
+  if (err != cudaSuccess) {
+    std::cerr << "mixed_kernel [ASYNC] failed: "
+              << cudaGetErrorString(cudaGetLastError()) << std::endl;
+    return x;
+  }
+
+  int result = r->sum;
+  err = cudaFree(r);
+  if (err != cudaSuccess) {
+    std::cerr << "mixed_launch_kernel: cudaFree failed: "
+              << cudaGetErrorString(err) << std::endl;
+    return x;
+  }
+
+  return result;
+}
diff --git a/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt b/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt
new file mode 100644
index 0000000..9fda2d0
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaConsumeCompileFeatures CXX CUDA)
+#Goal for this example:
+
+#build a c++11 library that express a c++11 public compile feature
+#link a cuda library and verify it builds with c++11 enabled
+
+#build a standalone c++/cuda mixed executable where we express a c++11
+#compile feature.
+
+
+add_library(CudaConsumeLib STATIC static.cpp static.cu)
+target_compile_features(CudaConsumeLib PUBLIC cxx_nullptr)
+
+add_executable(CudaConsumeCompileFeatures main.cu)
+target_link_libraries(CudaConsumeCompileFeatures PRIVATE CudaConsumeLib)
diff --git a/Tests/Cuda/ConsumeCompileFeatures/main.cu b/Tests/Cuda/ConsumeCompileFeatures/main.cu
new file mode 100644
index 0000000..bc32450
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/main.cu
@@ -0,0 +1,20 @@
+
+#include <iostream>
+
+int static_cxx11_func(int);
+
+void test_functions()
+{
+  auto x = static_cxx11_func(int(42));
+  std::cout << x << std::endl;
+}
+
+int main(int argc, char** argv)
+{
+  test_functions();
+  std::cout
+    << "this executable doesn't use cuda code, just call methods defined"
+    << std::endl;
+  std::cout << "in libraries that have cuda code" << std::endl;
+  return 0;
+}
diff --git a/Tests/Cuda/ConsumeCompileFeatures/static.cpp b/Tests/Cuda/ConsumeCompileFeatures/static.cpp
new file mode 100644
index 0000000..565d52e
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/static.cpp
@@ -0,0 +1,10 @@
+
+
+#include <type_traits>
+
+int static_cuda11_func(int);
+
+int static_cxx11_func(int x)
+{
+  return static_cuda11_func(x) + std::integral_constant<int, 32>::value;
+}
diff --git a/Tests/Cuda/ConsumeCompileFeatures/static.cu b/Tests/Cuda/ConsumeCompileFeatures/static.cu
new file mode 100644
index 0000000..73e43a8
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/static.cu
@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/Cuda/ObjectLibrary/CMakeLists.txt b/Tests/Cuda/ObjectLibrary/CMakeLists.txt
new file mode 100644
index 0000000..276dc92
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/CMakeLists.txt
@@ -0,0 +1,17 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaObjectLibrary CUDA CXX)
+#Goal for this example:
+
+#build a object files some with cuda and some without than
+#embed these into an executable
+
+add_library(CudaMixedObjectLib OBJECT static.cu static.cpp)
+
+add_executable(CudaObjectLibrary
+               main.cpp
+               $<TARGET_OBJECTS:CudaMixedObjectLib>)
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+  set_property(TARGET CudaObjectLibrary PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/ObjectLibrary/main.cpp b/Tests/Cuda/ObjectLibrary/main.cpp
new file mode 100644
index 0000000..4d2f890
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/main.cpp
@@ -0,0 +1,22 @@
+
+#include <iostream>
+
+int static_func(int);
+int file1_sq_func(int);
+
+int test_functions()
+{
+  return file1_sq_func(static_func(42));
+}
+
+int main(int argc, char** argv)
+{
+  if (test_functions() == 1) {
+    return 1;
+  }
+  std::cout
+    << "this executable doesn't use cuda code, just call methods defined"
+    << std::endl;
+  std::cout << "in object files that have cuda code" << std::endl;
+  return 0;
+}
diff --git a/Tests/Cuda/ObjectLibrary/static.cpp b/Tests/Cuda/ObjectLibrary/static.cpp
new file mode 100644
index 0000000..6db1f91
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/static.cpp
@@ -0,0 +1,6 @@
+int file1_sq_func(int);
+
+int static_func(int x)
+{
+  return file1_sq_func(x);
+}
diff --git a/Tests/Cuda/ObjectLibrary/static.cu b/Tests/Cuda/ObjectLibrary/static.cu
new file mode 100644
index 0000000..aa35729
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/static.cu
@@ -0,0 +1,21 @@
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+int __host__ file1_sq_func(int x)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "nDevices: " << nDevices << std::endl;
+    std::cerr << "err: " << err << std::endl;
+    return 1;
+  }
+  std::cout << "this library uses cuda code" << std::endl;
+  std::cout << "you have " << nDevices << " devices that support cuda"
+            << std::endl;
+
+  return x * x;
+}
diff --git a/Tests/Cuda/ProperLinkFlags/CMakeLists.txt b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt
new file mode 100644
index 0000000..b6e0e39
--- /dev/null
+++ b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt
@@ -0,0 +1,20 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (ProperLinkFlags CUDA CXX)
+
+#Goal for this example:
+#Verify that when we have CXX and CUDA enabled and we link an executable that
+#has CUDA and CXX we use the CUDA link flags when doing the device link
+#step
+
+#Specify a set of valid CUDA flags and an invalid set of CXX flags ( for CUDA )
+#to make sure we don't use the CXX flags when linking CUDA executables
+string(APPEND CMAKE_CUDA_FLAGS " -arch=sm_35 --use_fast_math")
+set(CMAKE_CXX_FLAGS "-Wall")
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+add_executable(ProperLinkFlags file1.cu main.cxx)
+
+set_target_properties( ProperLinkFlags
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
diff --git a/Tests/Cuda/ProperLinkFlags/file1.cu b/Tests/Cuda/ProperLinkFlags/file1.cu
new file mode 100644
index 0000000..9a105f0
--- /dev/null
+++ b/Tests/Cuda/ProperLinkFlags/file1.cu
@@ -0,0 +1,11 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  __ldg(&x);
+  result_type r;
+  r.input = x;
+  r.sum = x * x;
+  return r;
+}
diff --git a/Tests/Cuda/ProperLinkFlags/file1.h b/Tests/Cuda/ProperLinkFlags/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/Cuda/ProperLinkFlags/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};
diff --git a/Tests/Cuda/ProperLinkFlags/main.cxx b/Tests/Cuda/ProperLinkFlags/main.cxx
new file mode 100644
index 0000000..7c0ee9e
--- /dev/null
+++ b/Tests/Cuda/ProperLinkFlags/main.cxx
@@ -0,0 +1,9 @@
+
+#include <iostream>
+
+#include "file1.h"
+
+int main(int argc, char** argv)
+{
+  return 0;
+}
diff --git a/Tests/Cuda/ToolkitInclude/CMakeLists.txt b/Tests/Cuda/ToolkitInclude/CMakeLists.txt
new file mode 100644
index 0000000..f246b54
--- /dev/null
+++ b/Tests/Cuda/ToolkitInclude/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.8)
+project (ToolkitInclude CXX CUDA)
+
+#Goal for this example:
+# Validate that between the CXX implicit include directories and the
+# CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES directories we can find
+# the cuda runtime headers
+
+add_executable(CudaToolkitInclude main.cpp)
+target_include_directories(CudaToolkitInclude PRIVATE
+                           ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
diff --git a/Tests/Cuda/ToolkitInclude/main.cpp b/Tests/Cuda/ToolkitInclude/main.cpp
new file mode 100644
index 0000000..c8d5c6b
--- /dev/null
+++ b/Tests/Cuda/ToolkitInclude/main.cpp
@@ -0,0 +1,8 @@
+// Only thing we care about is that these headers are found
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+int main()
+{
+  return 0;
+}
diff --git a/Tests/Cuda/WithC/CMakeLists.txt b/Tests/Cuda/WithC/CMakeLists.txt
new file mode 100644
index 0000000..831ce12
--- /dev/null
+++ b/Tests/Cuda/WithC/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.7)
+project(CudaComplex CUDA C)
+
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+
+add_executable(CudaWithC main.c cuda.cu)
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+  set_property(TARGET CudaWithC PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/Cuda/WithC/cuda.cu b/Tests/Cuda/WithC/cuda.cu
new file mode 100644
index 0000000..06bd7b9
--- /dev/null
+++ b/Tests/Cuda/WithC/cuda.cu
@@ -0,0 +1,16 @@
+#include <cuda.h>
+
+#include <iostream>
+
+extern "C" int use_cuda(void)
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+  std::cout << "Found " << nDevices << " CUDA enabled devices" << std::endl;
+  return 0;
+}
diff --git a/Tests/Cuda/WithC/main.c b/Tests/Cuda/WithC/main.c
new file mode 100644
index 0000000..cb5fddc
--- /dev/null
+++ b/Tests/Cuda/WithC/main.c
@@ -0,0 +1,14 @@
+extern int use_cuda(void);
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+int main()
+{
+#ifdef _WIN32
+  /* Use an API that requires CMake's "standard" C libraries.  */
+  GetOpenFileName(NULL);
+#endif
+  return use_cuda();
+}