12 files changed, 124 insertions, 196 deletions
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index 13b1789..1c48159 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -93,6 +93,12 @@ if(NOT CMAKE_NOT_USING_CONFIG_FLAGS)
 
 endif()
 
+if(CMAKE_CUDA_STANDARD_LIBRARIES_INIT)
+  set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES_INIT}"
+    CACHE STRING "Libraries linked by default with all CUDA applications.")
+  mark_as_advanced(CMAKE_CUDA_STANDARD_LIBRARIES)
+endif()
+
 include(CMakeCommonLanguageInclude)
 
 # now define the following rules:
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index 7b6d17b..375e230 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -75,13 +75,6 @@ if(MSVC_CUDA_ARCHITECTURE_ID)
     "set(MSVC_CUDA_ARCHITECTURE_ID ${MSVC_CUDA_ARCHITECTURE_ID})")
 endif()
 
-#if this compiler vendor is matches NVIDIA we can determine
-#what the host compiler is. This only needs to be done if the CMAKE_CUDA_HOST_COMPILER
-#has NOT been explicitly set
-#
-#Find the line from compiler ID that contains a.out ( or last line )
-#We also need to find the implicit link lines. Which can be done by replacing
-#the compiler with cuda-fake-ld  and pass too CMAKE_PARSE_IMPLICIT_LINK_INFO
 if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA)
   set(_nvcc_log "")
   string(REPLACE "\r" "" _nvcc_output_orig "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index ae35132..7903313 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -13,7 +13,7 @@ set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
 
 string(APPEND CMAKE_CUDA_FLAGS_INIT " ")
 string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -g")
-string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -Os -DNDEBUG")
+string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -O1 -DNDEBUG")
 string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -O3 -DNDEBUG")
 string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG")
 
diff --git a/Modules/Platform/Windows-NVIDIA-CUDA.cmake b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
index 809ee06..eda41e0 100644
--- a/Modules/Platform/Windows-NVIDIA-CUDA.cmake
+++ b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
@@ -40,3 +40,5 @@ string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=-MDd,-Zi,-RTC1")
 string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=-MD")
 string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -Xcompiler=-MD")
 string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -Xcompiler=-MD")
+
+set(CMAKE_CUDA_STANDARD_LIBRARIES_INIT "${CMAKE_C_STANDARD_LIBRARIES_INIT}")
diff --git a/Source/CMakeVersion.cmake b/Source/CMakeVersion.cmake
index 8b1882f..2efc5bf 100644
--- a/Source/CMakeVersion.cmake
+++ b/Source/CMakeVersion.cmake
@@ -1,5 +1,5 @@
 # CMake version number components.
 set(CMake_VERSION_MAJOR 3)
 set(CMake_VERSION_MINOR 8)
-set(CMake_VERSION_PATCH 20170214)
+set(CMake_VERSION_PATCH 20170215)
 #set(CMake_VERSION_RC 1)
diff --git a/Source/cmVisualStudioGeneratorOptions.cxx b/Source/cmVisualStudioGeneratorOptions.cxx
index c0913e6..6bacfa1 100644
--- a/Source/cmVisualStudioGeneratorOptions.cxx
+++ b/Source/cmVisualStudioGeneratorOptions.cxx
@@ -252,19 +252,19 @@ void cmVisualStudioGeneratorOptions::OutputPreprocessorDefinitions(
   if (this->Defines.empty()) {
     return;
   }
+  const char* tag = "PreprocessorDefinitions";
   if (this->Version >= cmGlobalVisualStudioGenerator::VS10) {
     // if there are configuration specific flags, then
     // use the configuration specific tag for PreprocessorDefinitions
     if (!this->Configuration.empty()) {
       fout << prefix;
       this->TargetGenerator->WritePlatformConfigTag(
-        "PreprocessorDefinitions", this->Configuration.c_str(), 0, 0, 0,
-        &fout);
+        tag, this->Configuration.c_str(), 0, 0, 0, &fout);
     } else {
-      fout << prefix << "<PreprocessorDefinitions>";
+      fout << prefix << "<" << tag << ">";
     }
   } else {
-    fout << prefix << "PreprocessorDefinitions=\"";
+    fout << prefix << tag << "=\"";
   }
   const char* sep = "";
   for (std::vector<std::string>::const_iterator di = this->Defines.begin();
@@ -291,7 +291,7 @@ void cmVisualStudioGeneratorOptions::OutputPreprocessorDefinitions(
     sep = ";";
   }
   if (this->Version >= cmGlobalVisualStudioGenerator::VS10) {
-    fout << ";%(PreprocessorDefinitions)</PreprocessorDefinitions>" << suffix;
+    fout << ";%(" << tag << ")</" << tag << ">" << suffix;
   } else {
     fout << "\"" << suffix;
   }
diff --git a/Source/kwsys/SystemInformation.cxx b/Source/kwsys/SystemInformation.cxx
index e01dcd7..86f7552 100644
--- a/Source/kwsys/SystemInformation.cxx
+++ b/Source/kwsys/SystemInformation.cxx
@@ -35,8 +35,13 @@
 #include "SystemInformation.hxx.in"
 #endif
 
+#include <algorithm>
+#include <bitset>
+#include <cassert>
 #include <fstream>
 #include <iostream>
+#include <limits>
+#include <set>
 #include <sstream>
 #include <string>
 #include <vector>
@@ -394,7 +399,6 @@ public:
     bool SupportsMP;
     bool HasMMXPlus;
     bool HasSSEMMX;
-    bool SupportsHyperthreading;
     unsigned int LogicalProcessorsPerPhysical;
     int APIC_ID;
     CPUPowerManagement PowerManagement;
@@ -463,10 +467,9 @@ protected:
   unsigned int NumberOfLogicalCPU;
   unsigned int NumberOfPhysicalCPU;
 
-  int CPUCount(); // For windows
-  unsigned char LogicalCPUPerPhysicalCPU();
+  void CPUCountWindows();    // For windows
   unsigned char GetAPICId(); // For windows
-  bool IsHyperThreadingSupported();
+  bool IsSMTSupported();
   static LongLong GetCyclesDifference(DELAY_FUNC, unsigned int); // For windows
 
   // For Linux and Cygwin, /proc/cpuinfo formats are slightly different
@@ -1542,7 +1545,7 @@ void SystemInformationImplementation::RunCPUCheck()
     RetrieveProcessorSerialNumber();
   }
 
-  this->CPUCount();
+  this->CPUCountWindows();
 
 #elif defined(__APPLE__)
   this->ParseSysCtl();
@@ -2090,16 +2093,10 @@ bool SystemInformationImplementation::RetrieveCPUFeatures()
 
   // Retrieve Intel specific extended features.
   if (this->ChipManufacturer == Intel) {
-    this->Features.ExtendedFeatures.SupportsHyperthreading =
-      ((cpuinfo[3] & 0x10000000) !=
-       0); // Intel specific: Hyperthreading --> Bit 28
-    this->Features.ExtendedFeatures.LogicalProcessorsPerPhysical =
-      (this->Features.ExtendedFeatures.SupportsHyperthreading)
-      ? ((cpuinfo[1] & 0x00FF0000) >> 16)
-      : 1;
-
-    if ((this->Features.ExtendedFeatures.SupportsHyperthreading) &&
-        (this->Features.HasAPIC)) {
+    bool SupportsSMT =
+      ((cpuinfo[3] & 0x10000000) != 0); // Intel specific: SMT --> Bit 28
+
+    if ((SupportsSMT) && (this->Features.HasAPIC)) {
       // Retrieve APIC information if there is one present.
       this->Features.ExtendedFeatures.APIC_ID =
         ((cpuinfo[1] & 0xFF000000) >> 24);
@@ -3401,7 +3398,7 @@ bool SystemInformationImplementation::RetreiveInformationFromCpuInfoFile()
   fclose(fd);
   buffer.resize(fileSize - 2);
   // Number of logical CPUs (combination of multiple processors, multi-core
-  // and hyperthreading)
+  // and SMT)
   size_t pos = buffer.find("processor\t");
   while (pos != buffer.npos) {
     this->NumberOfLogicalCPU++;
@@ -3409,30 +3406,31 @@ bool SystemInformationImplementation::RetreiveInformationFromCpuInfoFile()
   }
 
 #ifdef __linux
-  // Find the largest physical id.
-  int maxId = -1;
+  // Count sockets.
+  std::set<int> PhysicalIDs;
   std::string idc = this->ExtractValueFromCpuInfoFile(buffer, "physical id");
   while (this->CurrentPositionInFile != buffer.npos) {
     int id = atoi(idc.c_str());
-    if (id > maxId) {
-      maxId = id;
-    }
+    PhysicalIDs.insert(id);
     idc = this->ExtractValueFromCpuInfoFile(buffer, "physical id",
                                             this->CurrentPositionInFile + 1);
   }
+  uint64_t NumberOfSockets = PhysicalIDs.size();
+  NumberOfSockets = std::max(NumberOfSockets, (uint64_t)1);
   // Physical ids returned by Linux don't distinguish cores.
   // We want to record the total number of cores in this->NumberOfPhysicalCPU
   // (checking only the first proc)
-  std::string cores = this->ExtractValueFromCpuInfoFile(buffer, "cpu cores");
-  int numberOfCoresPerCPU = atoi(cores.c_str());
-  if (maxId > 0) {
-    this->NumberOfPhysicalCPU =
-      static_cast<unsigned int>(numberOfCoresPerCPU * (maxId + 1));
-  } else {
-    // Linux Sparc: get cpu count
-    this->NumberOfPhysicalCPU =
-      atoi(this->ExtractValueFromCpuInfoFile(buffer, "ncpus active").c_str());
-  }
+  std::string Cores = this->ExtractValueFromCpuInfoFile(buffer, "cpu cores");
+  unsigned int NumberOfCoresPerSocket = (unsigned int)atoi(Cores.c_str());
+  if (NumberOfCoresPerSocket == 0) {
+    // No usable "cpu cores" entry.  Linux Sparc reports its CPU count as
+    // "ncpus active" instead, so keep honoring that key.
+    NumberOfCoresPerSocket = (unsigned int)atoi(
+      this->ExtractValueFromCpuInfoFile(buffer, "ncpus active").c_str());
+  }
+  NumberOfCoresPerSocket = std::max(NumberOfCoresPerSocket, 1u);
+  this->NumberOfPhysicalCPU =
+    NumberOfCoresPerSocket * (unsigned int)NumberOfSockets;
 
 #else // __CYGWIN__
   // does not have "physical id" entries, neither "cpu cores"
@@ -3447,7 +3439,7 @@ bool SystemInformationImplementation::RetreiveInformationFromCpuInfoFile()
   if (this->NumberOfPhysicalCPU <= 0) {
     this->NumberOfPhysicalCPU = 1;
   }
-  // LogicalProcessorsPerPhysical>1 => hyperthreading.
+  // LogicalProcessorsPerPhysical>1 => SMT.
   this->Features.ExtendedFeatures.LogicalProcessorsPerPhysical =
     this->NumberOfLogicalCPU / this->NumberOfPhysicalCPU;
 
@@ -4322,68 +4314,10 @@ void SystemInformationImplementation::DelayOverhead(unsigned int uiMS)
   (void)uiMS;
 }
 
-/** Return the number of logical CPU per physical CPUs Works only for windows
- */
-unsigned char SystemInformationImplementation::LogicalCPUPerPhysicalCPU(void)
-{
-#ifdef __APPLE__
-  size_t len = 4;
-  int cores_per_package = 0;
-  int err = sysctlbyname("machdep.cpu.cores_per_package", &cores_per_package,
-                         &len, NULL, 0);
-  if (err != 0) {
-    return 1; // That name was not found, default to 1
-  }
-  return static_cast<unsigned char>(cores_per_package);
-#else
-  int Regs[4] = { 0, 0, 0, 0 };
-#if USE_CPUID
-  if (!this->IsHyperThreadingSupported()) {
-    return static_cast<unsigned char>(1); // HT not supported
-  }
-  call_cpuid(1, Regs);
-#endif
-  return static_cast<unsigned char>((Regs[1] & NUM_LOGICAL_BITS) >> 16);
-#endif
-}
-
 /** Works only for windows */
-bool SystemInformationImplementation::IsHyperThreadingSupported()
+bool SystemInformationImplementation::IsSMTSupported()
 {
-  if (this->Features.ExtendedFeatures.SupportsHyperthreading) {
-    return true;
-  }
-
-#if USE_CPUID
-  int Regs[4] = { 0, 0, 0, 0 }, VendorId[4] = { 0, 0, 0, 0 };
-  // Get vendor id string
-  if (!call_cpuid(0, VendorId)) {
-    return false;
-  }
-  // eax contains family processor type
-  // edx has info about the availability of hyper-Threading
-  if (!call_cpuid(1, Regs)) {
-    return false;
-  }
-
-  if (((Regs[0] & FAMILY_ID) == PENTIUM4_ID) || (Regs[0] & EXT_FAMILY_ID)) {
-    if (VendorId[1] == 0x756e6547) // 'uneG'
-    {
-      if (VendorId[3] == 0x49656e69) // 'Ieni'
-      {
-        if (VendorId[2] == 0x6c65746e) // 'letn'
-        {
-          // Genuine Intel with hyper-Threading technology
-          this->Features.ExtendedFeatures.SupportsHyperthreading =
-            ((Regs[3] & HT_BIT) != 0);
-          return this->Features.ExtendedFeatures.SupportsHyperthreading;
-        }
-      }
-    }
-  }
-#endif
-
-  return 0; // Not genuine Intel processor
+  return this->Features.ExtendedFeatures.LogicalProcessorsPerPhysical > 1;
 }
 
 /** Return the APIC Id. Works only for windows. */
@@ -4392,7 +4326,7 @@ unsigned char SystemInformationImplementation::GetAPICId()
   int Regs[4] = { 0, 0, 0, 0 };
 
 #if USE_CPUID
-  if (!this->IsHyperThreadingSupported()) {
+  if (!this->IsSMTSupported()) {
     return static_cast<unsigned char>(-1); // HT not supported
   }                                        // Logical processor = 1
   call_cpuid(1, Regs);
@@ -4402,102 +4336,46 @@ unsigned char SystemInformationImplementation::GetAPICId()
 }
 
 /** Count the number of CPUs. Works only on windows. */
-int SystemInformationImplementation::CPUCount()
+void SystemInformationImplementation::CPUCountWindows()
 {
 #if defined(_WIN32)
-  unsigned char StatusFlag = 0;
-  SYSTEM_INFO info;
-
+  std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> ProcInfo;
   this->NumberOfPhysicalCPU = 0;
   this->NumberOfLogicalCPU = 0;
-  info.dwNumberOfProcessors = 0;
-  GetSystemInfo(&info);
-
-  // Number of physical processors in a non-Intel system
-  // or in a 32-bit Intel system with Hyper-Threading technology disabled
-  this->NumberOfPhysicalCPU = (unsigned char)info.dwNumberOfProcessors;
-
-  if (this->IsHyperThreadingSupported()) {
-    unsigned char HT_Enabled = 0;
-    this->NumberOfLogicalCPU = this->LogicalCPUPerPhysicalCPU();
-    if (this->NumberOfLogicalCPU >=
-        1) // >1 Doesn't mean HT is enabled in the BIOS
-    {
-      HANDLE hCurrentProcessHandle;
-#ifndef _WIN64
-#define DWORD_PTR DWORD
-#endif
-      DWORD_PTR dwProcessAffinity;
-      DWORD_PTR dwSystemAffinity;
-      DWORD dwAffinityMask;
-
-      // Calculate the appropriate  shifts and mask based on the
-      // number of logical processors.
-      unsigned int i = 1;
-      unsigned char PHY_ID_MASK = 0xFF;
-      // unsigned char PHY_ID_SHIFT = 0;
-
-      while (i < this->NumberOfLogicalCPU) {
-        i *= 2;
-        PHY_ID_MASK <<= 1;
-        // PHY_ID_SHIFT++;
-      }
 
-      hCurrentProcessHandle = GetCurrentProcess();
-      GetProcessAffinityMask(hCurrentProcessHandle, &dwProcessAffinity,
-                             &dwSystemAffinity);
-
-      // Check if available process affinity mask is equal to the
-      // available system affinity mask
-      if (dwProcessAffinity != dwSystemAffinity) {
-        StatusFlag = HT_CANNOT_DETECT;
-        this->NumberOfPhysicalCPU = (unsigned char)-1;
-        return StatusFlag;
-      }
-
-      dwAffinityMask = 1;
-      while (dwAffinityMask != 0 && dwAffinityMask <= dwProcessAffinity) {
-        // Check if this CPU is available
-        if (dwAffinityMask & dwProcessAffinity) {
-          if (SetProcessAffinityMask(hCurrentProcessHandle, dwAffinityMask)) {
-            unsigned char APIC_ID, LOG_ID;
-            Sleep(0); // Give OS time to switch CPU
-
-            APIC_ID = GetAPICId();
-            LOG_ID = APIC_ID & ~PHY_ID_MASK;
-
-            if (LOG_ID != 0) {
-              HT_Enabled = 1;
-            }
-          }
-        }
-        dwAffinityMask = dwAffinityMask << 1;
-      }
-      // Reset the processor affinity
-      SetProcessAffinityMask(hCurrentProcessHandle, dwProcessAffinity);
+  {
+    DWORD Length = 0;
+    GetLogicalProcessorInformation(NULL, &Length); // query size in bytes
+    ProcInfo.resize(Length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION));
+    bool ok = !ProcInfo.empty() && // never take &ProcInfo[0] when empty
+      GetLogicalProcessorInformation(&ProcInfo[0], &Length) != FALSE;
+    assert(ok);
+    if (!ok) { // assert() is compiled out under NDEBUG
+      ProcInfo.clear(); // no usable records; the counts below clamp to 1
+    }
+  }
+
+  typedef std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION>::iterator
+    pinfoIt_t;
+  for (pinfoIt_t it = ProcInfo.begin(); it != ProcInfo.end(); ++it) {
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION PInfo = *it;
+    if (PInfo.Relationship != RelationProcessorCore) {
+      continue;
+    }
 
-      if (this->NumberOfLogicalCPU ==
-          1) // Normal P4 : HT is disabled in hardware
-      {
-        StatusFlag = HT_DISABLED;
-      } else {
-        if (HT_Enabled) {
-          // Total physical processors in a Hyper-Threading enabled system.
-          this->NumberOfPhysicalCPU /= (this->NumberOfLogicalCPU);
-          StatusFlag = HT_ENABLED;
-        } else {
-          StatusFlag = HT_SUPPORTED_NOT_ENABLED;
-        }
-      }
+    std::bitset<std::numeric_limits<ULONG_PTR>::digits> ProcMask(
+      (unsigned long long)PInfo.ProcessorMask);
+    unsigned int count = (unsigned int)ProcMask.count();
+    if (count == 0) { // I think this should never happen, but just to be safe.
+      continue;
     }
-  } else {
-    // Processors do not have Hyper-Threading technology
-    StatusFlag = HT_NOT_CAPABLE;
-    this->NumberOfLogicalCPU = 1;
+    this->NumberOfPhysicalCPU++;
+    this->NumberOfLogicalCPU += (unsigned int)count;
+    this->Features.ExtendedFeatures.LogicalProcessorsPerPhysical = count;
   }
-  return StatusFlag;
+  this->NumberOfPhysicalCPU = std::max(1u, this->NumberOfPhysicalCPU);
+  this->NumberOfLogicalCPU = std::max(1u, this->NumberOfLogicalCPU);
 #else
-  return 0;
 #endif
 }
 
@@ -4559,8 +4437,14 @@ bool SystemInformationImplementation::ParseSysCtl()
   sysctlbyname("hw.physicalcpu", &this->NumberOfPhysicalCPU, &len, NULL, 0);
   len = sizeof(this->NumberOfLogicalCPU);
   sysctlbyname("hw.logicalcpu", &this->NumberOfLogicalCPU, &len, NULL, 0);
+
+  int cores_per_package = 0;
+  len = sizeof(cores_per_package);
+  err = sysctlbyname("machdep.cpu.cores_per_package", &cores_per_package, &len,
+                     NULL, 0);
+  // That name was not found, default to 1
   this->Features.ExtendedFeatures.LogicalProcessorsPerPhysical =
-    this->LogicalCPUPerPhysicalCPU();
+    err != 0 ? 1 : static_cast<unsigned char>(cores_per_package);
 
   len = sizeof(value);
   sysctlbyname("hw.cpufrequency", &value, &len, NULL, 0);
diff --git a/Source/kwsys/SystemInformation.hxx.in b/Source/kwsys/SystemInformation.hxx.in
index 0fc1067..cc09393 100644
--- a/Source/kwsys/SystemInformation.hxx.in
+++ b/Source/kwsys/SystemInformation.hxx.in
@@ -67,7 +67,7 @@ public:
 
   bool Is64Bits();
 
-  unsigned int GetNumberOfLogicalCPU(); // per physical cpu
+  unsigned int GetNumberOfLogicalCPU();
   unsigned int GetNumberOfPhysicalCPU();
 
   bool DoesCPUSupportCPUID();
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt
index 42b00e1..de48501 100644
--- a/Tests/Cuda/CMakeLists.txt
+++ b/Tests/Cuda/CMakeLists.txt
@@ -4,3 +4,4 @@ ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
 ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
 ADD_TEST_MACRO(Cuda.ToolkitInclude CudaToolkitInclude)
 ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
+ADD_TEST_MACRO(Cuda.WithC CudaWithC)
diff --git a/Tests/Cuda/WithC/CMakeLists.txt b/Tests/Cuda/WithC/CMakeLists.txt
new file mode 100644
index 0000000..7596804
--- /dev/null
+++ b/Tests/Cuda/WithC/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.7)
+project(WithC CUDA C)
+
+# Append rather than overwrite, so the platform/compiler default CUDA flags
+# and any flags supplied by the user are preserved.
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+
+# Mixed-language executable: a C main() calling into a CUDA translation unit.
+add_executable(CudaWithC main.c cuda.cu)
+
+if(APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaWithC PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/Cuda/WithC/cuda.cu b/Tests/Cuda/WithC/cuda.cu
new file mode 100644
index 0000000..06bd7b9
--- /dev/null
+++ b/Tests/Cuda/WithC/cuda.cu
@@ -0,0 +1,18 @@
+#include <cuda_runtime.h>
+
+#include <iostream>
+
+// Entry point called from main.c; extern "C" gives it unmangled C linkage.
+// Returns 0 when the CUDA runtime can report a device count, 1 otherwise.
+extern "C" int use_cuda(void)
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices: "
+              << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
+  std::cout << "Found " << nDevices << " CUDA enabled devices" << std::endl;
+  return 0;
+}
diff --git a/Tests/Cuda/WithC/main.c b/Tests/Cuda/WithC/main.c
new file mode 100644
index 0000000..cb5fddc
--- /dev/null
+++ b/Tests/Cuda/WithC/main.c
@@ -0,0 +1,16 @@
+/* Implemented in cuda.cu with C linkage; returns 0 on success.  */
+extern int use_cuda(void);
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+/* Run the CUDA check and use its result as the process exit code.  */
+int main(void)
+{
+#ifdef _WIN32
+  /* Use an API that requires CMake's "standard" C libraries.  */
+  GetOpenFileName(NULL);
+#endif
+  return use_cuda();
+}