35 files changed, 217 insertions, 97 deletions
diff --git a/Help/command/execute_process.rst b/Help/command/execute_process.rst
index 71233d9..d617243 100644
--- a/Help/command/execute_process.rst
+++ b/Help/command/execute_process.rst
@@ -70,10 +70,21 @@ Options:
 ``ENCODING <name>``
  On Windows, the encoding that is used to decode output from the process.
  Ignored on other platforms.
- Valid encoding names are: ``AUTO`` (the default), ``NONE``, ``UTF8``,
- ``ANSI`` and ``OEM``.
- ``AUTO`` encoding means current active console's codepage will be used
- or if that isn't available then ``ANSI`` codepage will be used.
+ Valid encoding names are:
+
+ ``NONE``
+   Perform no decoding.  This assumes that the process output is encoded
+   in the same way as CMake's internal encoding (UTF-8).
+   This is the default.
+ ``AUTO``
+   Use the current active console's codepage or if that isn't
+   available then use ANSI.
+ ``ANSI``
+   Use the ANSI codepage.
+ ``OEM``
+   Use the original equipment manufacturer (OEM) code page.
+ ``UTF8``
+   Use the UTF-8 codepage.
 
 If more than one ``OUTPUT_*`` or ``ERROR_*`` option is given for the
 same pipe the precedence is not specified.
diff --git a/Help/manual/cmake-variables.7.rst b/Help/manual/cmake-variables.7.rst
index 95f780a..38444d1 100644
--- a/Help/manual/cmake-variables.7.rst
+++ b/Help/manual/cmake-variables.7.rst
@@ -353,6 +353,7 @@ Variables for Languages
    /variable/CMAKE_CUDA_EXTENSIONS
    /variable/CMAKE_CUDA_STANDARD
    /variable/CMAKE_CUDA_STANDARD_REQUIRED
+   /variable/CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
    /variable/CMAKE_CXX_COMPILE_FEATURES
    /variable/CMAKE_CXX_EXTENSIONS
    /variable/CMAKE_CXX_STANDARD
diff --git a/Help/variable/CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES.rst b/Help/variable/CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES.rst
new file mode 100644
index 0000000..768f571
--- /dev/null
+++ b/Help/variable/CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES.rst
@@ -0,0 +1,7 @@
+CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
+--------------------------------------
+
+When the ``CUDA`` language has been enabled, this provides a
+:ref:`;-list <CMake Language Lists>` of include directories provided
+by the CUDA Toolkit.  The value may be useful for C++ source files
+to include CUDA headers.
diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 158d12b..f524e5f 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -16,6 +16,8 @@ set(CMAKE_CUDA_SOURCE_FILE_EXTENSIONS cu)
 set(CMAKE_CUDA_LINKER_PREFERENCE 15)
 set(CMAKE_CUDA_LINKER_PREFERENCE_PROPAGATES 1)
 
+set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
+
 set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES@")
 set(CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES@")
 set(CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES@")
diff --git a/Modules/CMakeCUDACompilerABI.cu b/Modules/CMakeCUDACompilerABI.cu
index 5aa1b8a..99bacef 100644
--- a/Modules/CMakeCUDACompilerABI.cu
+++ b/Modules/CMakeCUDACompilerABI.cu
@@ -1,5 +1,5 @@
 #ifndef __CUDACC__
-# error "A C or C++ compiler has been selected for CUDA"
+#error "A C or C++ compiler has been selected for CUDA"
 #endif
 
 #include "CMakeCompilerABI.h"
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index 13b1789..1c48159 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -93,6 +93,12 @@ if(NOT CMAKE_NOT_USING_CONFIG_FLAGS)
 
 endif()
 
+if(CMAKE_CUDA_STANDARD_LIBRARIES_INIT)
+  set(CMAKE_CUDA_STANDARD_LIBRARIES "${CMAKE_CUDA_STANDARD_LIBRARIES_INIT}"
+    CACHE STRING "Libraries linked by default with all CUDA applications.")
+  mark_as_advanced(CMAKE_CUDA_STANDARD_LIBRARIES)
+endif()
+
 include(CMakeCommonLanguageInclude)
 
 # now define the following rules:
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index bef6d0e..7b6d17b 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -84,20 +84,20 @@ endif()
 #the compiler with cuda-fake-ld  and pass too CMAKE_PARSE_IMPLICIT_LINK_INFO
 if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA)
   set(_nvcc_log "")
-  string(REPLACE "\r" "" _nvcc_output "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
-  if(_nvcc_output MATCHES "#\\\$ +LIBRARIES= *([^\n]*)\n")
+  string(REPLACE "\r" "" _nvcc_output_orig "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+  if(_nvcc_output_orig MATCHES "#\\\$ +LIBRARIES= *([^\n]*)\n")
     set(_nvcc_libraries "${CMAKE_MATCH_1}")
     string(APPEND _nvcc_log "  found 'LIBRARIES=' string: [${_nvcc_libraries}]\n")
   else()
     set(_nvcc_libraries "")
-    string(REPLACE "\n" "\n    " _nvcc_output_log "\n${_nvcc_output}")
+    string(REPLACE "\n" "\n    " _nvcc_output_log "\n${_nvcc_output_orig}")
     string(APPEND _nvcc_log "  no 'LIBRARIES=' string found in nvcc output:${_nvcc_output_log}\n")
   endif()
 
   set(_nvcc_link_line "")
   if(_nvcc_libraries)
     # Remove variable assignments.
-    string(REGEX REPLACE "#\\\$ *[^= ]+=[^\n]*\n" "" _nvcc_output "${_nvcc_output}")
+    string(REGEX REPLACE "#\\\$ *[^= ]+=[^\n]*\n" "" _nvcc_output "${_nvcc_output_orig}")
     # Split lines.
     string(REGEX REPLACE "\n+(#\\\$ )?" ";" _nvcc_output "${_nvcc_output}")
     foreach(line IN LISTS _nvcc_output)
@@ -150,6 +150,32 @@ if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA)
     message(FATAL_ERROR "Failed to extract nvcc implicit link line.")
   endif()
 
+  set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES )
+  if(_nvcc_output_orig MATCHES "#\\\$ +INCLUDES= *([^\n]*)\n")
+    set(_nvcc_includes "${CMAKE_MATCH_1}")
+    string(APPEND _nvcc_log "  found 'INCLUDES=' string: [${_nvcc_includes}]\n")
+  else()
+    set(_nvcc_includes "")
+    string(REPLACE "\n" "\n    " _nvcc_output_log "\n${_nvcc_output_orig}")
+    string(APPEND _nvcc_log "  no 'INCLUDES=' string found in nvcc output:${_nvcc_output_log}\n")
+  endif()
+  if(_nvcc_includes)
+    # across all operating system each include directory is prefixed with -I
+    separate_arguments(_nvcc_output UNIX_COMMAND "${_nvcc_includes}")
+    foreach(line IN LISTS _nvcc_output)
+      string(REGEX REPLACE "^-I" "" line "${line}")
+      get_filename_component(line "${line}" ABSOLUTE)
+      list(APPEND CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${line}")
+    endforeach()
+
+    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+      "Parsed CUDA nvcc include information from above output:\n${_nvcc_log}\n${log}\n\n")
+  else()
+    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+      "Failed to detect CUDA nvcc include information:\n${_nvcc_log}\n\n")
+  endif()
+
+
 endif()
 
 # configure all variables set in this file
diff --git a/Modules/Platform/Windows-NVIDIA-CUDA.cmake b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
index 809ee06..eda41e0 100644
--- a/Modules/Platform/Windows-NVIDIA-CUDA.cmake
+++ b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
@@ -40,3 +40,5 @@ string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=-MDd,-Zi,-RTC1")
 string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=-MD")
 string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -Xcompiler=-MD")
 string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -Xcompiler=-MD")
+
+set(CMAKE_CUDA_STANDARD_LIBRARIES_INIT "${CMAKE_C_STANDARD_LIBRARIES_INIT}")
diff --git a/Source/cmExecuteProcessCommand.cxx b/Source/cmExecuteProcessCommand.cxx
index eb26a50..92cdf64 100644
--- a/Source/cmExecuteProcessCommand.cxx
+++ b/Source/cmExecuteProcessCommand.cxx
@@ -47,7 +47,7 @@ bool cmExecuteProcessCommand::InitialPass(std::vector<std::string> const& args,
   std::string error_variable;
   std::string result_variable;
   std::string working_directory;
-  cmProcessOutput::Encoding encoding = cmProcessOutput::Auto;
+  cmProcessOutput::Encoding encoding = cmProcessOutput::None;
   for (size_t i = 0; i < args.size(); ++i) {
     if (args[i] == "COMMAND") {
       doing_command = true;
diff --git a/Tests/Cuda/.clang-format b/Tests/Cuda/.clang-format
new file mode 100644
index 0000000..a77589a
--- /dev/null
+++ b/Tests/Cuda/.clang-format
@@ -0,0 +1,9 @@
+---
+# This configuration requires clang-format 3.8 or higher.
+BasedOnStyle: Mozilla
+AlignOperands: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakAfterDefinitionReturnType: None
+ColumnLimit: 79
+Standard: Cpp11
+...
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt
index 40c9675..de48501 100644
--- a/Tests/Cuda/CMakeLists.txt
+++ b/Tests/Cuda/CMakeLists.txt
@@ -2,4 +2,6 @@
 ADD_TEST_MACRO(Cuda.Complex CudaComplex)
 ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
 ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
+ADD_TEST_MACRO(Cuda.ToolkitInclude CudaToolkitInclude)
 ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
+ADD_TEST_MACRO(Cuda.WithC CudaWithC)
diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu
index a23dc25..82255c5 100644
--- a/Tests/Cuda/Complex/dynamic.cu
+++ b/Tests/Cuda/Complex/dynamic.cu
@@ -1,7 +1,7 @@
 
-#include <string>
 #include <cuda.h>
 #include <iostream>
+#include <string>
 
 #ifdef _WIN32
 #define EXPORT __declspec(dllexport)
@@ -16,18 +16,15 @@ EXPORT int __host__ cuda_dynamic_host_func(int x)
   return dynamic_base_func(x);
 }
 
-static
-__global__
-void DetermineIfValidCudaDevice()
+static __global__ void DetermineIfValidCudaDevice()
 {
 }
 
 EXPORT void cuda_dynamic_lib_func()
 {
-  DetermineIfValidCudaDevice <<<1,1>>> ();
+  DetermineIfValidCudaDevice<<<1, 1>>>();
   cudaError_t err = cudaGetLastError();
-  if(err == cudaSuccess)
-    {
+  if (err == cudaSuccess) {
     std::cerr << cudaGetErrorString(err) << std::endl;
-    }
+  }
 }
diff --git a/Tests/Cuda/Complex/file1.cu b/Tests/Cuda/Complex/file1.cu
index a2e8bf3..1ce63bf 100644
--- a/Tests/Cuda/Complex/file1.cu
+++ b/Tests/Cuda/Complex/file1.cu
@@ -5,6 +5,6 @@ result_type __device__ file1_func(int x)
 {
   result_type r;
   r.input = x;
-  r.sum = x*x;
+  r.sum = x * x;
   return r;
 }
diff --git a/Tests/Cuda/Complex/file2.cu b/Tests/Cuda/Complex/file2.cu
index 6b8b06b..74f3558 100644
--- a/Tests/Cuda/Complex/file2.cu
+++ b/Tests/Cuda/Complex/file2.cu
@@ -5,16 +5,12 @@ result_type __device__ file1_func(int x);
 
 result_type_dynamic __device__ file2_func(int x)
 {
-  if(x!=42)
-    {
+  if (x != 42) {
     const result_type r = file1_func(x);
-    const result_type_dynamic rd { r.input, r.sum, true };
+    const result_type_dynamic rd{ r.input, r.sum, true };
     return rd;
-    }
-  else
-    {
-    const result_type_dynamic rd { x, x*x*x, false };
+  } else {
+    const result_type_dynamic rd{ x, x * x * x, false };
     return rd;
-    }
-
+  }
 }
diff --git a/Tests/Cuda/Complex/file3.cu b/Tests/Cuda/Complex/file3.cu
index 47e64c5..7c37d66 100644
--- a/Tests/Cuda/Complex/file3.cu
+++ b/Tests/Cuda/Complex/file3.cu
@@ -7,9 +7,7 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static
-__global__
-void file3_kernel(result_type& r, int x)
+static __global__ void file3_kernel(result_type& r, int x)
 {
   r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
@@ -18,12 +16,11 @@ void file3_kernel(result_type& r, int x)
 int file3_launch_kernel(int x)
 {
   result_type r;
-  file3_kernel <<<1,1>>> (r,x);
+  file3_kernel<<<1, 1>>>(r, x);
   cudaError_t err = cudaGetLastError();
-  if(err == cudaSuccess)
-    {
+  if (err == cudaSuccess) {
     std::cerr << cudaGetErrorString(err) << std::endl;
     return x;
-    }
+  }
   return r.sum;
 }
diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu
index 7051de0..4bba07c 100644
--- a/Tests/Cuda/Complex/mixed.cu
+++ b/Tests/Cuda/Complex/mixed.cu
@@ -17,9 +17,7 @@ result_type_dynamic __device__ file2_func(int x);
 
 IMPORT void __host__ cuda_dynamic_lib_func();
 
-static
-__global__
-void mixed_kernel(result_type& r, int x)
+static __global__ void mixed_kernel(result_type& r, int x)
 {
   r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
@@ -30,6 +28,6 @@ EXPORT int mixed_launch_kernel(int x)
   cuda_dynamic_lib_func();
 
   result_type r;
-  mixed_kernel <<<1,1>>> (r,x);
+  mixed_kernel<<<1, 1>>>(r, x);
   return r.sum;
 }
diff --git a/Tests/Cuda/ConsumeCompileFeatures/main.cu b/Tests/Cuda/ConsumeCompileFeatures/main.cu
index 712871c..bc32450 100644
--- a/Tests/Cuda/ConsumeCompileFeatures/main.cu
+++ b/Tests/Cuda/ConsumeCompileFeatures/main.cu
@@ -5,14 +5,16 @@ int static_cxx11_func(int);
 
 void test_functions()
 {
-  auto x = static_cxx11_func( int(42) );
+  auto x = static_cxx11_func(int(42));
   std::cout << x << std::endl;
 }
 
-int main(int argc, char **argv)
+int main(int argc, char** argv)
 {
   test_functions();
-  std::cout << "this executable doesn't use cuda code, just call methods defined" << std::endl;
+  std::cout
+    << "this executable doesn't use cuda code, just call methods defined"
+    << std::endl;
   std::cout << "in libraries that have cuda code" << std::endl;
   return 0;
 }
diff --git a/Tests/Cuda/ObjectLibrary/static.cu b/Tests/Cuda/ObjectLibrary/static.cu
index 2374c23..cdf682b 100644
--- a/Tests/Cuda/ObjectLibrary/static.cu
+++ b/Tests/Cuda/ObjectLibrary/static.cu
@@ -8,14 +8,14 @@ int __host__ file1_sq_func(int x)
   cudaError_t err;
   int nDevices = 0;
   err = cudaGetDeviceCount(&nDevices);
-  if(err != cudaSuccess)
-  {
+  if (err != cudaSuccess) {
     std::cout << "nDevices: " << nDevices << std::endl;
     std::cout << "err: " << err << std::endl;
     return 1;
   }
   std::cout << "this library uses cuda code" << std::endl;
-  std::cout << "you have " << nDevices << " devices that support cuda" << std::endl;
+  std::cout << "you have " << nDevices << " devices that support cuda"
+            << std::endl;
 
   return x * x;
 }
diff --git a/Tests/Cuda/ProperLinkFlags/file1.cu b/Tests/Cuda/ProperLinkFlags/file1.cu
index d93dc9f..9a105f0 100644
--- a/Tests/Cuda/ProperLinkFlags/file1.cu
+++ b/Tests/Cuda/ProperLinkFlags/file1.cu
@@ -6,6 +6,6 @@ result_type __device__ file1_func(int x)
   __ldg(&x);
   result_type r;
   r.input = x;
-  r.sum = x*x;
+  r.sum = x * x;
   return r;
 }
diff --git a/Tests/Cuda/ToolkitInclude/CMakeLists.txt b/Tests/Cuda/ToolkitInclude/CMakeLists.txt
new file mode 100644
index 0000000..f246b54
--- /dev/null
+++ b/Tests/Cuda/ToolkitInclude/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.8)
+project (ToolkitInclude CXX CUDA)
+
+#Goal for this example:
+# Validate that between the CXX implicit include directories and the
+# CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES directories we can find
+# the cuda runtime headers
+
+add_executable(CudaToolkitInclude main.cpp)
+target_include_directories(CudaToolkitInclude PRIVATE
+                           ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
diff --git a/Tests/Cuda/ToolkitInclude/main.cpp b/Tests/Cuda/ToolkitInclude/main.cpp
new file mode 100644
index 0000000..c8d5c6b
--- /dev/null
+++ b/Tests/Cuda/ToolkitInclude/main.cpp
@@ -0,0 +1,8 @@
+// Only thing we care about is that these headers are found
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+int main()
+{
+  return 0;
+}
diff --git a/Tests/Cuda/WithC/CMakeLists.txt b/Tests/Cuda/WithC/CMakeLists.txt
new file mode 100644
index 0000000..7596804
--- /dev/null
+++ b/Tests/Cuda/WithC/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.7)
+project(CudaComplex CUDA C)
+
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+
+add_executable(CudaWithC main.c cuda.cu)
+
+if(APPLE)
+  # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+  # the static cuda runtime can find it at runtime.
+  target_link_libraries(CudaWithC PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/Cuda/WithC/cuda.cu b/Tests/Cuda/WithC/cuda.cu
new file mode 100644
index 0000000..06bd7b9
--- /dev/null
+++ b/Tests/Cuda/WithC/cuda.cu
@@ -0,0 +1,16 @@
+#include <cuda.h>
+
+#include <iostream>
+
+extern "C" int use_cuda(void)
+{
+  int nDevices = 0;
+  cudaError_t err = cudaGetDeviceCount(&nDevices);
+  if (err != cudaSuccess) {
+    std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+              << std::endl;
+    return 1;
+  }
+  std::cout << "Found " << nDevices << " CUDA enabled devices" << std::endl;
+  return 0;
+}
diff --git a/Tests/Cuda/WithC/main.c b/Tests/Cuda/WithC/main.c
new file mode 100644
index 0000000..cb5fddc
--- /dev/null
+++ b/Tests/Cuda/WithC/main.c
@@ -0,0 +1,14 @@
+extern int use_cuda(void);
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+int main()
+{
+#ifdef _WIN32
+  /* Use an API that requires CMake's "standard" C libraries.  */
+  GetOpenFileName(NULL);
+#endif
+  return use_cuda();
+}
diff --git a/Tests/CudaOnly/.clang-format b/Tests/CudaOnly/.clang-format
new file mode 100644
index 0000000..a77589a
--- /dev/null
+++ b/Tests/CudaOnly/.clang-format
@@ -0,0 +1,9 @@
+---
+# This configuration requires clang-format 3.8 or higher.
+BasedOnStyle: Mozilla
+AlignOperands: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakAfterDefinitionReturnType: None
+ColumnLimit: 79
+Standard: Cpp11
+...
diff --git a/Tests/CudaOnly/EnableStandard/CMakeLists.txt b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
index 53b9132..35a1deb 100644
--- a/Tests/CudaOnly/EnableStandard/CMakeLists.txt
+++ b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
@@ -13,3 +13,14 @@ target_link_libraries(CudaOnlyEnableStandard PRIVATE CUDAStatic11 CUDADynamic11)
 
 set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD 11)
 set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
+
+#Verify CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES
+foreach(dir ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+  if(NOT IS_DIRECTORY "${dir}")
+    message(FATAL_ERROR
+      "CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES entry\n"
+      " ${dir}\n"
+      "is not an existing directory."
+      )
+  endif()
+endforeach()
diff --git a/Tests/CudaOnly/EnableStandard/main.cu b/Tests/CudaOnly/EnableStandard/main.cu
index f7144e6..f219583 100644
--- a/Tests/CudaOnly/EnableStandard/main.cu
+++ b/Tests/CudaOnly/EnableStandard/main.cu
@@ -12,11 +12,11 @@ IMPORT int shared_cuda11_func(int);
 
 void test_functions()
 {
-  static_cuda11_func( int(42) );
-  shared_cuda11_func( int(42) );
+  static_cuda11_func(int(42));
+  shared_cuda11_func(int(42));
 }
 
-int main(int argc, char **argv)
+int main(int argc, char** argv)
 {
   test_functions();
   return 0;
diff --git a/Tests/CudaOnly/SeparateCompilation/file1.cu b/Tests/CudaOnly/SeparateCompilation/file1.cu
index a2e8bf3..1ce63bf 100644
--- a/Tests/CudaOnly/SeparateCompilation/file1.cu
+++ b/Tests/CudaOnly/SeparateCompilation/file1.cu
@@ -5,6 +5,6 @@ result_type __device__ file1_func(int x)
 {
   result_type r;
   r.input = x;
-  r.sum = x*x;
+  r.sum = x * x;
   return r;
 }
diff --git a/Tests/CudaOnly/SeparateCompilation/file2.cu b/Tests/CudaOnly/SeparateCompilation/file2.cu
index 6b8b06b..74f3558 100644
--- a/Tests/CudaOnly/SeparateCompilation/file2.cu
+++ b/Tests/CudaOnly/SeparateCompilation/file2.cu
@@ -5,16 +5,12 @@ result_type __device__ file1_func(int x);
 
 result_type_dynamic __device__ file2_func(int x)
 {
-  if(x!=42)
-    {
+  if (x != 42) {
     const result_type r = file1_func(x);
-    const result_type_dynamic rd { r.input, r.sum, true };
+    const result_type_dynamic rd{ r.input, r.sum, true };
     return rd;
-    }
-  else
-    {
-    const result_type_dynamic rd { x, x*x*x, false };
+  } else {
+    const result_type_dynamic rd{ x, x * x * x, false };
     return rd;
-    }
-
+  }
 }
diff --git a/Tests/CudaOnly/SeparateCompilation/file3.cu b/Tests/CudaOnly/SeparateCompilation/file3.cu
index 670a18b..155b513 100644
--- a/Tests/CudaOnly/SeparateCompilation/file3.cu
+++ b/Tests/CudaOnly/SeparateCompilation/file3.cu
@@ -6,13 +6,10 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-
-static
-__global__
-void file3_kernel(result_type& r, int x)
+static __global__ void file3_kernel(result_type& r, int x)
 {
-  //call static_func which is a method that is defined in the
-  //static library that is always out of date
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
   r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
@@ -20,6 +17,6 @@ void file3_kernel(result_type& r, int x)
 result_type file3_launch_kernel(int x)
 {
   result_type r;
-  file3_kernel <<<1,1>>> (r,x);
+  file3_kernel<<<1, 1>>>(r, x);
   return r;
 }
diff --git a/Tests/CudaOnly/SeparateCompilation/file4.cu b/Tests/CudaOnly/SeparateCompilation/file4.cu
index 86ef623..2e3e01e 100644
--- a/Tests/CudaOnly/SeparateCompilation/file4.cu
+++ b/Tests/CudaOnly/SeparateCompilation/file4.cu
@@ -7,12 +7,10 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static
-__global__
-void file4_kernel(result_type& r, int x)
+static __global__ void file4_kernel(result_type& r, int x)
 {
-  //call static_func which is a method that is defined in the
-  //static library that is always out of date
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
   r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
@@ -20,6 +18,6 @@ void file4_kernel(result_type& r, int x)
 int file4_launch_kernel(int x)
 {
   result_type r;
-  file4_kernel <<<1,1>>> (r,x);
+  file4_kernel<<<1, 1>>>(r, x);
   return r.sum;
 }
diff --git a/Tests/CudaOnly/SeparateCompilation/file5.cu b/Tests/CudaOnly/SeparateCompilation/file5.cu
index 6fdb32a..fee8e9e 100644
--- a/Tests/CudaOnly/SeparateCompilation/file5.cu
+++ b/Tests/CudaOnly/SeparateCompilation/file5.cu
@@ -7,12 +7,10 @@
 result_type __device__ file1_func(int x);
 result_type_dynamic __device__ file2_func(int x);
 
-static
-__global__
-void file5_kernel(result_type& r, int x)
+static __global__ void file5_kernel(result_type& r, int x)
 {
-  //call static_func which is a method that is defined in the
-  //static library that is always out of date
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
   r = file1_func(x);
   result_type_dynamic rd = file2_func(x);
 }
@@ -20,6 +18,6 @@ void file5_kernel(result_type& r, int x)
 int file5_launch_kernel(int x)
 {
   result_type r;
-  file5_kernel <<<1,1>>> (r,x);
+  file5_kernel<<<1, 1>>>(r, x);
   return r.sum;
 }
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu
index 5c8e150..03e0921 100644
--- a/Tests/CudaOnly/SeparateCompilation/main.cu
+++ b/Tests/CudaOnly/SeparateCompilation/main.cu
@@ -7,7 +7,7 @@
 int file4_launch_kernel(int x);
 int file5_launch_kernel(int x);
 
-int main(int argc, char **argv)
+int main(int argc, char** argv)
 {
   file4_launch_kernel(42);
   file5_launch_kernel(42);
diff --git a/Tests/CudaOnly/WithDefs/main.notcu b/Tests/CudaOnly/WithDefs/main.notcu
index 67bf10c..80ed3a5 100644
--- a/Tests/CudaOnly/WithDefs/main.notcu
+++ b/Tests/CudaOnly/WithDefs/main.notcu
@@ -6,14 +6,12 @@
 #error "PACKED_DEFINE not defined!"
 #endif
 
-static
-__global__
-void DetermineIfValidCudaDevice()
+static __global__ void DetermineIfValidCudaDevice()
 {
 }
 
 #ifdef _MSC_VER
-#pragma pack(push,1)
+#pragma pack(push, 1)
 #undef PACKED_DEFINE
 #define PACKED_DEFINE
 #endif
@@ -32,28 +30,24 @@ struct PACKED_DEFINE result_type
 result_type can_launch_kernel()
 {
   result_type r;
-  DetermineIfValidCudaDevice <<<1,1>>> ();
+  DetermineIfValidCudaDevice<<<1, 1>>>();
   r.valid = (cudaSuccess == cudaGetLastError());
-  if(r.valid)
-    {
+  if (r.valid) {
     r.value = 1;
-    }
-  else
-    {
+  } else {
     r.value = -1;
-    }
+  }
   return r;
 }
 
-int main(int argc, char **argv)
+int main(int argc, char** argv)
 {
   cudaError_t err;
   int nDevices = 0;
   err = cudaGetDeviceCount(&nDevices);
-  if(err != cudaSuccess)
-    {
-      std::cerr << cudaGetErrorString(err) << std::endl;
-      return 1;
-    }
+  if (err != cudaSuccess) {
+    std::cerr << cudaGetErrorString(err) << std::endl;
+    return 1;
+  }
   return 0;
 }
diff --git a/Utilities/Scripts/clang-format.bash b/Utilities/Scripts/clang-format.bash
index 8e07c99..2b36ac5 100755
--- a/Utilities/Scripts/clang-format.bash
+++ b/Utilities/Scripts/clang-format.bash
@@ -107,7 +107,7 @@ case "$mode" in
 esac
 
 # Filter sources to which our style should apply.
-$git_ls -z -- '*.c' '*.cc' '*.cpp' '*.cxx' '*.h' '*.hh' '*.hpp' '*.hxx' |
+$git_ls -z -- '*.c' '*.cc' '*.cpp' '*.cxx' '*.h' '*.hh' '*.hpp' '*.hxx' '*.cu' '*.notcu' |
 
   # Exclude lexer/parser generator input and output.
   egrep -z -v '^Source/cmCommandArgumentLexer\.' |