From f8aac21947f43a0e886a3cc3655514abb95bb260 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Fri, 26 Aug 2016 16:59:36 -0400
Subject: CUDA: Add basic CUDA language support for *NIX systems.

---
 Modules/CMakeCUDACompiler.cmake.in              |   7 ++
 Modules/CMakeCUDACompilerId.cu.in               |  40 +++++++++
 Modules/CMakeCUDAInformation.cmake              | 109 ++++++++++++++++++++++++
 Modules/CMakeDetermineCUDACompiler.cmake        |  49 +++++++++++
 Modules/CMakeTestCUDACompiler.cmake             |  58 +++++++++++++
 Modules/Compiler/NVIDIA-CUDA.cmake              |  16 ++++
 Modules/Compiler/NVIDIA-DetermineCompiler.cmake |   7 ++
 7 files changed, 286 insertions(+)
 create mode 100644 Modules/CMakeCUDACompiler.cmake.in
 create mode 100644 Modules/CMakeCUDACompilerId.cu.in
 create mode 100644 Modules/CMakeCUDAInformation.cmake
 create mode 100644 Modules/CMakeDetermineCUDACompiler.cmake
 create mode 100644 Modules/CMakeTestCUDACompiler.cmake
 create mode 100644 Modules/Compiler/NVIDIA-CUDA.cmake
 create mode 100644 Modules/Compiler/NVIDIA-DetermineCompiler.cmake

diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
new file mode 100644
index 0000000..1e34825
--- /dev/null
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -0,0 +1,7 @@
+set(CMAKE_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@")
+set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
+
+set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
+
+set(CMAKE_CUDA_COMPILER_ID_RUN 1)
+set(CMAKE_CUDA_SOURCE_FILE_EXTENSIONS cu)
diff --git a/Modules/CMakeCUDACompilerId.cu.in b/Modules/CMakeCUDACompilerId.cu.in
new file mode 100644
index 0000000..ddbc8b4
--- /dev/null
+++ b/Modules/CMakeCUDACompilerId.cu.in
@@ -0,0 +1,40 @@
+
+#ifndef __cplusplus
+# error "A C compiler has been selected for CUDA/C++."
+#endif
+
+@CMAKE_CUDA_COMPILER_ID_CONTENT@
+
+/* Construct the string literal in pieces to prevent the source from
+   getting matched.  Store it in a pointer rather than an array
+   because some compilers will just produce instructions to fill the
+   array rather than assigning a pointer to a static array.  */
+char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
+
+@CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT@
+@CMAKE_CUDA_COMPILER_ID_ERROR_FOR_TEST@
+
+const char* info_language_dialect_default = "INFO" ":" "dialect_default["
+#if __cplusplus >= 201402L
+  "14"
+#elif __cplusplus >= 201103L
+  "11"
+#else
+  "98"
+#endif
+"]";
+
+/*--------------------------------------------------------------------------*/
+
+int main(int argc, char* argv[])
+{
+  int require = 0;
+  require += info_compiler[argc];
+  require += info_platform[argc];
+#ifdef COMPILER_VERSION_MAJOR
+  require += info_version[argc];
+#endif
+  require += info_language_dialect_default[argc];
+  (void)argv;
+  return require;
+}
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
new file mode 100644
index 0000000..3436681
--- /dev/null
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -0,0 +1,109 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+set(CMAKE_CUDA_OUTPUT_EXTENSION .o)
+set(CMAKE_INCLUDE_FLAG_CUDA "-I")
+
+# Load compiler-specific information.
+if(CMAKE_CUDA_COMPILER_ID)
+  include(Compiler/${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
+else()
+  #couldn't id the cuda compile, fall back to the default setting
+  set(CMAKE_CUDA_COMPILER_ID "NVidia")
+  include(Compiler/NVidia-CUDA OPTIONAL)
+endif()
+
+# load the system- and compiler specific files
+if(CMAKE_CUDA_COMPILER_ID)
+  # load a hardware specific file, mostly useful for embedded compilers
+  if(CMAKE_SYSTEM_PROCESSOR)
+    include(Platform/${CMAKE_SYSTEM_NAME}-${CMAKE_CUDA_COMPILER_ID}-CUDA-${CMAKE_SYSTEM_PROCESSOR} OPTIONAL)
+  endif()
+  include(Platform/${CMAKE_SYSTEM_NAME}-${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
+endif()
+
+# for most systems a module is the same as a shared library
+# so unless the variable CMAKE_MODULE_EXISTS is set just
+# copy the values from the LIBRARY variables
+if(NOT CMAKE_MODULE_EXISTS)
+  set(CMAKE_SHARED_MODULE_CUDA_FLAGS ${CMAKE_SHARED_LIBRARY_CUDA_FLAGS})
+  set(CMAKE_SHARED_MODULE_CREATE_CUDA_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS})
+endif()
+
+# add the flags to the cache based
+# on the initial values computed in the platform/*.cmake files
+# use _INIT variables so that this only happens the first time
+# and you can set these flags in the cmake cache
+set(CMAKE_CUDA_FLAGS_INIT "$ENV{CUDAFLAGS} ${CMAKE_CUDA_FLAGS_INIT}")
+
+foreach(c "" _DEBUG _RELEASE _MINSIZEREL _RELWITHDEBINFO)
+  string(STRIP "${CMAKE_CUDA_FLAGS${c}_INIT}" CMAKE_CUDA_FLAGS${c}_INIT)
+endforeach()
+
+set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS_INIT}" CACHE STRING
+     "Flags used by the compiler during all build types.")
+
+if(NOT CMAKE_NOT_USING_CONFIG_FLAGS)
+  set (CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG_INIT}" CACHE STRING
+     "Flags used by the compiler during debug builds.")
+  set (CMAKE_CUDA_FLAGS_MINSIZEREL "${CMAKE_CUDA_FLAGS_MINSIZEREL_INIT}" CACHE STRING
+     "Flags used by the compiler during release builds for minimum size.")
+  set (CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE_INIT}" CACHE STRING
+     "Flags used by the compiler during release builds.")
+  set (CMAKE_CUDA_FLAGS_RELWITHDEBINFO "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT}" CACHE STRING
+     "Flags used by the compiler during release builds with debug info.")
+
+endif()
+
+include(CMakeCommonLanguageInclude)
+
+# now define the following rules:
+# CMAKE_CUDA_CREATE_SHARED_LIBRARY
+# CMAKE_CUDA_CREATE_SHARED_MODULE
+# CMAKE_CUDA_COMPILE_OBJECT
+# CMAKE_CUDA_LINK_EXECUTABLE
+
+# create a shared library
+if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY)
+  set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
+      "<CMAKE_CUDA_COMPILER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
+endif()
+
+# create a shared module copy the shared library rule by default
+if(NOT CMAKE_CUDA_CREATE_SHARED_MODULE)
+  set(CMAKE_CUDA_CREATE_SHARED_MODULE ${CMAKE_CUDA_CREATE_SHARED_LIBRARY})
+endif()
+
+# Create a static archive incrementally for large object file counts.
+if(NOT DEFINED CMAKE_CUDA_ARCHIVE_CREATE)
+  set(CMAKE_CUDA_ARCHIVE_CREATE "<CMAKE_AR> qc <TARGET> <LINK_FLAGS> <OBJECTS>")
+endif()
+if(NOT DEFINED CMAKE_CUDA_ARCHIVE_APPEND)
+  set(CMAKE_CUDA_ARCHIVE_APPEND "<CMAKE_AR> q  <TARGET> <LINK_FLAGS> <OBJECTS>")
+endif()
+if(NOT DEFINED CMAKE_CUDA_ARCHIVE_FINISH)
+  set(CMAKE_CUDA_ARCHIVE_FINISH "<CMAKE_RANLIB> <TARGET>")
+endif()
+
+
+# compile a cu file into an object file
+if(NOT CMAKE_CUDA_COMPILE_OBJECT)
+  set(CMAKE_CUDA_COMPILE_OBJECT
+    "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -o <OBJECT> -c <SOURCE>")
+endif()
+
+# compile a cu file into an executable
+if(NOT CMAKE_CUDA_LINK_EXECUTABLE)
+  set(CMAKE_CUDA_LINK_EXECUTABLE
+    "<CMAKE_CUDA_COMPILER>  <FLAGS> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS>  -o <TARGET> <LINK_LIBRARIES>")
+endif()
+
+
+mark_as_advanced(
+CMAKE_CUDA_FLAGS
+CMAKE_CUDA_FLAGS_RELEASE
+CMAKE_CUDA_FLAGS_RELWITHDEBINFO
+CMAKE_CUDA_FLAGS_MINSIZEREL
+CMAKE_CUDA_FLAGS_DEBUG)
+
+set(CMAKE_CUDA_INFORMATION_LOADED 1)
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
new file mode 100644
index 0000000..fd87358
--- /dev/null
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -0,0 +1,49 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
+
+if( NOT ( ("${CMAKE_GENERATOR}" MATCHES "Make") OR
+          ("${CMAKE_GENERATOR}" MATCHES "Ninja") ) )
+  message(FATAL_ERROR "CUDA language not currently supported by \"${CMAKE_GENERATOR}\" generator")
+endif()
+
+if(NOT CMAKE_CUDA_COMPILER)
+  set(CMAKE_CUDA_COMPILER_INIT NOTFOUND)
+
+  # finally list compilers to try
+  if(NOT CMAKE_CUDA_COMPILER_INIT)
+    set(CMAKE_CUDA_COMPILER_LIST nvcc)
+  endif()
+
+  _cmake_find_compiler(CUDA)
+else()
+  _cmake_find_compiler_path(CUDA)
+endif()
+
+mark_as_advanced(CMAKE_CUDA_COMPILER)
+
+# Build a small source file to identify the compiler.
+if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
+  set(CMAKE_CUDA_COMPILER_ID_RUN 1)
+
+  list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVidia)
+  set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVidia "nvcc: NVIDIA \(R\) Cuda compiler driver")
+
+  set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCXX/(\\./)?(CompilerIdCXX.xctest/)?CompilerIdCXX[ \t\n\\\"]")
+  set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
+
+  include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
+  CMAKE_DETERMINE_COMPILER_ID(CUDA CUDAFLAGS CMakeCUDACompilerId.cu)
+
+endif()
+
+include(CMakeFindBinUtils)
+
+# configure all variables set in this file
+configure_file(${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
+  ${CMAKE_PLATFORM_INFO_DIR}/CMakeCUDACompiler.cmake
+  @ONLY
+  )
+
+set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
diff --git a/Modules/CMakeTestCUDACompiler.cmake b/Modules/CMakeTestCUDACompiler.cmake
new file mode 100644
index 0000000..e45d2ab
--- /dev/null
+++ b/Modules/CMakeTestCUDACompiler.cmake
@@ -0,0 +1,58 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+if(CMAKE_CUDA_COMPILER_FORCED)
+  # The compiler configuration was forced by the user.
+  # Assume the user has configured all compiler information.
+  set(CMAKE_CUDA_COMPILER_WORKS TRUE)
+  return()
+endif()
+
+include(CMakeTestCompilerCommon)
+
+# Remove any cached result from an older CMake version.
+# We now store this in CMakeCUDACompiler.cmake.
+unset(CMAKE_CUDA_COMPILER_WORKS CACHE)
+
+# This file is used by EnableLanguage in cmGlobalGenerator to
+# determine that that selected cuda compiler can actually compile
+# and link the most basic of programs.   If not, a fatal error
+# is set and cmake stops processing commands and will not generate
+# any makefiles or projects.
+if(NOT CMAKE_CUDA_COMPILER_WORKS)
+  PrintTestCompilerStatus("CUDA" "")
+  file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/main.cu
+    "#ifndef __CUDACC__\n"
+    "# error \"The CMAKE_CUDA_COMPILER is set to an invalid CUDA compiler\"\n"
+    "#endif\n"
+    "int main(){return 0;}\n")
+
+  try_compile(CMAKE_CUDA_COMPILER_WORKS ${CMAKE_BINARY_DIR}
+    ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/main.cu
+    OUTPUT_VARIABLE __CMAKE_CUDA_COMPILER_OUTPUT)
+
+  # Move result from cache to normal variable.
+  set(CMAKE_CUDA_COMPILER_WORKS ${CMAKE_CUDA_COMPILER_WORKS})
+  unset(CMAKE_CUDA_COMPILER_WORKS CACHE)
+  set(CUDA_TEST_WAS_RUN 1)
+endif()
+
+if(NOT CMAKE_CUDA_COMPILER_WORKS)
+  PrintTestCompilerStatus("CUDA" " -- broken")
+  file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
+    "Determining if the CUDA compiler works failed with "
+    "the following output:\n${__CMAKE_CUDA_COMPILER_OUTPUT}\n\n")
+  message(FATAL_ERROR "The CUDA compiler \"${CMAKE_CUDA_COMPILER}\" "
+    "is not able to compile a simple test program.\nIt fails "
+    "with the following output:\n ${__CMAKE_CUDA_COMPILER_OUTPUT}\n\n"
+    "CMake will not be able to correctly generate this project.")
+else()
+  if(CUDA_TEST_WAS_RUN)
+    PrintTestCompilerStatus("CUDA" " -- works")
+    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+      "Determining if the CUDA compiler works passed with "
+      "the following output:\n${__CMAKE_CUDA_COMPILER_OUTPUT}\n\n")
+  endif()
+endif()
+
+unset(__CMAKE_CUDA_COMPILER_OUTPUT)
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
new file mode 100644
index 0000000..9fcd8d7
--- /dev/null
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -0,0 +1,16 @@
+set(CMAKE_CXX_VERBOSE_FLAG "-v")
+
+
+set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
+set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC)
+set(CMAKE_SHARED_LIBRARY_CUDA_FLAGS -Xcompiler=-fPIC)
+set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
+set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
+set(CMAKE_CUDA_COMPILE_OPTIONS_VISIBILITY -Xcompiler=-fvisibility=)
+
+if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 7.0)
+  set(CMAKE_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
+  set(CMAKE_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11")
+  set(CMAKE_CUDA11_STANDARD_COMPILE_OPTION "-std=c++11")
+  set(CMAKE_CUDA11_EXTENSION_COMPILE_OPTION "-std=c++11")
+endif()
diff --git a/Modules/Compiler/NVIDIA-DetermineCompiler.cmake b/Modules/Compiler/NVIDIA-DetermineCompiler.cmake
new file mode 100644
index 0000000..32ccf8a
--- /dev/null
+++ b/Modules/Compiler/NVIDIA-DetermineCompiler.cmake
@@ -0,0 +1,7 @@
+
+set(_compiler_id_pp_test "defined(__NVCC__)")
+
+set(_compiler_id_version_compute "
+# define @PREFIX@COMPILER_VERSION_MAJOR @MACRO_DEC@(__CUDACC_VER_MAJOR__)
+# define @PREFIX@COMPILER_VERSION_MINOR @MACRO_DEC@(__CUDACC_VER_MINOR__)
+# define @PREFIX@COMPILER_VERSION_PATCH @MACRO_DEC@(__CUDACC_VER_BUILD__)")
-- 
cgit v0.12


From ce4ec876ceb73f52055b617374c04dbbbfd2a98d Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Tue, 30 Aug 2016 10:15:10 -0400
Subject: CUDA: Add support language levels (98/11)

---
 Modules/CMakeCUDACompiler.cmake.in |  1 +
 Modules/Compiler/NVIDIA-CUDA.cmake | 13 +++++++------
 Source/cmLocalGenerator.cxx        |  3 +++
 Source/cmTarget.cxx                |  3 +++
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 1e34825..78606e6 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -1,5 +1,6 @@
 set(CMAKE_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@")
 set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
+set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
 
 set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
 
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 9fcd8d7..ae51138 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -8,9 +8,10 @@ set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
 set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
 set(CMAKE_CUDA_COMPILE_OPTIONS_VISIBILITY -Xcompiler=-fvisibility=)
 
-if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 7.0)
-  set(CMAKE_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
-  set(CMAKE_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11")
-  set(CMAKE_CUDA11_STANDARD_COMPILE_OPTION "-std=c++11")
-  set(CMAKE_CUDA11_EXTENSION_COMPILE_OPTION "-std=c++11")
-endif()
+
+set(CMAKE_CUDA98_STANDARD_COMPILE_OPTION "")
+set(CMAKE_CUDA98_EXTENSION_COMPILE_OPTION "")
+set(CMAKE_CUDA11_STANDARD_COMPILE_OPTION "-std=c++11")
+set(CMAKE_CUDA11_EXTENSION_COMPILE_OPTION "-std=c++11")
+
+set(CMAKE_CUDA_STANDARD_DEFAULT 98)
diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx
index e7dfed5..1ccd489 100644
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -1472,6 +1472,9 @@ void cmLocalGenerator::AddCompilerRequirementFlag(
     langStdMap["C"].push_back("11");
     langStdMap["C"].push_back("99");
     langStdMap["C"].push_back("90");
+
+    langStdMap["CUDA"].push_back("11");
+    langStdMap["CUDA"].push_back("98");
   }
 
   std::string standard(standardProp);
diff --git a/Source/cmTarget.cxx b/Source/cmTarget.cxx
index 54bdfe6..48acc6e 100644
--- a/Source/cmTarget.cxx
+++ b/Source/cmTarget.cxx
@@ -267,6 +267,9 @@ cmTarget::cmTarget(std::string const& name, cmStateEnums::TargetType type,
     this->SetPropertyDefault("CXX_STANDARD", CM_NULLPTR);
     this->SetPropertyDefault("CXX_STANDARD_REQUIRED", CM_NULLPTR);
     this->SetPropertyDefault("CXX_EXTENSIONS", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_STANDARD", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_STANDARD_REQUIRED", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_EXTENSIONS", CM_NULLPTR);
     this->SetPropertyDefault("LINK_SEARCH_START_STATIC", CM_NULLPTR);
     this->SetPropertyDefault("LINK_SEARCH_END_STATIC", CM_NULLPTR);
   }
-- 
cgit v0.12


From bf326ebb7694d2e86fbcfef4cb827c70531c3255 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Tue, 30 Aug 2016 12:00:44 -0400
Subject: CUDA: Explicitly state all source files are cuda sources.

This way you can mark a .C/.CPP files as a cuda source file and
have nvcc build it as a cuda file.
---
 Modules/CMakeCUDAInformation.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index 3436681..491c35b 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -89,7 +89,7 @@ endif()
 # compile a cu file into an object file
 if(NOT CMAKE_CUDA_COMPILE_OBJECT)
   set(CMAKE_CUDA_COMPILE_OBJECT
-    "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -o <OBJECT> -c <SOURCE>")
+    "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -o <OBJECT> -x cu -c <SOURCE>")
 endif()
 
 # compile a cu file into an executable
-- 
cgit v0.12


From aaeee1ca72d4f8b09ebf98c3e3bd73d50c09c0d8 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Tue, 30 Aug 2016 12:01:43 -0400
Subject: CUDA: CompilerId now errors out properly when passed a non CUDA
 compiler.

Previously we only reported an error if the compiler was a C compiler.
---
 Modules/CMakeCUDACompilerId.cu.in | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Modules/CMakeCUDACompilerId.cu.in b/Modules/CMakeCUDACompilerId.cu.in
index ddbc8b4..5fa85da 100644
--- a/Modules/CMakeCUDACompilerId.cu.in
+++ b/Modules/CMakeCUDACompilerId.cu.in
@@ -1,6 +1,5 @@
-
-#ifndef __cplusplus
-# error "A C compiler has been selected for CUDA/C++."
+#ifndef __CUDACC__
+# error "A C or C++ compiler has been selected for CUDA"
 #endif
 
 @CMAKE_CUDA_COMPILER_ID_CONTENT@
-- 
cgit v0.12


From 4f5155f6aba7cb7cd8c0e5b75e43b38a70568eb1 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Mon, 29 Aug 2016 13:28:37 -0400
Subject: CUDA: We now properly perform CUDA compiler identification.

---
 Modules/CMakeCUDACompiler.cmake.in       |  5 +++++
 Modules/CMakeCUDACompilerABI.cu          | 16 ++++++++++++++++
 Modules/CMakeCUDAInformation.cmake       |  4 ----
 Modules/CMakeCompilerIdDetection.cmake   |  5 +++++
 Modules/CMakeDetermineCUDACompiler.cmake | 12 +++++++++---
 Modules/CMakeTestCUDACompiler.cmake      | 13 +++++++++++++
 Modules/Compiler/NVIDIA-CUDA.cmake       |  2 +-
 7 files changed, 49 insertions(+), 8 deletions(-)
 create mode 100644 Modules/CMakeCUDACompilerABI.cu

diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 78606e6..897fc52 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -1,8 +1,13 @@
 set(CMAKE_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@")
 set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
+set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
 set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
 
 set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
 
 set(CMAKE_CUDA_COMPILER_ID_RUN 1)
 set(CMAKE_CUDA_SOURCE_FILE_EXTENSIONS cu)
+
+set(CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES@")
+set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "@CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES@")
+set(CMAKE_CUDA_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "@CMAKE_CUDA_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES@")
diff --git a/Modules/CMakeCUDACompilerABI.cu b/Modules/CMakeCUDACompilerABI.cu
new file mode 100644
index 0000000..5aa1b8a
--- /dev/null
+++ b/Modules/CMakeCUDACompilerABI.cu
@@ -0,0 +1,16 @@
+#ifndef __CUDACC__
+# error "A C or C++ compiler has been selected for CUDA"
+#endif
+
+#include "CMakeCompilerABI.h"
+
+int main(int argc, char* argv[])
+{
+  int require = 0;
+  require += info_sizeof_dptr[argc];
+#if defined(ABI_ID)
+  require += info_abi[argc];
+#endif
+  (void)argv;
+  return require;
+}
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index 491c35b..d3278e7 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -7,10 +7,6 @@ set(CMAKE_INCLUDE_FLAG_CUDA "-I")
 # Load compiler-specific information.
 if(CMAKE_CUDA_COMPILER_ID)
   include(Compiler/${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
-else()
-  #couldn't id the cuda compile, fall back to the default setting
-  set(CMAKE_CUDA_COMPILER_ID "NVidia")
-  include(Compiler/NVidia-CUDA OPTIONAL)
 endif()
 
 # load the system- and compiler specific files
diff --git a/Modules/CMakeCompilerIdDetection.cmake b/Modules/CMakeCompilerIdDetection.cmake
index 4732250..2881cb1 100644
--- a/Modules/CMakeCompilerIdDetection.cmake
+++ b/Modules/CMakeCompilerIdDetection.cmake
@@ -90,6 +90,11 @@ function(compiler_id_detection outvar lang)
     list(APPEND ordered_compilers
       MIPSpro)
 
+    #Currently the only CUDA compilers are NVIDIA
+    if(lang STREQUAL CUDA)
+      set(ordered_compilers NVIDIA)
+    endif()
+
     if(CID_ID_DEFINE)
       foreach(Id ${ordered_compilers})
         set(CMAKE_${lang}_COMPILER_ID_CONTENT "${CMAKE_${lang}_COMPILER_ID_CONTENT}# define ${CID_PREFIX}COMPILER_IS_${Id} 0\n")
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index fd87358..a6863ef 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -27,10 +27,16 @@ mark_as_advanced(CMAKE_CUDA_COMPILER)
 if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
   set(CMAKE_CUDA_COMPILER_ID_RUN 1)
 
-  list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVidia)
-  set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVidia "nvcc: NVIDIA \(R\) Cuda compiler driver")
+  # Try to identify the compiler.
+  set(CMAKE_CUDA_COMPILER_ID)
+  set(CMAKE_CUDA_PLATFORM_ID)
+  file(READ ${CMAKE_ROOT}/Modules/CMakePlatformId.h.in
+    CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT)
 
-  set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCXX/(\\./)?(CompilerIdCXX.xctest/)?CompilerIdCXX[ \t\n\\\"]")
+  list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVIDIA)
+  set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \(R\) Cuda compiler driver")
+
+  set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]")
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
 
   include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
diff --git a/Modules/CMakeTestCUDACompiler.cmake b/Modules/CMakeTestCUDACompiler.cmake
index e45d2ab..670b31d 100644
--- a/Modules/CMakeTestCUDACompiler.cmake
+++ b/Modules/CMakeTestCUDACompiler.cmake
@@ -53,6 +53,19 @@ else()
       "Determining if the CUDA compiler works passed with "
       "the following output:\n${__CMAKE_CUDA_COMPILER_OUTPUT}\n\n")
   endif()
+
+  # Try to identify the ABI and configure it into CMakeCUDACompiler.cmake
+  include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerABI.cmake)
+  CMAKE_DETERMINE_COMPILER_ABI(CUDA ${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu)
+
+  # Re-configure to save learned information.
+  configure_file(
+    ${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
+    ${CMAKE_PLATFORM_INFO_DIR}/CMakeCUDACompiler.cmake
+    @ONLY
+    )
+  include(${CMAKE_PLATFORM_INFO_DIR}/CMakeCUDACompiler.cmake)
 endif()
 
+
 unset(__CMAKE_CUDA_COMPILER_OUTPUT)
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index ae51138..6f12ff2 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -1,4 +1,4 @@
-set(CMAKE_CXX_VERBOSE_FLAG "-v")
+set(CMAKE_CUDA_VERBOSE_FLAG "-v -Xcompiler=-v")
 
 
 set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
-- 
cgit v0.12


From ec6ce623351ea3fda95dda49a72fc279a63aee87 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Wed, 31 Aug 2016 08:36:00 -0400
Subject: CUDA: State that cuda has preprocessor output and can generate
 assembly.

We can consider PTX code to be a form of assembly.
---
 Source/cmLocalUnixMakefileGenerator3.cxx | 4 ++--
 Source/cmMakefileTargetGenerator.cxx     | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Source/cmLocalUnixMakefileGenerator3.cxx b/Source/cmLocalUnixMakefileGenerator3.cxx
index 8bb084a..c8b459c 100644
--- a/Source/cmLocalUnixMakefileGenerator3.cxx
+++ b/Source/cmLocalUnixMakefileGenerator3.cxx
@@ -285,8 +285,8 @@ void cmLocalUnixMakefileGenerator3::WriteLocalMakefile()
     for (std::vector<LocalObjectEntry>::const_iterator ei = lo->second.begin();
          ei != lo->second.end(); ++ei) {
       if (ei->Language == "C" || ei->Language == "CXX" ||
-          ei->Language == "Fortran") {
-        // Right now, C, C++ and Fortran have both a preprocessor and the
+          ei->Language == "CUDA" || ei->Language == "Fortran") {
+        // Right now, C, C++, Fortran and CUDA have both a preprocessor and the
         // ability to generate assembly code
         lang_has_preprocessor = true;
         lang_has_assembly = true;
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx
index 5bec2bb..7dfb70f 100644
--- a/Source/cmMakefileTargetGenerator.cxx
+++ b/Source/cmMakefileTargetGenerator.cxx
@@ -583,11 +583,11 @@ void cmMakefileTargetGenerator::WriteObjectBuildFile(
   std::string const includesString = "$(" + lang + "_INCLUDES)";
   vars.Includes = includesString.c_str();
 
-  // At the moment, it is assumed that C, C++, and Fortran have both
+  // At the moment, it is assumed that C, C++, Fortran, and CUDA have both
   // assembly and preprocessor capabilities. The same is true for the
   // ability to export compile commands
-  bool lang_has_preprocessor =
-    ((lang == "C") || (lang == "CXX") || (lang == "Fortran"));
+  bool lang_has_preprocessor = ((lang == "C") || (lang == "CXX") ||
+                                (lang == "Fortran") || (lang == "CUDA"));
   bool const lang_has_assembly = lang_has_preprocessor;
   bool const lang_can_export_cmds = lang_has_preprocessor;
 
-- 
cgit v0.12


From a92f8d96162b9e6c29ad0bd81b8fd05b387d94a9 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Wed, 31 Aug 2016 08:36:47 -0400
Subject: CUDA: Enable header dependency scanning.

---
 Modules/CMakeCUDAInformation.cmake       | 15 ++++++++++++++-
 Source/cmLocalUnixMakefileGenerator3.cxx |  3 ++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index d3278e7..41ed34f 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -85,7 +85,20 @@ endif()
 # compile a cu file into an object file
 if(NOT CMAKE_CUDA_COMPILE_OBJECT)
   set(CMAKE_CUDA_COMPILE_OBJECT
-    "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -o <OBJECT> -x cu -c <SOURCE>")
+    "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -x cu -c <SOURCE> -o <OBJECT>")
+
+  #The Ninja generator uses the make file dependency files to determine what
+  #files need to be recompiled. Unfortunately, nvcc doesn't support building
+  #a source file and generating the dependencies of said file in a single
+  #invocation. Instead we have to state that you need to chain two commands.
+  #
+  #The makefile generators uses the custom CMake dependency scanner, and thus
+  #it is exempt from this logic.
+  if(CMAKE_GENERATOR STREQUAL "Ninja")
+    list(APPEND CMAKE_CUDA_COMPILE_OBJECT
+      "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -x cu -M <SOURCE> -MT <OBJECT> -o $DEP_FILE")
+  endif()
+
 endif()
 
 # compile a cu file into an executable
diff --git a/Source/cmLocalUnixMakefileGenerator3.cxx b/Source/cmLocalUnixMakefileGenerator3.cxx
index c8b459c..ba17f81 100644
--- a/Source/cmLocalUnixMakefileGenerator3.cxx
+++ b/Source/cmLocalUnixMakefileGenerator3.cxx
@@ -1458,7 +1458,8 @@ bool cmLocalUnixMakefileGenerator3::ScanDependencies(
 
     // Create the scanner for this language
     cmDepends* scanner = CM_NULLPTR;
-    if (lang == "C" || lang == "CXX" || lang == "RC" || lang == "ASM") {
+    if (lang == "C" || lang == "CXX" || lang == "RC" || lang == "ASM" ||
+        lang == "CUDA") {
       // TODO: Handle RC (resource files) dependencies correctly.
       scanner = new cmDependsC(this, targetDir, lang, &validDeps);
     }
-- 
cgit v0.12


From bbaf24341026cdacd1e313dd3f8e3501e9c4070e Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Sun, 4 Sep 2016 16:29:44 -0400
Subject: CUDA: add support for specifying an explicit host compiler.

---
 Modules/CMakeCUDAInformation.cmake       | 14 ++++++++++----
 Modules/CMakeDetermineCUDACompiler.cmake | 13 ++++++++++++-
 Source/cmCoreTryCompile.cxx              |  3 +++
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index 41ed34f..858fee7 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -59,10 +59,16 @@ include(CMakeCommonLanguageInclude)
 # CMAKE_CUDA_COMPILE_OBJECT
 # CMAKE_CUDA_LINK_EXECUTABLE
 
+if(CMAKE_CUDA_HOST_COMPILER)
+  set(CMAKE_CUDA_HOST_FLAGS "-ccbin \"${CMAKE_CUDA_HOST_COMPILER}\" ")
+else()
+  set(CMAKE_CUDA_HOST_FLAGS "")
+endif()
+
 # create a shared library
 if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY)
   set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
-      "<CMAKE_CUDA_COMPILER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
+      "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
 endif()
 
 # create a shared module copy the shared library rule by default
@@ -85,7 +91,7 @@ endif()
 # compile a cu file into an object file
 if(NOT CMAKE_CUDA_COMPILE_OBJECT)
   set(CMAKE_CUDA_COMPILE_OBJECT
-    "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -x cu -c <SOURCE> -o <OBJECT>")
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -c <SOURCE> -o <OBJECT>")
 
   #The Ninja generator uses the make file dependency files to determine what
   #files need to be recompiled. Unfortunately, nvcc doesn't support building
@@ -96,7 +102,7 @@ if(NOT CMAKE_CUDA_COMPILE_OBJECT)
   #it is exempt from this logic.
   if(CMAKE_GENERATOR STREQUAL "Ninja")
     list(APPEND CMAKE_CUDA_COMPILE_OBJECT
-      "<CMAKE_CUDA_COMPILER>  <DEFINES> <INCLUDES> <FLAGS> -x cu -M <SOURCE> -MT <OBJECT> -o $DEP_FILE")
+      "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -M <SOURCE> -MT <OBJECT> -o $DEP_FILE")
   endif()
 
 endif()
@@ -104,7 +110,7 @@ endif()
 # compile a cu file into an executable
 if(NOT CMAKE_CUDA_LINK_EXECUTABLE)
   set(CMAKE_CUDA_LINK_EXECUTABLE
-    "<CMAKE_CUDA_COMPILER>  <FLAGS> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS>  -o <TARGET> <LINK_LIBRARIES>")
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <FLAGS> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS>  -o <TARGET> <LINK_LIBRARIES>")
 endif()
 
 
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index a6863ef..d5dc9f4 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -23,6 +23,10 @@ endif()
 
 mark_as_advanced(CMAKE_CUDA_COMPILER)
 
+#Allow the user to specify a host compiler
+#todo: need to specify this to compiler test passes
+set(CMAKE_CUDA_HOST_COMPILER "" CACHE FILEPATH "Host compiler to be used by nvcc")
+
 # Build a small source file to identify the compiler.
 if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
   set(CMAKE_CUDA_COMPILER_ID_RUN 1)
@@ -39,9 +43,16 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]")
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
 
+  if(CMAKE_CUDA_HOST_COMPILER)
+      # Each entry in this list is a set of extra flags to try
+      # adding to the compile line to see if it helps produce
+      # a valid identification file. In our case this would just
+      # be the explicit host compiler
+      set(CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "-ccbin=${CMAKE_CUDA_HOST_COMPILER}")
+  endif()
+
   include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
   CMAKE_DETERMINE_COMPILER_ID(CUDA CUDAFLAGS CMakeCUDACompilerId.cu)
-
 endif()
 
 include(CMakeFindBinUtils)
diff --git a/Source/cmCoreTryCompile.cxx b/Source/cmCoreTryCompile.cxx
index 3b46fc0..b1d3775 100644
--- a/Source/cmCoreTryCompile.cxx
+++ b/Source/cmCoreTryCompile.cxx
@@ -44,6 +44,8 @@ static std::string const kCMAKE_TRY_COMPILE_OSX_ARCHITECTURES =
   "CMAKE_TRY_COMPILE_OSX_ARCHITECTURES";
 static std::string const kCMAKE_TRY_COMPILE_PLATFORM_VARIABLES =
   "CMAKE_TRY_COMPILE_PLATFORM_VARIABLES";
+static std::string const kCMAKE_CUDA_HOST_COMPILER =
+  "CMAKE_CUDA_HOST_COMPILER";
 
 int cmCoreTryCompile::TryCompileCode(std::vector<std::string> const& argv,
                                      bool isTryRun)
@@ -453,6 +455,7 @@ int cmCoreTryCompile::TryCompileCode(std::vector<std::string> const& argv,
       vars.insert(kCMAKE_OSX_SYSROOT);
       vars.insert(kCMAKE_POSITION_INDEPENDENT_CODE);
       vars.insert(kCMAKE_SYSROOT);
+      vars.insert(kCMAKE_CUDA_HOST_COMPILER);
 
       if (const char* varListStr = this->Makefile->GetDefinition(
             kCMAKE_TRY_COMPILE_PLATFORM_VARIABLES)) {
-- 
cgit v0.12


From 489c52ce680df6439f9c1e553cd2925ca8944cb1 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Mon, 19 Sep 2016 13:14:47 -0400
Subject: CUDA: Use the host compiler for linking CUDA executables and shared
 libs.

---
 Modules/CMakeCUDACompiler.cmake.in       |  6 +++++
 Modules/CMakeCUDAInformation.cmake       | 18 +++++++++++---
 Modules/CMakeDetermineCUDACompiler.cmake | 42 ++++++++++++++++++++++++++++----
 Modules/CMakeDetermineCompilerId.cmake   |  6 ++++-
 Modules/Compiler/NVIDIA-CUDA.cmake       |  6 +++--
 Source/cmCoreTryCompile.cxx              |  3 ---
 Source/cmLocalGenerator.cxx              |  2 ++
 7 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 897fc52..7148275 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -1,4 +1,6 @@
 set(CMAKE_CUDA_COMPILER "@CMAKE_CUDA_COMPILER@")
+set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@")
+set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@")
 set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
 set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
 set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
@@ -8,6 +10,10 @@ set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
 set(CMAKE_CUDA_COMPILER_ID_RUN 1)
 set(CMAKE_CUDA_SOURCE_FILE_EXTENSIONS cu)
 
+set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES@")
+set(CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES@")
+set(CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES@")
+
 set(CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES@")
 set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "@CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES@")
 set(CMAKE_CUDA_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "@CMAKE_CUDA_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES@")
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index 858fee7..19bce6f 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -60,15 +60,27 @@ include(CMakeCommonLanguageInclude)
 # CMAKE_CUDA_LINK_EXECUTABLE
 
 if(CMAKE_CUDA_HOST_COMPILER)
-  set(CMAKE_CUDA_HOST_FLAGS "-ccbin \"${CMAKE_CUDA_HOST_COMPILER}\" ")
+  set(CMAKE_CUDA_HOST_FLAGS "-ccbin=<CMAKE_CUDA_HOST_COMPILER>")
 else()
   set(CMAKE_CUDA_HOST_FLAGS "")
 endif()
 
+set(__IMPLICT_LINKS )
+foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
+  string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
+endforeach()
+foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
+  if(${lib} MATCHES "/")
+    string(APPEND __IMPLICT_LINKS " \"${lib}\"")
+  else()
+    string(APPEND __IMPLICT_LINKS " -l${lib}")
+  endif()
+endforeach()
+
 # create a shared library
 if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY)
   set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
-      "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>")
+      "<CMAKE_CUDA_HOST_LINK_LAUNCHER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
 endif()
 
 # create a shared module copy the shared library rule by default
@@ -110,7 +122,7 @@ endif()
 # compile a cu file into an executable
 if(NOT CMAKE_CUDA_LINK_EXECUTABLE)
   set(CMAKE_CUDA_LINK_EXECUTABLE
-    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <FLAGS> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS>  -o <TARGET> <LINK_LIBRARIES>")
+    "<CMAKE_CUDA_HOST_LINK_LAUNCHER> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS>  -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
 endif()
 
 
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index d5dc9f4..9ad4fc6 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -2,6 +2,7 @@
 # file Copyright.txt or https://cmake.org/licensing for details.
 
 include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
+include(${CMAKE_ROOT}/Modules//CMakeParseImplicitLinkInfo.cmake)
 
 if( NOT ( ("${CMAKE_GENERATOR}" MATCHES "Make") OR
           ("${CMAKE_GENERATOR}" MATCHES "Ninja") ) )
@@ -43,12 +44,9 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]")
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
 
+  set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
   if(CMAKE_CUDA_HOST_COMPILER)
-      # Each entry in this list is a set of extra flags to try
-      # adding to the compile line to see if it helps produce
-      # a valid identification file. In our case this would just
-      # be the explicit host compiler
-      set(CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "-ccbin=${CMAKE_CUDA_HOST_COMPILER}")
+      list(APPEND CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}")
   endif()
 
   include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
@@ -57,6 +55,40 @@ endif()
 
 include(CMakeFindBinUtils)
 
+#if this compiler vendor is matches NVIDIA we can determine
+#what the host compiler is. This only needs to be done if the CMAKE_CUDA_HOST_COMPILER
+#has NOT been explicitly set
+#
+#Find the line from compiler ID that contains a.out ( or last line )
+#We also need to find the implicit link lines. Which can be done by replacing
+#the compiler with cuda-fake-ld  and pass too CMAKE_PARSE_IMPLICIT_LINK_INFO
+if(CMAKE_CUDA_COMPILER_ID STREQUAL NVIDIA)
+  #grab the last line of the output which holds the link line
+  string(REPLACE "#\$ " ";" nvcc_output "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+  list(GET nvcc_output -1 nvcc_output)
+
+  #extract the compiler that is being used for linking
+  string(REPLACE " " ";" nvcc_output_to_find_launcher "${nvcc_output}")
+  list(GET nvcc_output_to_find_launcher 0 CMAKE_CUDA_HOST_LINK_LAUNCHER)
+  #we need to remove the quotes that nvcc adds around the directory section
+  #of the path
+  string(REPLACE "\"" "" CMAKE_CUDA_HOST_LINK_LAUNCHER "${CMAKE_CUDA_HOST_LINK_LAUNCHER}")
+
+  #prefix the line with cuda-fake-ld so that implicit link info believes it is
+  #a link line
+  set(nvcc_output "cuda-fake-ld ${nvcc_output}")
+  CMAKE_PARSE_IMPLICIT_LINK_INFO("${nvcc_output}"
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES
+                                 log
+                                 "${CMAKE_CUDA_IMPLICIT_OBJECT_REGEX}")
+
+  file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
+          "Parsed CUDA nvcc implicit link information from above output:\n${log}\n\n")
+
+endif()
+
 # configure all variables set in this file
 configure_file(${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
   ${CMAKE_PLATFORM_INFO_DIR}/CMakeCUDACompiler.cmake
diff --git a/Modules/CMakeDetermineCompilerId.cmake b/Modules/CMakeDetermineCompilerId.cmake
index 59d8ab6..c5a2bcb 100644
--- a/Modules/CMakeDetermineCompilerId.cmake
+++ b/Modules/CMakeDetermineCompilerId.cmake
@@ -101,6 +101,8 @@ function(CMAKE_DETERMINE_COMPILER_ID lang flagvar src)
   set(CMAKE_${lang}_SIMULATE_ID "${CMAKE_${lang}_SIMULATE_ID}" PARENT_SCOPE)
   set(CMAKE_${lang}_SIMULATE_VERSION "${CMAKE_${lang}_SIMULATE_VERSION}" PARENT_SCOPE)
   set(CMAKE_${lang}_STANDARD_COMPUTED_DEFAULT "${CMAKE_${lang}_STANDARD_COMPUTED_DEFAULT}" PARENT_SCOPE)
+  set(CMAKE_${lang}_COMPILER_PRODUCED_OUTPUT "${COMPILER_${lang}_PRODUCED_OUTPUT}" PARENT_SCOPE)
+  set(CMAKE_${lang}_COMPILER_PRODUCED_FILES "${COMPILER_${lang}_PRODUCED_FILES}" PARENT_SCOPE)
 endfunction()
 
 include(CMakeCompilerIdDetection)
@@ -135,7 +137,7 @@ function(CMAKE_DETERMINE_COMPILER_ID_BUILD lang testflags src)
   set(COMPILER_DESCRIPTION
     "Compiler: ${CMAKE_${lang}_COMPILER} ${CMAKE_${lang}_COMPILER_ID_ARG1}
 Build flags: ${CMAKE_${lang}_COMPILER_ID_FLAGS_LIST}
-Id flags: ${testflags}
+Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
 ")
 
   # Compile the compiler identification source.
@@ -322,6 +324,7 @@ Id flags: ${testflags}
               ${CMAKE_${lang}_COMPILER_ID_ARG1}
               ${CMAKE_${lang}_COMPILER_ID_FLAGS_LIST}
               ${testflags}
+              ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
               "${src}"
       WORKING_DIRECTORY ${CMAKE_${lang}_COMPILER_ID_DIR}
       OUTPUT_VARIABLE CMAKE_${lang}_COMPILER_ID_OUTPUT
@@ -400,6 +403,7 @@ ${CMAKE_${lang}_COMPILER_ID_OUTPUT}
   # Return the files produced by the compilation.
   set(COMPILER_${lang}_PRODUCED_FILES "${COMPILER_${lang}_PRODUCED_FILES}" PARENT_SCOPE)
   set(COMPILER_${lang}_PRODUCED_OUTPUT "${COMPILER_${lang}_PRODUCED_OUTPUT}" PARENT_SCOPE)
+
 endfunction()
 
 #-----------------------------------------------------------------------------
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 6f12ff2..6b1ac74 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -1,9 +1,11 @@
-set(CMAKE_CUDA_VERBOSE_FLAG "-v -Xcompiler=-v")
+set(CMAKE_CUDA_VERBOSE_FLAG "-v")
 
 
 set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
 set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC)
-set(CMAKE_SHARED_LIBRARY_CUDA_FLAGS -Xcompiler=-fPIC)
+#CMAKE_SHARED_LIBRARY_CUDA_FLAGS is sent to the host linker so we don' need
+#to forward it through nvcc
+set(CMAKE_SHARED_LIBRARY_CUDA_FLAGS -fPIC)
 set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
 set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
 set(CMAKE_CUDA_COMPILE_OPTIONS_VISIBILITY -Xcompiler=-fvisibility=)
diff --git a/Source/cmCoreTryCompile.cxx b/Source/cmCoreTryCompile.cxx
index b1d3775..3b46fc0 100644
--- a/Source/cmCoreTryCompile.cxx
+++ b/Source/cmCoreTryCompile.cxx
@@ -44,8 +44,6 @@ static std::string const kCMAKE_TRY_COMPILE_OSX_ARCHITECTURES =
   "CMAKE_TRY_COMPILE_OSX_ARCHITECTURES";
 static std::string const kCMAKE_TRY_COMPILE_PLATFORM_VARIABLES =
   "CMAKE_TRY_COMPILE_PLATFORM_VARIABLES";
-static std::string const kCMAKE_CUDA_HOST_COMPILER =
-  "CMAKE_CUDA_HOST_COMPILER";
 
 int cmCoreTryCompile::TryCompileCode(std::vector<std::string> const& argv,
                                      bool isTryRun)
@@ -455,7 +453,6 @@ int cmCoreTryCompile::TryCompileCode(std::vector<std::string> const& argv,
       vars.insert(kCMAKE_OSX_SYSROOT);
       vars.insert(kCMAKE_POSITION_INDEPENDENT_CODE);
       vars.insert(kCMAKE_SYSROOT);
-      vars.insert(kCMAKE_CUDA_HOST_COMPILER);
 
       if (const char* varListStr = this->Makefile->GetDefinition(
             kCMAKE_TRY_COMPILE_PLATFORM_VARIABLES)) {
diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx
index 1ccd489..8d81d61 100644
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -63,6 +63,8 @@ static const char* ruleReplaceVars[] = {
   "CMAKE_CURRENT_BINARY_DIR",
   "CMAKE_RANLIB",
   "CMAKE_LINKER",
+  "CMAKE_CUDA_HOST_COMPILER",
+  "CMAKE_CUDA_HOST_LINK_LAUNCHER",
   "CMAKE_CL_SHOWINCLUDES_PREFIX"
 };
 
-- 
cgit v0.12


From 5b20d0abfad057d3d6ae7cbae62f564ddb31dd0f Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Fri, 9 Sep 2016 17:18:17 -0400
Subject: CUDA: C++ compile features now enable cuda c++11 support.

---
 Source/cmMakefile.cxx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Source/cmMakefile.cxx b/Source/cmMakefile.cxx
index 90182f9..fecc983 100644
--- a/Source/cmMakefile.cxx
+++ b/Source/cmMakefile.cxx
@@ -4413,10 +4413,13 @@ bool cmMakefile::AddRequiredTargetCxxFeature(cmTarget* target,
 
   if (setCxx14) {
     target->SetProperty("CXX_STANDARD", "14");
+    target->SetProperty("CUDA_STANDARD", "14");
   } else if (setCxx11) {
     target->SetProperty("CXX_STANDARD", "11");
+    target->SetProperty("CUDA_STANDARD", "11");
   } else if (setCxx98) {
     target->SetProperty("CXX_STANDARD", "98");
+    target->SetProperty("CUDA_STANDARD", "98");
   }
   return true;
 }
-- 
cgit v0.12


From 5dec403103990f766f458ff79987234e3b945787 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Tue, 27 Sep 2016 14:29:30 -0400
Subject: CUDA: Refactor CMakeCUDAInformation to prepare for separable
 compilation.

---
 Modules/CMakeCUDAInformation.cmake | 75 +++++++++++++++++++++++++++++++++-----
 Modules/Compiler/NVIDIA-CUDA.cmake |  5 +++
 2 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index 19bce6f..202a7a6 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -18,6 +18,44 @@ if(CMAKE_CUDA_COMPILER_ID)
   include(Platform/${CMAKE_SYSTEM_NAME}-${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
 endif()
 
+
+if(NOT CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG)
+  set(CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG})
+endif()
+
+if(NOT CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG_SEP)
+  set(CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP})
+endif()
+
+if(NOT CMAKE_SHARED_LIBRARY_RPATH_LINK_CUDA_FLAG)
+  set(CMAKE_SHARED_LIBRARY_RPATH_LINK_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_C_FLAG})
+endif()
+
+if(NOT DEFINED CMAKE_EXE_EXPORTS_CUDA_FLAG)
+  set(CMAKE_EXE_EXPORTS_CUDA_FLAG ${CMAKE_EXE_EXPORTS_C_FLAG})
+endif()
+
+if(NOT DEFINED CMAKE_SHARED_LIBRARY_SONAME_CUDA_FLAG)
+  set(CMAKE_SHARED_LIBRARY_SONAME_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_C_FLAG})
+endif()
+
+if(NOT CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG)
+  set(CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG})
+endif()
+
+if(NOT CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG_SEP)
+  set(CMAKE_EXECUTABLE_RUNTIME_CUDA_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CUDA_FLAG_SEP})
+endif()
+
+if(NOT CMAKE_EXECUTABLE_RPATH_LINK_CUDA_FLAG)
+  set(CMAKE_EXECUTABLE_RPATH_LINK_CUDA_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_CUDA_FLAG})
+endif()
+
+if(NOT DEFINED CMAKE_SHARED_LIBRARY_LINK_CUDA_WITH_RUNTIME_PATH)
+  set(CMAKE_SHARED_LIBRARY_LINK_CUDA_WITH_RUNTIME_PATH ${CMAKE_SHARED_LIBRARY_LINK_C_WITH_RUNTIME_PATH})
+endif()
+
+
 # for most systems a module is the same as a shared library
 # so unless the variable CMAKE_MODULE_EXISTS is set just
 # copy the values from the LIBRARY variables
@@ -99,12 +137,27 @@ if(NOT DEFINED CMAKE_CUDA_ARCHIVE_FINISH)
   set(CMAKE_CUDA_ARCHIVE_FINISH "<CMAKE_RANLIB> <TARGET>")
 endif()
 
+#Specify how to compile when ptx has been requested
+if(NOT CMAKE_CUDA_COMPILE_PTX_COMPILATION)
+  set(CMAKE_CUDA_COMPILE_PTX_COMPILATION
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -ptx <SOURCE> -o <OBJECT>")
+endif()
 
-# compile a cu file into an object file
-if(NOT CMAKE_CUDA_COMPILE_OBJECT)
-  set(CMAKE_CUDA_COMPILE_OBJECT
+#Specify how to compile when separable compilation has been requested
+if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION)
+  set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -dc <SOURCE> -o <OBJECT>")
+endif()
+
+#Specify how to compile when whole compilation has been requested
+if(NOT CMAKE_CUDA_COMPILE_WHOLE_COMPILATION)
+  set(CMAKE_CUDA_COMPILE_WHOLE_COMPILATION
     "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -c <SOURCE> -o <OBJECT>")
+endif()
 
+if(CMAKE_GENERATOR STREQUAL "Ninja")
+  set(CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION
+    "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -M <SOURCE> -MT <OBJECT> -o $DEP_FILE")
   #The Ninja generator uses the make file dependency files to determine what
   #files need to be recompiled. Unfortunately, nvcc doesn't support building
   #a source file and generating the dependencies of said file in a single
@@ -112,19 +165,23 @@ if(NOT CMAKE_CUDA_COMPILE_OBJECT)
   #
   #The makefile generators uses the custom CMake dependency scanner, and thus
   #it is exempt from this logic.
-  if(CMAKE_GENERATOR STREQUAL "Ninja")
-    list(APPEND CMAKE_CUDA_COMPILE_OBJECT
-      "<CMAKE_CUDA_COMPILER> ${CMAKE_CUDA_HOST_FLAGS} <DEFINES> <INCLUDES> <FLAGS> -x cu -M <SOURCE> -MT <OBJECT> -o $DEP_FILE")
-  endif()
-
+  list(APPEND CMAKE_CUDA_COMPILE_PTX_COMPILATION "${CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION}")
+  list(APPEND CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION "${CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION}")
+  list(APPEND CMAKE_CUDA_COMPILE_WHOLE_COMPILATION "${CMAKE_CUDA_COMPILE_DEPENDENCY_DETECTION}")
 endif()
 
 # compile a cu file into an executable
 if(NOT CMAKE_CUDA_LINK_EXECUTABLE)
   set(CMAKE_CUDA_LINK_EXECUTABLE
-    "<CMAKE_CUDA_HOST_LINK_LAUNCHER> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS>  -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
+    "<CMAKE_CUDA_HOST_LINK_LAUNCHER> <CMAKE_CUDA_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
 endif()
 
+#These are used when linking relocatable (dc) cuda code
+set(CMAKE_CUDA_DEVICE_LINK_LIBRARY
+      "<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> <LANGUAGE_COMPILE_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+set(CMAKE_CUDA_DEVICE_LINK_EXECUTABLE
+      "<CMAKE_CUDA_COMPILER> <CMAKE_CUDA_LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
+
 
 mark_as_advanced(
 CMAKE_CUDA_FLAGS
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 6b1ac74..e3ff5e3 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -10,6 +10,11 @@ set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
 set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
 set(CMAKE_CUDA_COMPILE_OPTIONS_VISIBILITY -Xcompiler=-fvisibility=)
 
+set(CMAKE_CUDA_FLAGS_INIT " ")
+set(CMAKE_CUDA_FLAGS_DEBUG_INIT " -g")
+set(CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -Os -DNDEBUG")
+set(CMAKE_CUDA_FLAGS_RELEASE_INIT " -O3 -DNDEBUG")
+set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG")
 
 set(CMAKE_CUDA98_STANDARD_COMPILE_OPTION "")
 set(CMAKE_CUDA98_EXTENSION_COMPILE_OPTION "")
-- 
cgit v0.12


From 115269a86c3116198f4de7c690fef21ec38767ac Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Thu, 13 Oct 2016 08:57:31 -0400
Subject: CUDA: Refactor cmLinkLineComputer to allow for better derived
 children.

Derived children of cmLinkLineComputer need to be able to use member
variables such as OutputConverter and the previously private methods.
---
 Source/cmLinkLineComputer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Source/cmLinkLineComputer.h b/Source/cmLinkLineComputer.h
index 97a5d1b..bb13717 100644
--- a/Source/cmLinkLineComputer.h
+++ b/Source/cmLinkLineComputer.h
@@ -33,10 +33,10 @@ public:
   std::string ComputeFrameworkPath(cmComputeLinkInformation& cli,
                                    std::string const& fwSearchFlag);
 
-  std::string ComputeLinkLibraries(cmComputeLinkInformation& cli,
-                                   std::string const& stdLibString);
+  virtual std::string ComputeLinkLibraries(cmComputeLinkInformation& cli,
+                                           std::string const& stdLibString);
 
-private:
+protected:
   std::string ComputeLinkLibs(cmComputeLinkInformation& cli);
   std::string ComputeRPath(cmComputeLinkInformation& cli);
 
-- 
cgit v0.12


From ae05fcc63f37b71cd9b07a2aade03a2a84fa22c4 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Thu, 13 Oct 2016 09:08:44 -0400
Subject: CUDA: Add LinkLineComputer that computes cuda dlink lines.

---
 Source/CMakeLists.txt               |  2 +
 Source/cmComputeLinkInformation.h   |  1 +
 Source/cmLinkLineDeviceComputer.cxx | 74 +++++++++++++++++++++++++++++++++++++
 Source/cmLinkLineDeviceComputer.h   | 36 ++++++++++++++++++
 4 files changed, 113 insertions(+)
 create mode 100644 Source/cmLinkLineDeviceComputer.cxx
 create mode 100644 Source/cmLinkLineDeviceComputer.h

diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt
index 718b022..e74a032 100644
--- a/Source/CMakeLists.txt
+++ b/Source/CMakeLists.txt
@@ -298,6 +298,8 @@ set(SRCS
   cmLinkItem.h
   cmLinkLineComputer.cxx
   cmLinkLineComputer.h
+  cmLinkLineDeviceComputer.cxx
+  cmLinkLineDeviceComputer.h
   cmListFileCache.cxx
   cmListFileCache.h
   cmListFileLexer.c
diff --git a/Source/cmComputeLinkInformation.h b/Source/cmComputeLinkInformation.h
index 1d29c26..3d26ea7 100644
--- a/Source/cmComputeLinkInformation.h
+++ b/Source/cmComputeLinkInformation.h
@@ -70,6 +70,7 @@ public:
   std::string const& GetRPathLinkFlag() const { return this->RPathLinkFlag; }
   std::string GetRPathLinkString();
 
+  std::string GetConfig() const { return this->Config; }
 private:
   void AddItem(std::string const& item, const cmGeneratorTarget* tgt);
   void AddSharedDepItem(std::string const& item, cmGeneratorTarget const* tgt);
diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx
new file mode 100644
index 0000000..75e5ef5
--- /dev/null
+++ b/Source/cmLinkLineDeviceComputer.cxx
@@ -0,0 +1,74 @@
+/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+   file Copyright.txt or https://cmake.org/licensing for details.  */
+
+#include "cmLinkLineDeviceComputer.h"
+#include "cmComputeLinkInformation.h"
+#include "cmGeneratorTarget.h"
+#include "cmGlobalNinjaGenerator.h"
+#include "cmOutputConverter.h"
+
+cmLinkLineDeviceComputer::cmLinkLineDeviceComputer(
+  cmOutputConverter* outputConverter, cmStateDirectory stateDir)
+  : cmLinkLineComputer(outputConverter, stateDir)
+{
+}
+
+cmLinkLineDeviceComputer::~cmLinkLineDeviceComputer()
+{
+}
+
+std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
+  cmComputeLinkInformation& cli, std::string const& stdLibString)
+{
+  // Write the library flags to the build rule.
+  std::ostringstream fout;
+  typedef cmComputeLinkInformation::ItemVector ItemVector;
+  ItemVector const& items = cli.GetItems();
+  std::string config = cli.GetConfig();
+  for (ItemVector::const_iterator li = items.begin(); li != items.end();
+       ++li) {
+    if (!li->Target) {
+      continue;
+    }
+
+    if (li->Target->GetType() == cmStateEnums::INTERFACE_LIBRARY ||
+        li->Target->GetType() == cmStateEnums::SHARED_LIBRARY ||
+        li->Target->GetType() == cmStateEnums::MODULE_LIBRARY) {
+      continue;
+    }
+
+    std::set<std::string> langs;
+    li->Target->GetLanguages(langs, config);
+    if (langs.count("CUDA") == 0) {
+      continue;
+    }
+
+    if (li->IsPath) {
+      fout << this->ConvertToOutputFormat(
+        this->ConvertToLinkReference(li->Value));
+    } else {
+      fout << li->Value;
+    }
+    fout << " ";
+  }
+
+  if (!stdLibString.empty()) {
+    fout << stdLibString << " ";
+  }
+
+  return fout.str();
+}
+
+cmNinjaLinkLineDeviceComputer::cmNinjaLinkLineDeviceComputer(
+  cmOutputConverter* outputConverter, cmStateDirectory stateDir,
+  cmGlobalNinjaGenerator const* gg)
+  : cmLinkLineDeviceComputer(outputConverter, stateDir)
+  , GG(gg)
+{
+}
+
+std::string cmNinjaLinkLineDeviceComputer::ConvertToLinkReference(
+  std::string const& lib) const
+{
+  return GG->ConvertToNinjaPath(lib);
+}
diff --git a/Source/cmLinkLineDeviceComputer.h b/Source/cmLinkLineDeviceComputer.h
new file mode 100644
index 0000000..d1079d7
--- /dev/null
+++ b/Source/cmLinkLineDeviceComputer.h
@@ -0,0 +1,36 @@
+/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+   file Copyright.txt or https://cmake.org/licensing for details.  */
+
+#ifndef cmLinkLineDeviceComputer_h
+#define cmLinkLineDeviceComputer_h
+
+#include "cmLinkLineComputer.h"
+class cmGlobalNinjaGenerator;
+
+class cmLinkLineDeviceComputer : public cmLinkLineComputer
+{
+public:
+  cmLinkLineDeviceComputer(cmOutputConverter* outputConverter,
+                           cmStateDirectory stateDir);
+  ~cmLinkLineDeviceComputer() CM_OVERRIDE;
+
+  std::string ComputeLinkLibraries(cmComputeLinkInformation& cli,
+                                   std::string const& stdLibString)
+    CM_OVERRIDE;
+};
+
+class cmNinjaLinkLineDeviceComputer : public cmLinkLineDeviceComputer
+{
+public:
+  cmNinjaLinkLineDeviceComputer(cmOutputConverter* outputConverter,
+                                cmStateDirectory stateDir,
+                                cmGlobalNinjaGenerator const* gg);
+
+  std::string ConvertToLinkReference(std::string const& input) const
+    CM_OVERRIDE;
+
+private:
+  cmGlobalNinjaGenerator const* GG;
+};
+
+#endif
-- 
cgit v0.12


From 4b316097103e0f30220fd579bce28163d38458bf Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Wed, 9 Nov 2016 15:39:55 -0500
Subject: CUDA: Add support for the CUDA_SEPARABLE_COMPILATION target property

---
 Help/manual/cmake-properties.7.rst           |  1 +
 Help/prop_tgt/CUDA_SEPARABLE_COMPILATION.rst | 13 +++++++++++++
 Source/cmMakefileTargetGenerator.cxx         | 21 +++++++++++++++------
 Source/cmNinjaTargetGenerator.cxx            | 18 +++++++++++++++---
 4 files changed, 44 insertions(+), 9 deletions(-)
 create mode 100644 Help/prop_tgt/CUDA_SEPARABLE_COMPILATION.rst

diff --git a/Help/manual/cmake-properties.7.rst b/Help/manual/cmake-properties.7.rst
index 82d5588..5da00ed 100644
--- a/Help/manual/cmake-properties.7.rst
+++ b/Help/manual/cmake-properties.7.rst
@@ -143,6 +143,7 @@ Properties on Targets
    /prop_tgt/CONFIG_OUTPUT_NAME
    /prop_tgt/CONFIG_POSTFIX
    /prop_tgt/CROSSCOMPILING_EMULATOR
+   /prop_tgt/CUDA_SEPARABLE_COMPILATION
    /prop_tgt/CXX_EXTENSIONS
    /prop_tgt/CXX_STANDARD
    /prop_tgt/CXX_STANDARD_REQUIRED
diff --git a/Help/prop_tgt/CUDA_SEPARABLE_COMPILATION.rst b/Help/prop_tgt/CUDA_SEPARABLE_COMPILATION.rst
new file mode 100644
index 0000000..1c7dd80
--- /dev/null
+++ b/Help/prop_tgt/CUDA_SEPARABLE_COMPILATION.rst
@@ -0,0 +1,13 @@
+CUDA_SEPARABLE_COMPILATION
+--------------------------
+
+CUDA only: Enables separate compilation of device code
+
+If set this will enable separable compilation for all CUDA files for
+the given target.
+
+For instance:
+
+.. code-block:: cmake
+
+  set_property(TARGET myexe PROPERTY CUDA_SEPARABLE_COMPILATION ON)
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx
index 7dfb70f..2e5173d 100644
--- a/Source/cmMakefileTargetGenerator.cxx
+++ b/Source/cmMakefileTargetGenerator.cxx
@@ -596,13 +596,22 @@ void cmMakefileTargetGenerator::WriteObjectBuildFile(
 
   // Construct the compile rules.
   {
-    std::string compileRuleVar = "CMAKE_";
-    compileRuleVar += lang;
-    compileRuleVar += "_COMPILE_OBJECT";
-    std::string compileRule =
-      this->Makefile->GetRequiredDefinition(compileRuleVar);
     std::vector<std::string> compileCommands;
-    cmSystemTools::ExpandListArgument(compileRule, compileCommands);
+    if (lang == "CUDA") {
+      std::string cmdVar;
+      if (this->GeneratorTarget->GetProperty("CUDA_SEPARABLE_COMPILATION")) {
+        cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
+      } else {
+        cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
+      }
+      std::string compileRule = this->Makefile->GetRequiredDefinition(cmdVar);
+      cmSystemTools::ExpandListArgument(compileRule, compileCommands);
+    } else {
+      const std::string cmdVar =
+        std::string("CMAKE_") + lang + "_COMPILE_OBJECT";
+      std::string compileRule = this->Makefile->GetRequiredDefinition(cmdVar);
+      cmSystemTools::ExpandListArgument(compileRule, compileCommands);
+    }
 
     if (this->Makefile->IsOn("CMAKE_EXPORT_COMPILE_COMMANDS") &&
         lang_can_export_cmds && compileCommands.size() == 1) {
diff --git a/Source/cmNinjaTargetGenerator.cxx b/Source/cmNinjaTargetGenerator.cxx
index a220cd8..e47de97 100644
--- a/Source/cmNinjaTargetGenerator.cxx
+++ b/Source/cmNinjaTargetGenerator.cxx
@@ -583,10 +583,22 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang)
   }
 
   // Rule for compiling object file.
-  const std::string cmdVar = std::string("CMAKE_") + lang + "_COMPILE_OBJECT";
-  std::string compileCmd = mf->GetRequiredDefinition(cmdVar);
   std::vector<std::string> compileCmds;
-  cmSystemTools::ExpandListArgument(compileCmd, compileCmds);
+  if (lang == "CUDA") {
+    std::string cmdVar;
+    if (this->GeneratorTarget->GetProperty("CUDA_SEPARABLE_COMPILATION")) {
+      cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION");
+    } else {
+      cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION");
+    }
+    std::string compileCmd = mf->GetRequiredDefinition(cmdVar);
+    cmSystemTools::ExpandListArgument(compileCmd, compileCmds);
+  } else {
+    const std::string cmdVar =
+      std::string("CMAKE_") + lang + "_COMPILE_OBJECT";
+    std::string compileCmd = mf->GetRequiredDefinition(cmdVar);
+    cmSystemTools::ExpandListArgument(compileCmd, compileCmds);
+  }
 
   // Maybe insert an include-what-you-use runner.
   if (!compileCmds.empty() && (lang == "C" || lang == "CXX")) {
-- 
cgit v0.12


From 43ce4414c479af6b04e93decaf7f69938c92a323 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Thu, 13 Oct 2016 11:14:21 -0400
Subject: CUDA: Add separable compilation support to the ninja generator.

---
 Source/cmNinjaNormalTargetGenerator.cxx | 358 ++++++++++++++++++++++++++++++++
 Source/cmNinjaNormalTargetGenerator.h   |  10 +
 2 files changed, 368 insertions(+)

diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx
index a23e0ad..c34e826 100644
--- a/Source/cmNinjaNormalTargetGenerator.cxx
+++ b/Source/cmNinjaNormalTargetGenerator.cxx
@@ -18,6 +18,7 @@
 #include "cmGeneratorTarget.h"
 #include "cmGlobalNinjaGenerator.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmLocalGenerator.h"
 #include "cmLocalNinjaGenerator.h"
 #include "cmMakefile.h"
@@ -47,6 +48,7 @@ cmNinjaNormalTargetGenerator::cmNinjaNormalTargetGenerator(
   , TargetNameImport()
   , TargetNamePDB()
   , TargetLinkLanguage("")
+  , DeviceLinkObject()
 {
   this->TargetLinkLanguage = target->GetLinkerLanguage(this->GetConfigName());
   if (target->GetType() == cmStateEnums::EXECUTABLE) {
@@ -94,6 +96,9 @@ void cmNinjaNormalTargetGenerator::Generate()
   if (this->GetGeneratorTarget()->GetType() == cmStateEnums::OBJECT_LIBRARY) {
     this->WriteObjectLibStatement();
   } else {
+    // If this target has cuda language link inputs, and we need to do
+    // device linking
+    this->WriteDeviceLinkStatement();
     this->WriteLinkStatement();
   }
 }
@@ -155,6 +160,14 @@ std::string cmNinjaNormalTargetGenerator::LanguageLinkerRule() const
                     this->GetGeneratorTarget()->GetName());
 }
 
+std::string cmNinjaNormalTargetGenerator::LanguageLinkerDeviceRule() const
+{
+  return this->TargetLinkLanguage + "_" +
+    cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()) +
+    "_DEVICE_LINKER__" + cmGlobalNinjaGenerator::EncodeRuleName(
+                           this->GetGeneratorTarget()->GetName());
+}
+
 struct cmNinjaRemoveNoOpCommands
 {
   bool operator()(std::string const& cmd)
@@ -163,6 +176,115 @@ struct cmNinjaRemoveNoOpCommands
   }
 };
 
+void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(bool useResponseFile)
+{
+  cmStateEnums::TargetType targetType = this->GetGeneratorTarget()->GetType();
+  std::string ruleName = this->LanguageLinkerDeviceRule();
+  // Select whether to use a response file for objects.
+  std::string rspfile;
+  std::string rspcontent;
+
+  if (!this->GetGlobalGenerator()->HasRule(ruleName)) {
+    cmRulePlaceholderExpander::RuleVariables vars;
+    vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
+    vars.CMTargetType =
+      cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType());
+
+    vars.Language = "CUDA";
+
+    std::string responseFlag;
+    if (!useResponseFile) {
+      vars.Objects = "$in";
+      vars.LinkLibraries = "$LINK_LIBRARIES";
+    } else {
+      std::string cmakeVarLang = "CMAKE_";
+      cmakeVarLang += this->TargetLinkLanguage;
+
+      // build response file name
+      std::string cmakeLinkVar = cmakeVarLang + "_RESPONSE_FILE_LINK_FLAG";
+      const char* flag = GetMakefile()->GetDefinition(cmakeLinkVar);
+      if (flag) {
+        responseFlag = flag;
+      } else {
+        responseFlag = "@";
+      }
+      rspfile = "$RSP_FILE";
+      responseFlag += rspfile;
+
+      // build response file content
+      if (this->GetGlobalGenerator()->IsGCCOnWindows()) {
+        rspcontent = "$in";
+      } else {
+        rspcontent = "$in_newline";
+      }
+      rspcontent += " $LINK_LIBRARIES";
+      vars.Objects = responseFlag.c_str();
+      vars.LinkLibraries = "";
+    }
+
+    vars.ObjectDir = "$OBJECT_DIR";
+
+    vars.Target = "$TARGET_FILE";
+
+    vars.SONameFlag = "$SONAME_FLAG";
+    vars.TargetSOName = "$SONAME";
+    vars.TargetPDB = "$TARGET_PDB";
+
+    vars.Flags = "$FLAGS";
+    vars.LinkFlags = "$LINK_FLAGS";
+    vars.Manifests = "$MANIFESTS";
+
+    std::string langFlags;
+    if (targetType != cmStateEnums::EXECUTABLE) {
+      langFlags += "$LANGUAGE_COMPILE_FLAGS $ARCH_FLAGS";
+      vars.LanguageCompileFlags = langFlags.c_str();
+    }
+
+    std::string launcher;
+    const char* val = this->GetLocalGenerator()->GetRuleLauncher(
+      this->GetGeneratorTarget(), "RULE_LAUNCH_LINK");
+    if (val && *val) {
+      launcher = val;
+      launcher += " ";
+    }
+
+    CM_AUTO_PTR<cmRulePlaceholderExpander> rulePlaceholderExpander(
+      this->GetLocalGenerator()->CreateRulePlaceholderExpander());
+
+    // Rule for linking library/executable.
+    std::vector<std::string> linkCmds = this->ComputeDeviceLinkCmd();
+    for (std::vector<std::string>::iterator i = linkCmds.begin();
+         i != linkCmds.end(); ++i) {
+      *i = launcher + *i;
+      rulePlaceholderExpander->ExpandRuleVariables(this->GetLocalGenerator(),
+                                                   *i, vars);
+    }
+    {
+      // If there is no ranlib the command will be ":".  Skip it.
+      std::vector<std::string>::iterator newEnd = std::remove_if(
+        linkCmds.begin(), linkCmds.end(), cmNinjaRemoveNoOpCommands());
+      linkCmds.erase(newEnd, linkCmds.end());
+    }
+
+    std::string linkCmd =
+      this->GetLocalGenerator()->BuildCommandLine(linkCmds);
+
+    // Write the linker rule with response file if needed.
+    std::ostringstream comment;
+    comment << "Rule for linking " << this->TargetLinkLanguage << " "
+            << this->GetVisibleTypeName() << ".";
+    std::ostringstream description;
+    description << "Linking " << this->TargetLinkLanguage << " "
+                << this->GetVisibleTypeName() << " $TARGET_FILE";
+    this->GetGlobalGenerator()->AddRule(ruleName, linkCmd, description.str(),
+                                        comment.str(),
+                                        /*depfile*/ "",
+                                        /*deptype*/ "", rspfile, rspcontent,
+                                        /*restat*/ "$RESTAT",
+                                        /*generator*/ false);
+  }
+}
+
 void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile)
 {
   cmStateEnums::TargetType targetType = this->GetGeneratorTarget()->GetType();
@@ -327,6 +449,32 @@ void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile)
   }
 }
 
+std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd()
+{
+  std::vector<std::string> linkCmds;
+
+  // this target requires separable cuda compilation
+  // now build the correct command depending on if the target is
+  // an executable or a dynamic library.
+  std::string linkCmd;
+  switch (this->GetGeneratorTarget()->GetType()) {
+    case cmStateEnums::SHARED_LIBRARY:
+    case cmStateEnums::MODULE_LIBRARY: {
+      const std::string cudaLinkCmd(
+        this->GetMakefile()->GetDefinition("CMAKE_CUDA_DEVICE_LINK_LIBRARY"));
+      cmSystemTools::ExpandListArgument(cudaLinkCmd, linkCmds);
+    } break;
+    case cmStateEnums::EXECUTABLE: {
+      const std::string cudaLinkCmd(this->GetMakefile()->GetDefinition(
+        "CMAKE_CUDA_DEVICE_LINK_EXECUTABLE"));
+      cmSystemTools::ExpandListArgument(cudaLinkCmd, linkCmds);
+    } break;
+    default:
+      break;
+  }
+  return linkCmds;
+}
+
 std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeLinkCmd()
 {
   std::vector<std::string> linkCmds;
@@ -418,6 +566,211 @@ static int calculateCommandLineLengthLimit(int linkRuleLength)
   return sz - linkRuleLength;
 }
 
+void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement()
+{
+  cmGeneratorTarget& genTarget = *this->GetGeneratorTarget();
+
+  // determine if we need to do any device linking for this target
+  const std::string cuda_lang("CUDA");
+  cmGeneratorTarget::LinkClosure const* closure =
+    genTarget.GetLinkClosure(this->GetConfigName());
+
+  const bool hasCUDA =
+    (std::find(closure->Languages.begin(), closure->Languages.end(),
+               cuda_lang) != closure->Languages.end());
+
+  bool shouldHaveDeviceLinking = false;
+  switch (genTarget.GetType()) {
+    case cmStateEnums::SHARED_LIBRARY:
+    case cmStateEnums::MODULE_LIBRARY:
+    case cmStateEnums::EXECUTABLE:
+      shouldHaveDeviceLinking = true;
+      break;
+    default:
+      break;
+  }
+
+  if (!shouldHaveDeviceLinking || !hasCUDA) {
+    return;
+  }
+
+  // Now we can do device linking
+
+  // First and very important step is to make sure while inside this
+  // step our link language is set to CUDA
+  std::string cudaLinkLanguage = "CUDA";
+
+  std::string const cfgName = this->GetConfigName();
+  std::string const targetOutputReal =
+    ConvertToNinjaPath(genTarget.ObjectDirectory + "cmake_device_link.o");
+
+  std::string const targetOutputImplib =
+    ConvertToNinjaPath(genTarget.GetFullPath(cfgName,
+                                             /*implib=*/true));
+
+  this->DeviceLinkObject = targetOutputReal;
+
+  // Write comments.
+  cmGlobalNinjaGenerator::WriteDivider(this->GetBuildFileStream());
+  const cmStateEnums::TargetType targetType = genTarget.GetType();
+  this->GetBuildFileStream() << "# Device Link build statements for "
+                             << cmState::GetTargetTypeName(targetType)
+                             << " target " << this->GetTargetName() << "\n\n";
+
+  // Compute the comment.
+  std::ostringstream comment;
+  comment << "Link the " << this->GetVisibleTypeName() << " "
+          << targetOutputReal;
+
+  cmNinjaDeps emptyDeps;
+  cmNinjaVars vars;
+
+  // Compute outputs.
+  cmNinjaDeps outputs;
+  outputs.push_back(targetOutputReal);
+  // Compute specific libraries to link with.
+  cmNinjaDeps explicitDeps = this->GetObjects();
+  cmNinjaDeps implicitDeps = this->ComputeLinkDeps();
+
+  std::string frameworkPath;
+  std::string linkPath;
+
+  std::string createRule = genTarget.GetCreateRuleVariable(
+    this->TargetLinkLanguage, this->GetConfigName());
+  const bool useWatcomQuote =
+    this->GetMakefile()->IsOn(createRule + "_USE_WATCOM_QUOTE");
+  cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator();
+
+  vars["TARGET_FILE"] =
+    localGen.ConvertToOutputFormat(targetOutputReal, cmOutputConverter::SHELL);
+
+  CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+    new cmNinjaLinkLineDeviceComputer(
+      this->GetLocalGenerator(),
+      this->GetLocalGenerator()->GetStateSnapshot().GetDirectory(),
+      this->GetGlobalGenerator()));
+  linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
+
+  localGen.GetTargetFlags(
+    linkLineComputer.get(), this->GetConfigName(), vars["LINK_LIBRARIES"],
+    vars["FLAGS"], vars["LINK_FLAGS"], frameworkPath, linkPath, &genTarget);
+
+  this->addPoolNinjaVariable("JOB_POOL_LINK", &genTarget, vars);
+
+  this->AddModuleDefinitionFlag(linkLineComputer.get(), vars["LINK_FLAGS"]);
+  vars["LINK_FLAGS"] =
+    cmGlobalNinjaGenerator::EncodeLiteral(vars["LINK_FLAGS"]);
+
+  vars["MANIFESTS"] = this->GetManifests();
+
+  vars["LINK_PATH"] = frameworkPath + linkPath;
+
+  // Compute architecture specific link flags.  Yes, these go into a different
+  // variable for executables, probably due to a mistake made when duplicating
+  // code between the Makefile executable and library generators.
+  if (targetType == cmStateEnums::EXECUTABLE) {
+    std::string t = vars["FLAGS"];
+    localGen.AddArchitectureFlags(t, &genTarget, cudaLinkLanguage, cfgName);
+    vars["FLAGS"] = t;
+  } else {
+    std::string t = vars["ARCH_FLAGS"];
+    localGen.AddArchitectureFlags(t, &genTarget, cudaLinkLanguage, cfgName);
+    vars["ARCH_FLAGS"] = t;
+    t = "";
+    localGen.AddLanguageFlags(t, cudaLinkLanguage, cfgName);
+    vars["LANGUAGE_COMPILE_FLAGS"] = t;
+  }
+  if (this->GetGeneratorTarget()->HasSOName(cfgName)) {
+    vars["SONAME_FLAG"] =
+      this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage);
+    vars["SONAME"] = this->TargetNameSO;
+    if (targetType == cmStateEnums::SHARED_LIBRARY) {
+      std::string install_dir =
+        this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(cfgName);
+      if (!install_dir.empty()) {
+        vars["INSTALLNAME_DIR"] = localGen.ConvertToOutputFormat(
+          install_dir, cmOutputConverter::SHELL);
+      }
+    }
+  }
+
+  cmNinjaDeps byproducts;
+
+  if (!this->TargetNameImport.empty()) {
+    const std::string impLibPath = localGen.ConvertToOutputFormat(
+      targetOutputImplib, cmOutputConverter::SHELL);
+    vars["TARGET_IMPLIB"] = impLibPath;
+    EnsureParentDirectoryExists(impLibPath);
+    if (genTarget.HasImportLibrary()) {
+      byproducts.push_back(targetOutputImplib);
+    }
+  }
+
+  const std::string objPath = GetGeneratorTarget()->GetSupportDirectory();
+  vars["OBJECT_DIR"] = this->GetLocalGenerator()->ConvertToOutputFormat(
+    this->ConvertToNinjaPath(objPath), cmOutputConverter::SHELL);
+  EnsureDirectoryExists(objPath);
+
+  if (this->GetGlobalGenerator()->IsGCCOnWindows()) {
+    // ar.exe can't handle backslashes in rsp files (implicitly used by gcc)
+    std::string& linkLibraries = vars["LINK_LIBRARIES"];
+    std::replace(linkLibraries.begin(), linkLibraries.end(), '\\', '/');
+    std::string& link_path = vars["LINK_PATH"];
+    std::replace(link_path.begin(), link_path.end(), '\\', '/');
+  }
+
+  const std::vector<cmCustomCommand>* cmdLists[3] = {
+    &genTarget.GetPreBuildCommands(), &genTarget.GetPreLinkCommands(),
+    &genTarget.GetPostBuildCommands()
+  };
+
+  std::vector<std::string> preLinkCmdLines, postBuildCmdLines;
+  vars["PRE_LINK"] = localGen.BuildCommandLine(preLinkCmdLines);
+  vars["POST_BUILD"] = localGen.BuildCommandLine(postBuildCmdLines);
+
+  std::vector<std::string>* cmdLineLists[3] = { &preLinkCmdLines,
+                                                &preLinkCmdLines,
+                                                &postBuildCmdLines };
+
+  for (unsigned i = 0; i != 3; ++i) {
+    for (std::vector<cmCustomCommand>::const_iterator ci =
+           cmdLists[i]->begin();
+         ci != cmdLists[i]->end(); ++ci) {
+      cmCustomCommandGenerator ccg(*ci, cfgName, this->GetLocalGenerator());
+      localGen.AppendCustomCommandLines(ccg, *cmdLineLists[i]);
+      std::vector<std::string> const& ccByproducts = ccg.GetByproducts();
+      std::transform(ccByproducts.begin(), ccByproducts.end(),
+                     std::back_inserter(byproducts), MapToNinjaPath());
+    }
+  }
+
+  cmGlobalNinjaGenerator& globalGen = *this->GetGlobalGenerator();
+
+  int commandLineLengthLimit = -1;
+  if (!this->ForceResponseFile()) {
+    commandLineLengthLimit = calculateCommandLineLengthLimit(
+      globalGen.GetRuleCmdLength(this->LanguageLinkerDeviceRule()));
+  }
+
+  const std::string rspfile =
+    std::string(cmake::GetCMakeFilesDirectoryPostSlash()) +
+    genTarget.GetName() + ".rsp";
+
+  // Gather order-only dependencies.
+  cmNinjaDeps orderOnlyDeps;
+  this->GetLocalGenerator()->AppendTargetDepends(this->GetGeneratorTarget(),
+                                                 orderOnlyDeps);
+
+  // Write the build statement for this target.
+  bool usedResponseFile = false;
+  globalGen.WriteBuild(this->GetBuildFileStream(), comment.str(),
+                       this->LanguageLinkerDeviceRule(), outputs,
+                       /*implicitOuts=*/cmNinjaDeps(), explicitDeps,
+                       implicitDeps, orderOnlyDeps, vars, rspfile,
+                       commandLineLengthLimit, &usedResponseFile);
+  this->WriteDeviceLinkRule(usedResponseFile);
+}
+
 void cmNinjaNormalTargetGenerator::WriteLinkStatement()
 {
   cmGeneratorTarget& gt = *this->GetGeneratorTarget();
@@ -478,6 +831,10 @@ void cmNinjaNormalTargetGenerator::WriteLinkStatement()
   cmNinjaDeps explicitDeps = this->GetObjects();
   cmNinjaDeps implicitDeps = this->ComputeLinkDeps();
 
+  if (!this->DeviceLinkObject.empty()) {
+    explicitDeps.push_back(this->DeviceLinkObject);
+  }
+
   cmMakefile* mf = this->GetMakefile();
 
   std::string frameworkPath;
@@ -501,6 +858,7 @@ void cmNinjaNormalTargetGenerator::WriteLinkStatement()
   localGen.GetTargetFlags(
     linkLineComputer.get(), this->GetConfigName(), vars["LINK_LIBRARIES"],
     vars["FLAGS"], vars["LINK_FLAGS"], frameworkPath, linkPath, &genTarget);
+
   if (this->GetMakefile()->IsOn("CMAKE_SUPPORT_WINDOWS_EXPORT_ALL_SYMBOLS") &&
       (gt.GetType() == cmStateEnums::SHARED_LIBRARY ||
        gt.IsExecutableWithExports())) {
diff --git a/Source/cmNinjaNormalTargetGenerator.h b/Source/cmNinjaNormalTargetGenerator.h
index 5bd906f..e5595ea 100644
--- a/Source/cmNinjaNormalTargetGenerator.h
+++ b/Source/cmNinjaNormalTargetGenerator.h
@@ -22,12 +22,21 @@ public:
 
 private:
   std::string LanguageLinkerRule() const;
+  std::string LanguageLinkerDeviceRule() const;
+
   const char* GetVisibleTypeName() const;
   void WriteLanguagesRules();
+
   void WriteLinkRule(bool useResponseFile);
+  void WriteDeviceLinkRule(bool useResponseFile);
+
   void WriteLinkStatement();
+  void WriteDeviceLinkStatement();
+
   void WriteObjectLibStatement();
+
   std::vector<std::string> ComputeLinkCmd();
+  std::vector<std::string> ComputeDeviceLinkCmd();
 
 private:
   // Target name info.
@@ -37,6 +46,7 @@ private:
   std::string TargetNameImport;
   std::string TargetNamePDB;
   std::string TargetLinkLanguage;
+  std::string DeviceLinkObject;
 };
 
 #endif // ! cmNinjaNormalTargetGenerator_h
-- 
cgit v0.12


From d038559e49b0d0dd65124370f48a92ba3de9006c Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Fri, 21 Oct 2016 14:17:42 -0400
Subject: CUDA: Add separable compilation support to the makefile generator.

---
 Source/cmMakefileExecutableTargetGenerator.cxx | 227 +++++++++++++++++++++++++
 Source/cmMakefileExecutableTargetGenerator.h   |   4 +
 Source/cmMakefileLibraryTargetGenerator.cxx    | 222 ++++++++++++++++++++++++
 Source/cmMakefileLibraryTargetGenerator.h      |   6 +
 4 files changed, 459 insertions(+)

diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx
index 358804e..069011d 100644
--- a/Source/cmMakefileExecutableTargetGenerator.cxx
+++ b/Source/cmMakefileExecutableTargetGenerator.cxx
@@ -10,6 +10,7 @@
 #include "cmGeneratorTarget.h"
 #include "cmGlobalUnixMakefileGenerator3.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmLocalGenerator.h"
 #include "cmLocalUnixMakefileGenerator3.h"
 #include "cmMakefile.h"
@@ -56,6 +57,9 @@ void cmMakefileExecutableTargetGenerator::WriteRuleFiles()
   // write in rules for object files and custom commands
   this->WriteTargetBuildRules();
 
+  // write the device link rules
+  this->WriteDeviceExecutableRule(false);
+
   // write the link rules
   this->WriteExecutableRule(false);
   if (this->GeneratorTarget->NeedRelinkBeforeInstall(this->ConfigName)) {
@@ -77,6 +81,218 @@ void cmMakefileExecutableTargetGenerator::WriteRuleFiles()
   this->CloseFileStreams();
 }
 
+void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
+  bool relink)
+{
+#ifdef CMAKE_BUILD_WITH_CMAKE
+  const std::string cuda_lang("CUDA");
+  cmGeneratorTarget::LinkClosure const* closure =
+    this->GeneratorTarget->GetLinkClosure(this->ConfigName);
+
+  const bool hasCUDA =
+    (std::find(closure->Languages.begin(), closure->Languages.end(),
+               cuda_lang) != closure->Languages.end());
+  if (!hasCUDA) {
+    return;
+  }
+
+  std::vector<std::string> commands;
+
+  // Build list of dependencies.
+  std::vector<std::string> depends;
+  this->AppendLinkDepends(depends);
+
+  // Get the language to use for linking this library.
+  std::string linkLanguage = "CUDA";
+
+  // Get the name of the device object to generate.
+  std::string const targetOutputReal =
+    this->GeneratorTarget->ObjectDirectory + "cmake_device_link.o";
+  this->DeviceLinkObject = targetOutputReal;
+
+  this->NumberOfProgressActions++;
+  if (!this->NoRuleMessages) {
+    cmLocalUnixMakefileGenerator3::EchoProgress progress;
+    this->MakeEchoProgress(progress);
+    // Add the link message.
+    std::string buildEcho = "Linking ";
+    buildEcho += linkLanguage;
+    buildEcho += " device code ";
+    buildEcho += targetOutputReal;
+    this->LocalGenerator->AppendEcho(
+      commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
+  }
+
+  // Build a list of compiler flags and linker flags.
+  std::string flags;
+  std::string linkFlags;
+
+  // Add flags to create an executable.
+  // Add symbol export flags if necessary.
+  if (this->GeneratorTarget->IsExecutableWithExports()) {
+    std::string export_flag_var = "CMAKE_EXE_EXPORTS_";
+    export_flag_var += linkLanguage;
+    export_flag_var += "_FLAG";
+    this->LocalGenerator->AppendFlags(
+      linkFlags, this->Makefile->GetDefinition(export_flag_var));
+  }
+
+  this->LocalGenerator->AppendFlags(linkFlags,
+                                    this->LocalGenerator->GetLinkLibsCMP0065(
+                                      linkLanguage, *this->GeneratorTarget));
+
+  // Add language feature flags.
+  this->AddFeatureFlags(flags, linkLanguage);
+
+  this->LocalGenerator->AddArchitectureFlags(flags, this->GeneratorTarget,
+                                             linkLanguage, this->ConfigName);
+
+  // Add target-specific linker flags.
+  this->LocalGenerator->AppendFlags(
+    linkFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
+  std::string linkFlagsConfig = "LINK_FLAGS_";
+  linkFlagsConfig += cmSystemTools::UpperCase(this->ConfigName);
+  this->LocalGenerator->AppendFlags(
+    linkFlags, this->GeneratorTarget->GetProperty(linkFlagsConfig));
+
+  {
+    CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+      this->CreateLinkLineComputer(
+        this->LocalGenerator,
+        this->LocalGenerator->GetStateSnapshot().GetDirectory()));
+
+    this->AddModuleDefinitionFlag(linkLineComputer.get(), linkFlags);
+  }
+
+  // Construct a list of files associated with this executable that
+  // may need to be cleaned.
+  std::vector<std::string> exeCleanFiles;
+  exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
+    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
+
+  // Determine whether a link script will be used.
+  bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
+
+  // Construct the main link rule.
+  std::vector<std::string> real_link_commands;
+  const std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_EXECUTABLE";
+  const std::string linkRule = this->GetLinkRule(linkRuleVar);
+  std::vector<std::string> commands1;
+  cmSystemTools::ExpandListArgument(linkRule, real_link_commands);
+
+  bool useResponseFileForObjects =
+    this->CheckUseResponseFileForObjects(linkLanguage);
+  bool const useResponseFileForLibs =
+    this->CheckUseResponseFileForLibraries(linkLanguage);
+
+  // Expand the rule variables.
+  {
+    bool useWatcomQuote =
+      this->Makefile->IsOn(linkRuleVar + "_USE_WATCOM_QUOTE");
+
+    // Set path conversion for link script shells.
+    this->LocalGenerator->SetLinkScriptShell(useLinkScript);
+
+    CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+      new cmLinkLineDeviceComputer(
+        this->LocalGenerator,
+        this->LocalGenerator->GetStateSnapshot().GetDirectory()));
+    linkLineComputer->SetForResponse(useResponseFileForLibs);
+    linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
+    linkLineComputer->SetRelink(relink);
+
+    // Collect up flags to link in needed libraries.
+    std::string linkLibs;
+    this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
+                         useResponseFileForLibs, depends);
+
+    // Construct object file lists that may be needed to expand the
+    // rule.
+    std::string buildObjs;
+    this->CreateObjectLists(useLinkScript, false, useResponseFileForObjects,
+                            buildObjs, depends, useWatcomQuote);
+
+    cmRulePlaceholderExpander::RuleVariables vars;
+    std::string objectDir = this->GeneratorTarget->GetSupportDirectory();
+
+    objectDir = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir),
+      cmOutputConverter::SHELL);
+
+    cmOutputConverter::OutputFormat output = (useWatcomQuote)
+      ? cmOutputConverter::WATCOMQUOTE
+      : cmOutputConverter::SHELL;
+    std::string target = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
+      output);
+
+    vars.Language = linkLanguage.c_str();
+    vars.Objects = buildObjs.c_str();
+    vars.ObjectDir = objectDir.c_str();
+    vars.Target = target.c_str();
+    vars.LinkLibraries = linkLibs.c_str();
+    vars.Flags = flags.c_str();
+    vars.LinkFlags = linkFlags.c_str();
+
+    std::string launcher;
+
+    const char* val = this->LocalGenerator->GetRuleLauncher(
+      this->GeneratorTarget, "RULE_LAUNCH_LINK");
+    if (val && *val) {
+      launcher = val;
+      launcher += " ";
+    }
+
+    CM_AUTO_PTR<cmRulePlaceholderExpander> rulePlaceholderExpander(
+      this->LocalGenerator->CreateRulePlaceholderExpander());
+
+    // Expand placeholders in the commands.
+    rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
+    for (std::vector<std::string>::iterator i = real_link_commands.begin();
+         i != real_link_commands.end(); ++i) {
+      *i = launcher + *i;
+      rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator, *i,
+                                                   vars);
+    }
+
+    // Restore path conversion to normal shells.
+    this->LocalGenerator->SetLinkScriptShell(false);
+  }
+
+  // Optionally convert the build rule to use a script to avoid long
+  // command lines in the make shell.
+  if (useLinkScript) {
+    // Use a link script.
+    const char* name = (relink ? "drelink.txt" : "dlink.txt");
+    this->CreateLinkScript(name, real_link_commands, commands1, depends);
+  } else {
+    // No link script.  Just use the link rule directly.
+    commands1 = real_link_commands;
+  }
+  this->LocalGenerator->CreateCDCommand(
+    commands1, this->Makefile->GetCurrentBinaryDirectory(),
+    this->LocalGenerator->GetBinaryDirectory());
+  commands.insert(commands.end(), commands1.begin(), commands1.end());
+  commands1.clear();
+
+  // Write the build rule.
+  this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, CM_NULLPTR,
+                                      targetOutputReal, depends, commands,
+                                      false);
+
+  // Write the main driver rule to build everything in this target.
+  this->WriteTargetDriverRule(targetOutputReal, relink);
+
+  // Clean all the possible executable names and symlinks.
+  this->CleanFiles.insert(this->CleanFiles.end(), exeCleanFiles.begin(),
+                          exeCleanFiles.end());
+#else
+  static_cast<void>(relink);
+#endif
+}
+
 void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
 {
   std::vector<std::string> commands;
@@ -84,6 +300,9 @@ void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
   // Build list of dependencies.
   std::vector<std::string> depends;
   this->AppendLinkDepends(depends);
+  if (!this->DeviceLinkObject.empty()) {
+    depends.push_back(this->DeviceLinkObject);
+  }
 
   // Get the name of the executable to generate.
   std::string targetName;
@@ -327,6 +546,14 @@ void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
     std::string buildObjs;
     this->CreateObjectLists(useLinkScript, false, useResponseFileForObjects,
                             buildObjs, depends, useWatcomQuote);
+    if (!this->DeviceLinkObject.empty()) {
+      buildObjs += " " +
+        this->LocalGenerator->ConvertToOutputFormat(
+          this->LocalGenerator->MaybeConvertToRelativePath(
+            this->LocalGenerator->GetCurrentBinaryDirectory(),
+            this->DeviceLinkObject),
+          cmOutputConverter::SHELL);
+    }
 
     // maybe create .def file from list of objects
     if (this->GeneratorTarget->IsExecutableWithExports() &&
diff --git a/Source/cmMakefileExecutableTargetGenerator.h b/Source/cmMakefileExecutableTargetGenerator.h
index 36cfe40..642182b 100644
--- a/Source/cmMakefileExecutableTargetGenerator.h
+++ b/Source/cmMakefileExecutableTargetGenerator.h
@@ -21,6 +21,10 @@ public:
 
 protected:
   virtual void WriteExecutableRule(bool relink);
+  virtual void WriteDeviceExecutableRule(bool relink);
+
+private:
+  std::string DeviceLinkObject;
 };
 
 #endif
diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx
index c591bb3..2b0e1b1 100644
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -11,6 +11,7 @@
 #include "cmGeneratorTarget.h"
 #include "cmGlobalUnixMakefileGenerator3.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmLocalGenerator.h"
 #include "cmLocalUnixMakefileGenerator3.h"
 #include "cmMakefile.h"
@@ -151,6 +152,24 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
     this->WriteFrameworkRules(relink);
     return;
   }
+
+  if (!relink) {
+    const std::string cuda_lang("CUDA");
+    cmGeneratorTarget::LinkClosure const* closure =
+      this->GeneratorTarget->GetLinkClosure(this->ConfigName);
+
+    const bool hasCUDA =
+      (std::find(closure->Languages.begin(), closure->Languages.end(),
+                 cuda_lang) != closure->Languages.end());
+    if (hasCUDA) {
+      std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
+      std::string extraFlags;
+      this->LocalGenerator->AppendFlags(
+        extraFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
+      this->WriteDeviceLibraryRules(linkRuleVar, extraFlags, relink);
+    }
+  }
+
   std::string linkLanguage =
     this->GeneratorTarget->GetLinkerLanguage(this->ConfigName);
   std::string linkRuleVar = "CMAKE_";
@@ -183,6 +202,24 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
 
 void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
 {
+
+  if (!relink) {
+    const std::string cuda_lang("CUDA");
+    cmGeneratorTarget::LinkClosure const* closure =
+      this->GeneratorTarget->GetLinkClosure(this->ConfigName);
+
+    const bool hasCUDA =
+      (std::find(closure->Languages.begin(), closure->Languages.end(),
+                 cuda_lang) != closure->Languages.end());
+    if (hasCUDA) {
+      std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
+      std::string extraFlags;
+      this->LocalGenerator->AppendFlags(
+        extraFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
+      this->WriteDeviceLibraryRules(linkRuleVar, extraFlags, relink);
+    }
+  }
+
   std::string linkLanguage =
     this->GeneratorTarget->GetLinkerLanguage(this->ConfigName);
   std::string linkRuleVar = "CMAKE_";
@@ -230,6 +267,180 @@ void cmMakefileLibraryTargetGenerator::WriteFrameworkRules(bool relink)
   this->WriteLibraryRules(linkRuleVar, extraFlags, relink);
 }
 
+void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
+  const std::string& linkRuleVar, const std::string& extraFlags, bool relink)
+{
+#ifdef CMAKE_BUILD_WITH_CMAKE
+  // TODO: Merge the methods that call this method to avoid
+  // code duplication.
+  std::vector<std::string> commands;
+
+  // Build list of dependencies.
+  std::vector<std::string> depends;
+  this->AppendLinkDepends(depends);
+
+  // Get the language to use for linking this library.
+  std::string linkLanguage = "CUDA";
+
+  // Create set of linking flags.
+  std::string linkFlags;
+  this->LocalGenerator->AppendFlags(linkFlags, extraFlags);
+
+  // Get the name of the device object to generate.
+  std::string const targetOutputReal =
+    this->GeneratorTarget->ObjectDirectory + "cmake_device_link.o";
+  this->DeviceLinkObject = targetOutputReal;
+
+  this->NumberOfProgressActions++;
+  if (!this->NoRuleMessages) {
+    cmLocalUnixMakefileGenerator3::EchoProgress progress;
+    this->MakeEchoProgress(progress);
+    // Add the link message.
+    std::string buildEcho = "Linking " + linkLanguage + " device code";
+    buildEcho += targetOutputReal;
+    this->LocalGenerator->AppendEcho(
+      commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
+  }
+  // Clean files associated with this library.
+  std::vector<std::string> libCleanFiles;
+  libCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
+    this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
+
+  // Determine whether a link script will be used.
+  bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
+
+  bool useResponseFileForObjects =
+    this->CheckUseResponseFileForObjects(linkLanguage);
+  bool const useResponseFileForLibs =
+    this->CheckUseResponseFileForLibraries(linkLanguage);
+
+  cmRulePlaceholderExpander::RuleVariables vars;
+  vars.Language = linkLanguage.c_str();
+
+  // Expand the rule variables.
+  std::vector<std::string> real_link_commands;
+  {
+    bool useWatcomQuote =
+      this->Makefile->IsOn(linkRuleVar + "_USE_WATCOM_QUOTE");
+
+    // Set path conversion for link script shells.
+    this->LocalGenerator->SetLinkScriptShell(useLinkScript);
+
+    // Collect up flags to link in needed libraries.
+    std::string linkLibs;
+    if (this->GeneratorTarget->GetType() != cmStateEnums::STATIC_LIBRARY) {
+
+      CM_AUTO_PTR<cmLinkLineComputer> linkLineComputer(
+        new cmLinkLineDeviceComputer(
+          this->LocalGenerator,
+          this->LocalGenerator->GetStateSnapshot().GetDirectory()));
+      linkLineComputer->SetForResponse(useResponseFileForLibs);
+      linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
+      linkLineComputer->SetRelink(relink);
+
+      this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
+                           useResponseFileForLibs, depends);
+    }
+
+    // Construct object file lists that may be needed to expand the
+    // rule.
+    std::string buildObjs;
+    this->CreateObjectLists(useLinkScript, false, // useArchiveRules
+                            useResponseFileForObjects, buildObjs, depends,
+                            useWatcomQuote);
+
+    cmOutputConverter::OutputFormat output = (useWatcomQuote)
+      ? cmOutputConverter::WATCOMQUOTE
+      : cmOutputConverter::SHELL;
+
+    std::string objectDir = this->GeneratorTarget->GetSupportDirectory();
+    objectDir = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir),
+      cmOutputConverter::SHELL);
+
+    std::string target = this->LocalGenerator->ConvertToOutputFormat(
+      this->LocalGenerator->MaybeConvertToRelativePath(
+        this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
+      output);
+
+    vars.Objects = buildObjs.c_str();
+    vars.ObjectDir = objectDir.c_str();
+    vars.Target = target.c_str();
+    vars.LinkLibraries = linkLibs.c_str();
+    vars.ObjectsQuoted = buildObjs.c_str();
+    vars.LinkFlags = linkFlags.c_str();
+
+    // Add language feature flags.
+    std::string langFlags;
+    this->AddFeatureFlags(langFlags, linkLanguage);
+
+    vars.LanguageCompileFlags = langFlags.c_str();
+
+    std::string launcher;
+    const char* val = this->LocalGenerator->GetRuleLauncher(
+      this->GeneratorTarget, "RULE_LAUNCH_LINK");
+    if (val && *val) {
+      launcher = val;
+      launcher += " ";
+    }
+
+    CM_AUTO_PTR<cmRulePlaceholderExpander> rulePlaceholderExpander(
+      this->LocalGenerator->CreateRulePlaceholderExpander());
+
+    // Construct the main link rule and expand placeholders.
+    rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
+    std::string linkRule = this->GetLinkRule(linkRuleVar);
+    cmSystemTools::ExpandListArgument(linkRule, real_link_commands);
+
+    // Expand placeholders.
+    for (std::vector<std::string>::iterator i = real_link_commands.begin();
+         i != real_link_commands.end(); ++i) {
+      *i = launcher + *i;
+      rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator, *i,
+                                                   vars);
+    }
+    // Restore path conversion to normal shells.
+    this->LocalGenerator->SetLinkScriptShell(false);
+
+    // Clean all the possible library names and symlinks.
+    this->CleanFiles.insert(this->CleanFiles.end(), libCleanFiles.begin(),
+                            libCleanFiles.end());
+  }
+
+  std::vector<std::string> commands1;
+  // Optionally convert the build rule to use a script to avoid long
+  // command lines in the make shell.
+  if (useLinkScript) {
+    // Use a link script.
+    const char* name = (relink ? "drelink.txt" : "dlink.txt");
+    this->CreateLinkScript(name, real_link_commands, commands1, depends);
+  } else {
+    // No link script.  Just use the link rule directly.
+    commands1 = real_link_commands;
+  }
+  this->LocalGenerator->CreateCDCommand(
+    commands1, this->Makefile->GetCurrentBinaryDirectory(),
+    this->LocalGenerator->GetBinaryDirectory());
+  commands.insert(commands.end(), commands1.begin(), commands1.end());
+  commands1.clear();
+
+  // Compute the list of outputs.
+  std::vector<std::string> outputs(1, targetOutputReal);
+
+  // Write the build rule.
+  this->WriteMakeRule(*this->BuildFileStream, CM_NULLPTR, outputs, depends,
+                      commands, false);
+
+  // Write the main driver rule to build everything in this target.
+  this->WriteTargetDriverRule(targetOutputReal, relink);
+#else
+  static_cast<void>(linkRuleVar);
+  static_cast<void>(extraFlags);
+  static_cast<void>(relink);
+#endif
+}
+
 void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
   const std::string& linkRuleVar, const std::string& extraFlags, bool relink)
 {
@@ -240,6 +451,9 @@ void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
   // Build list of dependencies.
   std::vector<std::string> depends;
   this->AppendLinkDepends(depends);
+  if (!this->DeviceLinkObject.empty()) {
+    depends.push_back(this->DeviceLinkObject);
+  }
 
   // Get the language to use for linking this library.
   std::string linkLanguage =
@@ -518,6 +732,14 @@ void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
     this->CreateObjectLists(useLinkScript, useArchiveRules,
                             useResponseFileForObjects, buildObjs, depends,
                             useWatcomQuote);
+    if (!this->DeviceLinkObject.empty()) {
+      buildObjs += " " +
+        this->LocalGenerator->ConvertToOutputFormat(
+          this->LocalGenerator->MaybeConvertToRelativePath(
+            this->LocalGenerator->GetCurrentBinaryDirectory(),
+            this->DeviceLinkObject),
+          cmOutputConverter::SHELL);
+    }
 
     // maybe create .def file from list of objects
     if (this->GeneratorTarget->GetType() == cmStateEnums::SHARED_LIBRARY &&
diff --git a/Source/cmMakefileLibraryTargetGenerator.h b/Source/cmMakefileLibraryTargetGenerator.h
index dda41b8..93ce902 100644
--- a/Source/cmMakefileLibraryTargetGenerator.h
+++ b/Source/cmMakefileLibraryTargetGenerator.h
@@ -26,6 +26,9 @@ protected:
   void WriteStaticLibraryRules();
   void WriteSharedLibraryRules(bool relink);
   void WriteModuleLibraryRules(bool relink);
+
+  void WriteDeviceLibraryRules(const std::string& linkRule,
+                               const std::string& extraFlags, bool relink);
   void WriteLibraryRules(const std::string& linkRule,
                          const std::string& extraFlags, bool relink);
   // MacOSX Framework support methods
@@ -33,6 +36,9 @@ protected:
 
   // Store the computd framework version for OS X Frameworks.
   std::string FrameworkVersion;
+
+private:
+  std::string DeviceLinkObject;
 };
 
 #endif
-- 
cgit v0.12


From a5e806b3f51d91814d8a39a4bb84363d530874bb Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Fri, 4 Nov 2016 16:23:00 -0400
Subject: CUDA: Add support for CMAKE_CUDA_COMPILE_OPTIONS_VISIBILITY

---
 Source/cmTarget.cxx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Source/cmTarget.cxx b/Source/cmTarget.cxx
index 48acc6e..3741c33 100644
--- a/Source/cmTarget.cxx
+++ b/Source/cmTarget.cxx
@@ -360,6 +360,7 @@ cmTarget::cmTarget(std::string const& name, cmStateEnums::TargetType type,
       this->GetType() != cmStateEnums::UTILITY) {
     this->SetPropertyDefault("C_VISIBILITY_PRESET", CM_NULLPTR);
     this->SetPropertyDefault("CXX_VISIBILITY_PRESET", CM_NULLPTR);
+    this->SetPropertyDefault("CUDA_VISIBILITY_PRESET", CM_NULLPTR);
     this->SetPropertyDefault("VISIBILITY_INLINES_HIDDEN", CM_NULLPTR);
   }
 
-- 
cgit v0.12


From 9cf5b98d54497b425fe341e4ad5bc188d9fa5445 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Tue, 8 Nov 2016 11:51:14 -0500
Subject: CUDA: Prefer environment variables CUDACXX and CUDAHOSTCXX.

---
 Modules/CMakeCUDACompiler.cmake.in       |  1 +
 Modules/CMakeDetermineCUDACompiler.cmake | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 7148275..8a6c0bc 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -6,6 +6,7 @@ set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
 set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
 
 set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
+set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
 
 set(CMAKE_CUDA_COMPILER_ID_RUN 1)
 set(CMAKE_CUDA_SOURCE_FILE_EXTENSIONS cu)
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index 9ad4fc6..419f3a5 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -12,6 +12,17 @@ endif()
 if(NOT CMAKE_CUDA_COMPILER)
   set(CMAKE_CUDA_COMPILER_INIT NOTFOUND)
 
+    # prefer the environment variable CUDACXX
+    if(NOT $ENV{CUDACXX} STREQUAL "")
+      get_filename_component(CMAKE_CUDA_COMPILER_INIT $ENV{CUDACXX} PROGRAM PROGRAM_ARGS CMAKE_CUDA_FLAGS_ENV_INIT)
+      if(CMAKE_CUDA_FLAGS_ENV_INIT)
+        set(CMAKE_CUDA_COMPILER_ARG1 "${CMAKE_CUDA_FLAGS_ENV_INIT}" CACHE STRING "First argument to CXX compiler")
+      endif()
+      if(NOT EXISTS ${CMAKE_CUDA_COMPILER_INIT})
+        message(FATAL_ERROR "Could not find compiler set in environment variable CUDACXX:\n$ENV{CUDACXX}.\n${CMAKE_CUDA_COMPILER_INIT}")
+      endif()
+    endif()
+
   # finally list compilers to try
   if(NOT CMAKE_CUDA_COMPILER_INIT)
     set(CMAKE_CUDA_COMPILER_LIST nvcc)
@@ -25,8 +36,13 @@ endif()
 mark_as_advanced(CMAKE_CUDA_COMPILER)
 
 #Allow the user to specify a host compiler
-#todo: need to specify this to compiler test passes
 set(CMAKE_CUDA_HOST_COMPILER "" CACHE FILEPATH "Host compiler to be used by nvcc")
+if(NOT $ENV{CUDAHOSTCXX} STREQUAL "")
+  get_filename_component(CMAKE_CUDA_HOST_COMPILER $ENV{CUDAHOSTCXX} PROGRAM)
+  if(NOT EXISTS ${CMAKE_CUDA_HOST_COMPILER})
+    message(FATAL_ERROR "Could not find compiler set in environment variable CUDAHOSTCXX:\n$ENV{CUDAHOSTCXX}.\n${CMAKE_CUDA_HOST_COMPILER}")
+  endif()
+endif()
 
 # Build a small source file to identify the compiler.
 if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
@@ -96,3 +112,4 @@ configure_file(${CMAKE_ROOT}/Modules/CMakeCUDACompiler.cmake.in
   )
 
 set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
+set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
-- 
cgit v0.12


From 7b9131da64b4b569e10ec145fab0c8e22fa69761 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Tue, 1 Nov 2016 16:11:51 -0400
Subject: CUDA: Add tests to verify CUDA compiler works properly.

---
 Tests/CMakeLists.txt                              |  7 ++++
 Tests/Cuda/CMakeLists.txt                         |  4 ++
 Tests/Cuda/Complex/CMakeLists.txt                 | 40 ++++++++++++++++++++
 Tests/Cuda/Complex/dynamic.cpp                    |  5 +++
 Tests/Cuda/Complex/dynamic.cu                     | 29 ++++++++++++++
 Tests/Cuda/Complex/file1.cu                       | 10 +++++
 Tests/Cuda/Complex/file1.h                        |  7 ++++
 Tests/Cuda/Complex/file2.cu                       | 20 ++++++++++
 Tests/Cuda/Complex/file2.h                        | 10 +++++
 Tests/Cuda/Complex/file3.cu                       | 25 ++++++++++++
 Tests/Cuda/Complex/main.cpp                       | 14 +++++++
 Tests/Cuda/Complex/mixed.cpp                      | 14 +++++++
 Tests/Cuda/Complex/mixed.cu                       | 25 ++++++++++++
 Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt  | 17 +++++++++
 Tests/Cuda/ConsumeCompileFeatures/main.cu         | 18 +++++++++
 Tests/Cuda/ConsumeCompileFeatures/static.cpp      | 10 +++++
 Tests/Cuda/ConsumeCompileFeatures/static.cu       |  9 +++++
 Tests/Cuda/ObjectLibrary/CMakeLists.txt           | 12 ++++++
 Tests/Cuda/ObjectLibrary/main.cpp                 | 20 ++++++++++
 Tests/Cuda/ObjectLibrary/static.cpp               |  6 +++
 Tests/Cuda/ObjectLibrary/static.cu                | 21 +++++++++++
 Tests/CudaOnly/CMakeLists.txt                     |  4 ++
 Tests/CudaOnly/EnableStandard/CMakeLists.txt      | 15 ++++++++
 Tests/CudaOnly/EnableStandard/main.cu             | 17 +++++++++
 Tests/CudaOnly/EnableStandard/shared.cu           |  9 +++++
 Tests/CudaOnly/EnableStandard/static.cu           |  9 +++++
 Tests/CudaOnly/SeparateCompilation/CMakeLists.txt | 33 ++++++++++++++++
 Tests/CudaOnly/SeparateCompilation/file1.cu       | 10 +++++
 Tests/CudaOnly/SeparateCompilation/file1.h        |  7 ++++
 Tests/CudaOnly/SeparateCompilation/file2.cu       | 20 ++++++++++
 Tests/CudaOnly/SeparateCompilation/file2.h        | 10 +++++
 Tests/CudaOnly/SeparateCompilation/file3.cu       | 25 ++++++++++++
 Tests/CudaOnly/SeparateCompilation/file4.cu       | 25 ++++++++++++
 Tests/CudaOnly/SeparateCompilation/file5.cu       | 25 ++++++++++++
 Tests/CudaOnly/SeparateCompilation/main.cu        | 15 ++++++++
 Tests/CudaOnly/WithDefs/CMakeLists.txt            | 31 +++++++++++++++
 Tests/CudaOnly/WithDefs/main.notcu                | 46 +++++++++++++++++++++++
 37 files changed, 624 insertions(+)
 create mode 100644 Tests/Cuda/CMakeLists.txt
 create mode 100644 Tests/Cuda/Complex/CMakeLists.txt
 create mode 100644 Tests/Cuda/Complex/dynamic.cpp
 create mode 100644 Tests/Cuda/Complex/dynamic.cu
 create mode 100644 Tests/Cuda/Complex/file1.cu
 create mode 100644 Tests/Cuda/Complex/file1.h
 create mode 100644 Tests/Cuda/Complex/file2.cu
 create mode 100644 Tests/Cuda/Complex/file2.h
 create mode 100644 Tests/Cuda/Complex/file3.cu
 create mode 100644 Tests/Cuda/Complex/main.cpp
 create mode 100644 Tests/Cuda/Complex/mixed.cpp
 create mode 100644 Tests/Cuda/Complex/mixed.cu
 create mode 100644 Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt
 create mode 100644 Tests/Cuda/ConsumeCompileFeatures/main.cu
 create mode 100644 Tests/Cuda/ConsumeCompileFeatures/static.cpp
 create mode 100644 Tests/Cuda/ConsumeCompileFeatures/static.cu
 create mode 100644 Tests/Cuda/ObjectLibrary/CMakeLists.txt
 create mode 100644 Tests/Cuda/ObjectLibrary/main.cpp
 create mode 100644 Tests/Cuda/ObjectLibrary/static.cpp
 create mode 100644 Tests/Cuda/ObjectLibrary/static.cu
 create mode 100644 Tests/CudaOnly/CMakeLists.txt
 create mode 100644 Tests/CudaOnly/EnableStandard/CMakeLists.txt
 create mode 100644 Tests/CudaOnly/EnableStandard/main.cu
 create mode 100644 Tests/CudaOnly/EnableStandard/shared.cu
 create mode 100644 Tests/CudaOnly/EnableStandard/static.cu
 create mode 100644 Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
 create mode 100644 Tests/CudaOnly/SeparateCompilation/file1.cu
 create mode 100644 Tests/CudaOnly/SeparateCompilation/file1.h
 create mode 100644 Tests/CudaOnly/SeparateCompilation/file2.cu
 create mode 100644 Tests/CudaOnly/SeparateCompilation/file2.h
 create mode 100644 Tests/CudaOnly/SeparateCompilation/file3.cu
 create mode 100644 Tests/CudaOnly/SeparateCompilation/file4.cu
 create mode 100644 Tests/CudaOnly/SeparateCompilation/file5.cu
 create mode 100644 Tests/CudaOnly/SeparateCompilation/main.cu
 create mode 100644 Tests/CudaOnly/WithDefs/CMakeLists.txt
 create mode 100644 Tests/CudaOnly/WithDefs/main.notcu

diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index e914d00..8b59e8f 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -324,6 +324,8 @@ if(BUILD_TESTING)
       ADD_TEST_MACRO(VSGNUFortran ${CMAKE_COMMAND} -P runtest.cmake)
     endif()
   endif()
+
+
   ADD_TEST_MACRO(COnly COnly)
   ADD_TEST_MACRO(CxxOnly CxxOnly)
   ADD_TEST_MACRO(CxxSubdirC CxxSubdirC)
@@ -1353,6 +1355,11 @@ ${CMake_BINARY_DIR}/bin/cmake -DDIR=dev -P ${CMake_SOURCE_DIR}/Utilities/Release
     endif()
   endif()
 
+  if(CMake_TEST_CUDA)
+    add_subdirectory(Cuda)
+    add_subdirectory(CudaOnly)
+  endif()
+
   if(CMake_TEST_FindBoost)
     add_subdirectory(FindBoost)
   endif()
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt
new file mode 100644
index 0000000..5772fcf
--- /dev/null
+++ b/Tests/Cuda/CMakeLists.txt
@@ -0,0 +1,4 @@
+
+ADD_TEST_MACRO(Cuda.Complex CudaComplex)
+ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
+ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
diff --git a/Tests/Cuda/Complex/CMakeLists.txt b/Tests/Cuda/Complex/CMakeLists.txt
new file mode 100644
index 0000000..9a3703a
--- /dev/null
+++ b/Tests/Cuda/Complex/CMakeLists.txt
@@ -0,0 +1,40 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaComplex CXX CUDA)
+#Goal for this example:
+
+#build a cpp dynamic library base
+#build a cuda static library base that uses separable compilation
+
+#build a cuda dynamic library that uses the first dynamic library
+#build a mixed cpp & cuda dynamic library uses all 3 previous libraries
+
+#lastly build a cpp executable that uses this last cuda dynamic library
+
+#this tests that we can properly handle linking cuda and cpp together
+#and also bulding cpp targets that need cuda implicit libraries
+
+#verify that we can pass explicit cuda arch flags
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+
+add_library(CudaComplexCppBase SHARED dynamic.cpp)
+add_library(CudaComplexSeperableLib STATIC file1.cu file2.cu file3.cu)
+set_target_properties(CudaComplexSeperableLib
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+set_target_properties( CudaComplexSeperableLib
+                       PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+add_library(CudaComplexSharedLib SHARED dynamic.cu)
+target_link_libraries(CudaComplexSharedLib PUBLIC CudaComplexCppBase)
+
+add_library(CudaComplexMixedLib SHARED mixed.cpp mixed.cu)
+target_link_libraries(CudaComplexMixedLib
+                      PUBLIC CudaComplexSharedLib
+                      PRIVATE CudaComplexSeperableLib)
+
+add_executable(CudaComplex main.cpp)
+target_link_libraries(CudaComplex PUBLIC CudaComplexMixedLib)
diff --git a/Tests/Cuda/Complex/dynamic.cpp b/Tests/Cuda/Complex/dynamic.cpp
new file mode 100644
index 0000000..d579f1e
--- /dev/null
+++ b/Tests/Cuda/Complex/dynamic.cpp
@@ -0,0 +1,5 @@
+
+int dynamic_base_func(int x)
+{
+  return x * x;
+}
diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu
new file mode 100644
index 0000000..9540e86
--- /dev/null
+++ b/Tests/Cuda/Complex/dynamic.cu
@@ -0,0 +1,29 @@
+
+#include <string>
+#include <cuda.h>
+
+int dynamic_base_func(int);
+
+int __host__ cuda_dynamic_host_func(int x)
+{
+  return dynamic_base_func(x);
+}
+
+static
+__global__
+void DetermineIfValidCudaDevice()
+{
+}
+
+void cuda_dynamic_lib_func(std::string& contents )
+{
+  DetermineIfValidCudaDevice <<<1,1>>> ();
+  if(cudaSuccess == cudaGetLastError())
+    {
+    contents = "ran a cuda kernel";
+    }
+  else
+    {
+    contents = "cant run a cuda kernel";
+    }
+}
diff --git a/Tests/Cuda/Complex/file1.cu b/Tests/Cuda/Complex/file1.cu
new file mode 100644
index 0000000..a2e8bf3
--- /dev/null
+++ b/Tests/Cuda/Complex/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x*x;
+  return r;
+}
diff --git a/Tests/Cuda/Complex/file1.h b/Tests/Cuda/Complex/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/Cuda/Complex/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};
diff --git a/Tests/Cuda/Complex/file2.cu b/Tests/Cuda/Complex/file2.cu
new file mode 100644
index 0000000..6b8b06b
--- /dev/null
+++ b/Tests/Cuda/Complex/file2.cu
@@ -0,0 +1,20 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if(x!=42)
+    {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd { r.input, r.sum, true };
+    return rd;
+    }
+  else
+    {
+    const result_type_dynamic rd { x, x*x*x, false };
+    return rd;
+    }
+
+}
diff --git a/Tests/Cuda/Complex/file2.h b/Tests/Cuda/Complex/file2.h
new file mode 100644
index 0000000..d2dbaa4
--- /dev/null
+++ b/Tests/Cuda/Complex/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};
diff --git a/Tests/Cuda/Complex/file3.cu b/Tests/Cuda/Complex/file3.cu
new file mode 100644
index 0000000..3c5e952
--- /dev/null
+++ b/Tests/Cuda/Complex/file3.cu
@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file3_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file3_launch_kernel(int x)
+{
+  result_type r;
+  file3_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}
diff --git a/Tests/Cuda/Complex/main.cpp b/Tests/Cuda/Complex/main.cpp
new file mode 100644
index 0000000..a72ffd7
--- /dev/null
+++ b/Tests/Cuda/Complex/main.cpp
@@ -0,0 +1,14 @@
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type call_cuda_seperable_code(int x);
+result_type mixed_launch_kernel(int x);
+
+int main(int argc, char** argv)
+{
+  call_cuda_seperable_code(42);
+  mixed_launch_kernel(42);
+  return 0;
+}
diff --git a/Tests/Cuda/Complex/mixed.cpp b/Tests/Cuda/Complex/mixed.cpp
new file mode 100644
index 0000000..205f091
--- /dev/null
+++ b/Tests/Cuda/Complex/mixed.cpp
@@ -0,0 +1,14 @@
+
+int dynamic_base_func(int);
+int cuda_dynamic_host_func(int);
+int file3_launch_kernel(int);
+
+int dynamic_final_func(int x)
+{
+  return cuda_dynamic_host_func(dynamic_base_func(x));
+}
+
+int call_cuda_seperable_code(int x)
+{
+  return file3_launch_kernel(x);
+}
diff --git a/Tests/Cuda/Complex/mixed.cu b/Tests/Cuda/Complex/mixed.cu
new file mode 100644
index 0000000..d2e8275
--- /dev/null
+++ b/Tests/Cuda/Complex/mixed.cu
@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void mixed_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int mixed_launch_kernel(int x)
+{
+  result_type r;
+  mixed_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}
diff --git a/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt b/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt
new file mode 100644
index 0000000..8361b9e
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaConsumeCompileFeatures CXX CUDA)
+#Goal for this example:
+
+#build a c++11 library that express a c++11 public compile feature
+#link a cuda library and verify it builds with c++11 enabled
+
+#build a standalone c++/cuda mixed executable where we express a c++11
+#compile feature.
+
+
+add_library(CudaConsumeLib STATIC static.cpp static.cu)
+target_compile_features(CudaConsumeLib PUBLIC cxx_constexpr)
+
+add_executable(CudaConsumeCompileFeatures main.cu)
+target_link_libraries(CudaConsumeCompileFeatures PRIVATE CudaConsumeLib)
diff --git a/Tests/Cuda/ConsumeCompileFeatures/main.cu b/Tests/Cuda/ConsumeCompileFeatures/main.cu
new file mode 100644
index 0000000..712871c
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/main.cu
@@ -0,0 +1,18 @@
+
+#include <iostream>
+
+int static_cxx11_func(int);
+
+void test_functions()
+{
+  auto x = static_cxx11_func( int(42) );
+  std::cout << x << std::endl;
+}
+
+int main(int argc, char **argv)
+{
+  test_functions();
+  std::cout << "this executable doesn't use cuda code, just call methods defined" << std::endl;
+  std::cout << "in libraries that have cuda code" << std::endl;
+  return 0;
+}
diff --git a/Tests/Cuda/ConsumeCompileFeatures/static.cpp b/Tests/Cuda/ConsumeCompileFeatures/static.cpp
new file mode 100644
index 0000000..565d52e
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/static.cpp
@@ -0,0 +1,10 @@
+
+
+#include <type_traits>
+
+int static_cuda11_func(int);
+
+int static_cxx11_func(int x)
+{
+  return static_cuda11_func(x) + std::integral_constant<int, 32>::value;
+}
diff --git a/Tests/Cuda/ConsumeCompileFeatures/static.cu b/Tests/Cuda/ConsumeCompileFeatures/static.cu
new file mode 100644
index 0000000..73e43a8
--- /dev/null
+++ b/Tests/Cuda/ConsumeCompileFeatures/static.cu
@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/Cuda/ObjectLibrary/CMakeLists.txt b/Tests/Cuda/ObjectLibrary/CMakeLists.txt
new file mode 100644
index 0000000..cbe1e67
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaObjectLibrary CUDA CXX)
+#Goal for this example:
+
+#build a object files some with cuda and some without than
+#embed these into an executable
+
+add_library(CudaMixedObjectLib OBJECT static.cu static.cpp)
+
+add_executable(CudaObjectLibrary
+               main.cpp
+               $<TARGET_OBJECTS:CudaMixedObjectLib>)
diff --git a/Tests/Cuda/ObjectLibrary/main.cpp b/Tests/Cuda/ObjectLibrary/main.cpp
new file mode 100644
index 0000000..1a70a99
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/main.cpp
@@ -0,0 +1,20 @@
+
+#include <iostream>
+
+int static_func(int);
+int file1_sq_func(int);
+
+void test_functions()
+{
+  file1_sq_func(static_func(42));
+}
+
+int main(int argc, char** argv)
+{
+  test_functions();
+  std::cout
+    << "this executable doesn't use cuda code, just call methods defined"
+    << std::endl;
+  std::cout << "in object files that have cuda code" << std::endl;
+  return 0;
+}
diff --git a/Tests/Cuda/ObjectLibrary/static.cpp b/Tests/Cuda/ObjectLibrary/static.cpp
new file mode 100644
index 0000000..6db1f91
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/static.cpp
@@ -0,0 +1,6 @@
+int file1_sq_func(int);
+
+int static_func(int x)
+{
+  return file1_sq_func(x);
+}
diff --git a/Tests/Cuda/ObjectLibrary/static.cu b/Tests/Cuda/ObjectLibrary/static.cu
new file mode 100644
index 0000000..2374c23
--- /dev/null
+++ b/Tests/Cuda/ObjectLibrary/static.cu
@@ -0,0 +1,21 @@
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+int __host__ file1_sq_func(int x)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if(err != cudaSuccess)
+  {
+    std::cout << "nDevices: " << nDevices << std::endl;
+    std::cout << "err: " << err << std::endl;
+    return 1;
+  }
+  std::cout << "this library uses cuda code" << std::endl;
+  std::cout << "you have " << nDevices << " devices that support cuda" << std::endl;
+
+  return x * x;
+}
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
new file mode 100644
index 0000000..85a2051
--- /dev/null
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -0,0 +1,4 @@
+
+ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
+ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
diff --git a/Tests/CudaOnly/EnableStandard/CMakeLists.txt b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
new file mode 100644
index 0000000..53b9132
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/CMakeLists.txt
@@ -0,0 +1,15 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyEnableStandard CUDA)
+
+#Goal for this example:
+#build cuda sources that require C++11 to be enabled.
+
+add_library(CUDAStatic11 STATIC static.cu)
+add_library(CUDADynamic11 SHARED shared.cu)
+
+add_executable(CudaOnlyEnableStandard main.cu)
+target_link_libraries(CudaOnlyEnableStandard PRIVATE CUDAStatic11 CUDADynamic11)
+
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD 11)
+set_target_properties(CUDAStatic11 CUDADynamic11 PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
diff --git a/Tests/CudaOnly/EnableStandard/main.cu b/Tests/CudaOnly/EnableStandard/main.cu
new file mode 100644
index 0000000..83e9dfd
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/main.cu
@@ -0,0 +1,17 @@
+
+#include <iostream>
+
+int static_cuda11_func(int);
+int shared_cuda11_func(int);
+
+void test_functions()
+{
+  static_cuda11_func( int(42) );
+  shared_cuda11_func( int(42) );
+}
+
+int main(int argc, char **argv)
+{
+  test_functions();
+  return 0;
+}
diff --git a/Tests/CudaOnly/EnableStandard/shared.cu b/Tests/CudaOnly/EnableStandard/shared.cu
new file mode 100644
index 0000000..28555b3
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/shared.cu
@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ shared_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/CudaOnly/EnableStandard/static.cu b/Tests/CudaOnly/EnableStandard/static.cu
new file mode 100644
index 0000000..73e43a8
--- /dev/null
+++ b/Tests/CudaOnly/EnableStandard/static.cu
@@ -0,0 +1,9 @@
+
+#include <type_traits>
+
+using tt = std::true_type;
+using ft = std::false_type;
+int __host__ static_cuda11_func(int x)
+{
+  return x * x + std::integral_constant<int, 17>::value;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
new file mode 100644
index 0000000..7055eef
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
@@ -0,0 +1,33 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlySeparateCompilation CUDA)
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#After that confirm that we can call those methods from dynamic libraries
+#and executables.
+#We complicate the matter by also testing that multiple static libraries
+#all containing cuda separable compilation code links properly
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+add_library(CUDASerarateLibA STATIC file1.cu file2.cu file3.cu)
+
+#Having file4/file5 in a shared library causes serious problems
+#with the nvcc linker and it will generate bad entries that will
+#cause a segv when trying to run the executable
+#
+add_library(CUDASerarateLibB STATIC file4.cu file5.cu)
+target_link_libraries(CUDASerarateLibB PRIVATE CUDASerarateLibA)
+
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASerarateLibB)
+
+set_target_properties( CUDASerarateLibA
+                       CUDASerarateLibB
+                       PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+set_target_properties( CUDASerarateLibA
+                       CUDASerarateLibB
+                       PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/Tests/CudaOnly/SeparateCompilation/file1.cu b/Tests/CudaOnly/SeparateCompilation/file1.cu
new file mode 100644
index 0000000..a2e8bf3
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+  result_type r;
+  r.input = x;
+  r.sum = x*x;
+  return r;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file1.h b/Tests/CudaOnly/SeparateCompilation/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+  int input;
+  int sum;
+};
diff --git a/Tests/CudaOnly/SeparateCompilation/file2.cu b/Tests/CudaOnly/SeparateCompilation/file2.cu
new file mode 100644
index 0000000..6b8b06b
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file2.cu
@@ -0,0 +1,20 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+  if(x!=42)
+    {
+    const result_type r = file1_func(x);
+    const result_type_dynamic rd { r.input, r.sum, true };
+    return rd;
+    }
+  else
+    {
+    const result_type_dynamic rd { x, x*x*x, false };
+    return rd;
+    }
+
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file2.h b/Tests/CudaOnly/SeparateCompilation/file2.h
new file mode 100644
index 0000000..d2dbaa4
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+  int input;
+  int sum;
+  bool from_static;
+};
diff --git a/Tests/CudaOnly/SeparateCompilation/file3.cu b/Tests/CudaOnly/SeparateCompilation/file3.cu
new file mode 100644
index 0000000..670a18b
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file3.cu
@@ -0,0 +1,25 @@
+
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+
+static
+__global__
+void file3_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+result_type file3_launch_kernel(int x)
+{
+  result_type r;
+  file3_kernel <<<1,1>>> (r,x);
+  return r;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file4.cu b/Tests/CudaOnly/SeparateCompilation/file4.cu
new file mode 100644
index 0000000..86ef623
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file4.cu
@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file4_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file4_launch_kernel(int x)
+{
+  result_type r;
+  file4_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/file5.cu b/Tests/CudaOnly/SeparateCompilation/file5.cu
new file mode 100644
index 0000000..6fdb32a
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/file5.cu
@@ -0,0 +1,25 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+result_type_dynamic __device__ file2_func(int x);
+
+static
+__global__
+void file5_kernel(result_type& r, int x)
+{
+  //call static_func which is a method that is defined in the
+  //static library that is always out of date
+  r = file1_func(x);
+  result_type_dynamic rd = file2_func(x);
+}
+
+int file5_launch_kernel(int x)
+{
+  result_type r;
+  file5_kernel <<<1,1>>> (r,x);
+  return r.sum;
+}
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu
new file mode 100644
index 0000000..d4520ae
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/main.cu
@@ -0,0 +1,15 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+// result_type file4_launch_kernel(int x);
+// result_type file5_launch_kernel(int x);
+
+int main(int argc, char **argv)
+{
+  // file4_launch_kernel(42);
+  // file5_launch_kernel(42);
+  return 0;
+}
diff --git a/Tests/CudaOnly/WithDefs/CMakeLists.txt b/Tests/CudaOnly/WithDefs/CMakeLists.txt
new file mode 100644
index 0000000..c4ca8b9
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/CMakeLists.txt
@@ -0,0 +1,31 @@
+
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyWithDefs CUDA)
+
+#verify that we can pass explicit cuda arch flags
+set(CMAKE_CUDA_FLAGS "-gencode arch=compute_30,code=compute_30")
+set(debug_compile_flags --generate-code arch=compute_20,code=sm_20 -Xcompiler=-Werror)
+set(release_compile_defs DEFREL)
+
+#Goal for this example:
+#build a executable that needs to be passed a complex define through add_defintions
+#this verifies we can pass things such as '_','(' to nvcc
+add_definitions("-DPACKED_DEFINE=__attribute__((packed))")
+set_source_files_properties(main.notcu PROPERTIES LANGUAGE CUDA)
+add_executable(CudaOnlyWithDefs main.notcu)
+
+target_compile_options(CudaOnlyWithDefs
+  PRIVATE
+    $<$<CONFIG:DEBUG>:$<BUILD_INTERFACE:${debug_compile_flags}>>
+  )
+
+target_compile_definitions(CudaOnlyWithDefs
+  PRIVATE
+    $<$<CONFIG:RELEASE>:$<BUILD_INTERFACE:${release_compile_defs}>>
+  )
+
+#we need to add an rpath for the cuda library so that everything
+#loads properly on the mac
+if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  set_target_properties(CudaOnlyWithDefs PROPERTIES LINK_FLAGS "-Wl,-rpath,${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
+endif()
diff --git a/Tests/CudaOnly/WithDefs/main.notcu b/Tests/CudaOnly/WithDefs/main.notcu
new file mode 100644
index 0000000..6b02bbc
--- /dev/null
+++ b/Tests/CudaOnly/WithDefs/main.notcu
@@ -0,0 +1,46 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+
+static
+__global__
+void DetermineIfValidCudaDevice()
+{
+}
+
+struct PACKED_DEFINE result_type
+{
+  bool valid;
+  int value;
+#if defined(NDEBUG) && !defined(DEFREL)
+#error missing DEFREL flag
+#endif
+};
+
+result_type can_launch_kernel()
+{
+  result_type r;
+  DetermineIfValidCudaDevice <<<1,1>>> ();
+  r.valid = (cudaSuccess == cudaGetLastError());
+  if(r.valid)
+    {
+    r.value = 1;
+    }
+  else
+    {
+    r.value = -1;
+    }
+  return r;
+}
+
+int main(int argc, char **argv)
+{
+  cudaError_t err;
+  int nDevices = 0;
+  err = cudaGetDeviceCount(&nDevices);
+  if(err != cudaSuccess)
+    {
+      return 1;
+    }
+  return 0;
+}
-- 
cgit v0.12


From 4cc601f2c5c0fe76fa76f7e4a47d112df7a1c032 Mon Sep 17 00:00:00 2001
From: Brad King <brad.king@kitware.com>
Date: Wed, 9 Nov 2016 14:27:44 -0500
Subject: Help: Add release note for CUDA support

---
 Help/release/dev/CUDA-language-support.rst | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 Help/release/dev/CUDA-language-support.rst

diff --git a/Help/release/dev/CUDA-language-support.rst b/Help/release/dev/CUDA-language-support.rst
new file mode 100644
index 0000000..7df45bf
--- /dev/null
+++ b/Help/release/dev/CUDA-language-support.rst
@@ -0,0 +1,6 @@
+CUDA-language-support
+---------------------
+
+* CMake learned to support CUDA as a first-class language.
+  It is supported by the :ref:`Makefile Generators` and the
+  :generator:`Ninja` generator.
-- 
cgit v0.12