CUDA: Clang separable compilation

For NVCC the compiler takes care of device linking when passed the "-dlink" flag. Clang doesn't support such magic and requires the buildsystem to do the work that NVCC does behind the scenes. The implementation is based on Bazel's device linking documentation: https://github.com/tensorflow/tensorflow/blob/7cabcdf073abad8c46e9dda62bb8fa4682d2061e/third_party/nccl/build_defs.bzl.tpl#L259 Closes: #20726
author: Raul Tambre <raul@tambre.ee> 2020-09-05 16:40:02 (GMT)
committer: Brad King <brad.king@kitware.com> 2020-09-24 19:19:54 (GMT)
commit: c63fe018353cf6afb30980c4cac7493be7cd0a82 (patch)
tree: 68d2daf0cd8ab91a9feaa49392607c6cfecd2ac4 /Modules
parent: c98ec731f90eb0180c89108b7d2e42263b66d1ed (diff)
download: CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.zip
CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.gz
CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.bz2
5 files changed, 25 insertions, 3 deletions
diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 704ad09..871e18e 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -3,6 +3,8 @@ set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@")
 set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@")
 set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@")
 set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@")
+set(CMAKE_CUDA_DEVICE_LINKER "@CMAKE_CUDA_DEVICE_LINKER@")
+set(CMAKE_CUDA_FATBINARY "@CMAKE_CUDA_FATBINARY@")
 set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@")
 set(CMAKE_CUDA_COMPILE_FEATURES "@CMAKE_CUDA_COMPILE_FEATURES@")
 set(CMAKE_CUDA03_COMPILE_FEATURES "@CMAKE_CUDA03_COMPILE_FEATURES@")
@@ -44,6 +46,7 @@ if(CMAKE_CUDA_LIBRARY_ARCHITECTURE)
 endif()
 
 set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@")
+set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@")
 set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
 
 set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index f9f7574..58e6e29 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -145,7 +145,7 @@ endif()
 #Specify how to compile when separable compilation has been requested
 if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION)
   set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
-    "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -dc <SOURCE> -o <OBJECT>")
+    "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_DEVICE_CODE} <SOURCE> -o <OBJECT>")
 endif()
 
 #Specify how to compile when whole compilation has been requested
@@ -200,6 +200,11 @@ if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE)
     "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}")
 endif()
 
+# Used when device linking is handled by CMake.
+if(NOT CMAKE_CUDA_DEVICE_LINK_COMPILE)
+  set(CMAKE_CUDA_DEVICE_LINK_COMPILE "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <FLAGS> -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -D__NV_EXTRA_INITIALIZATION=\"\" -D__NV_EXTRA_FINALIZATION=\"\" -DREGISTERLINKBINARYFILE=\\\"<REGISTER_FILE>\\\" -DFATBINFILE=\\\"<FATBINARY>\\\" ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c \"${CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT}/bin/crt/link.stub\" -o <OBJECT>")
+endif()
+
 unset(__IMPLICT_DLINK_FLAGS)
 
 set(CMAKE_CUDA_INFORMATION_LOADED 1)
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index e60a973..9220551 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -169,11 +169,14 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     endif()
 
     get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${_CUDA_NVCC_EXECUTABLE}" DIRECTORY)
+    set(CMAKE_CUDA_DEVICE_LINKER "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/nvlink${CMAKE_EXECUTABLE_SUFFIX}")
+    set(CMAKE_CUDA_FATBINARY "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/fatbinary${CMAKE_EXECUTABLE_SUFFIX}")
     get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
 
-    # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file.
-    # In a non-scattered installation this is equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
+    # In a non-scattered installation the following are equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT.
     # We first check for a non-scattered installation to prefer it over a scattered installation.
+
+    # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file.
     if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/version.txt")
       set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
     elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt")
@@ -181,6 +184,15 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt")
       set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/cuda")
     endif()
+
+    # CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT contains the linking stubs necessary for device linking and other low-level library files.
+    if(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
+      set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit")
+    elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub")
+      set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit")
+    else()
+      set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
+    endif()
   endif()
 
   set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake
index 336827b..fd8c2b7 100644
--- a/Modules/Compiler/Clang-CUDA.cmake
+++ b/Modules/Compiler/Clang-CUDA.cmake
@@ -13,6 +13,7 @@ __compiler_clang_cxx_standards(CUDA)
 set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE)
 set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda")
 set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S")
+set(_CMAKE_CUDA_DEVICE_CODE "-fgpu-rdc -c")
 
 # RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default.
 set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 3187294..7c24373 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -6,6 +6,7 @@ set(CMAKE_CUDA_VERBOSE_COMPILE_FLAG "-Xcompiler=-v")
 
 set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu")
 set(_CMAKE_CUDA_PTX_FLAG "-ptx")
+set(_CMAKE_CUDA_DEVICE_CODE "-dc")
 
 if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89)
   # The -forward-unknown-to-host-compiler flag was only
author	Raul Tambre <raul@tambre.ee>	2020-09-05 16:40:02 (GMT)
committer	Brad King <brad.king@kitware.com>	2020-09-24 19:19:54 (GMT)
commit	c63fe018353cf6afb30980c4cac7493be7cd0a82 (patch)
tree	68d2daf0cd8ab91a9feaa49392607c6cfecd2ac4 /Modules
parent	c98ec731f90eb0180c89108b7d2e42263b66d1ed (diff)
download	CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.zip CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.gz CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.bz2