diff options
author | Raul Tambre <raul@tambre.ee> | 2020-09-05 16:40:02 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2020-09-24 19:19:54 (GMT) |
commit | c63fe018353cf6afb30980c4cac7493be7cd0a82 (patch) | |
tree | 68d2daf0cd8ab91a9feaa49392607c6cfecd2ac4 /Modules | |
parent | c98ec731f90eb0180c89108b7d2e42263b66d1ed (diff) | |
download | CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.zip CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.gz CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.bz2 |
CUDA: Clang separable compilation
For NVCC the compiler takes care of device linking when passed the "-dlink"
flag.
Clang doesn't support such magic and requires the buildsystem to do the work
that NVCC does behind the scenes.
The implementation is based on Bazel's device linking documentation:
https://github.com/tensorflow/tensorflow/blob/7cabcdf073abad8c46e9dda62bb8fa4682d2061e/third_party/nccl/build_defs.bzl.tpl#L259
Closes: #20726
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/CMakeCUDACompiler.cmake.in | 3 | ||||
-rw-r--r-- | Modules/CMakeCUDAInformation.cmake | 7 | ||||
-rw-r--r-- | Modules/CMakeDetermineCUDACompiler.cmake | 16 | ||||
-rw-r--r-- | Modules/Compiler/Clang-CUDA.cmake | 1 | ||||
-rw-r--r-- | Modules/Compiler/NVIDIA-CUDA.cmake | 1 |
5 files changed, 25 insertions, 3 deletions
diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in index 704ad09..871e18e 100644 --- a/Modules/CMakeCUDACompiler.cmake.in +++ b/Modules/CMakeCUDACompiler.cmake.in @@ -3,6 +3,8 @@ set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@") set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@") set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@") +set(CMAKE_CUDA_DEVICE_LINKER "@CMAKE_CUDA_DEVICE_LINKER@") +set(CMAKE_CUDA_FATBINARY "@CMAKE_CUDA_FATBINARY@") set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@") set(CMAKE_CUDA_COMPILE_FEATURES "@CMAKE_CUDA_COMPILE_FEATURES@") set(CMAKE_CUDA03_COMPILE_FEATURES "@CMAKE_CUDA03_COMPILE_FEATURES@") @@ -44,6 +46,7 @@ if(CMAKE_CUDA_LIBRARY_ARCHITECTURE) endif() set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@") +set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@") set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake index f9f7574..58e6e29 100644 --- a/Modules/CMakeCUDAInformation.cmake +++ b/Modules/CMakeCUDAInformation.cmake @@ -145,7 +145,7 @@ endif() #Specify how to compile when separable compilation has been requested if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION) set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION - "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -dc <SOURCE> -o <OBJECT>") + "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_DEVICE_CODE} <SOURCE> -o <OBJECT>") endif() #Specify how to compile when whole compilation has been requested @@ -200,6 +200,11 @@ if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE) "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_DLINK_FLAGS}") endif() +# Used when device linking is handled by CMake. +if(NOT CMAKE_CUDA_DEVICE_LINK_COMPILE) + set(CMAKE_CUDA_DEVICE_LINK_COMPILE "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <FLAGS> -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -D__NV_EXTRA_INITIALIZATION=\"\" -D__NV_EXTRA_FINALIZATION=\"\" -DREGISTERLINKBINARYFILE=\\\"<REGISTER_FILE>\\\" -DFATBINFILE=\\\"<FATBINARY>\\\" ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c \"${CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT}/bin/crt/link.stub\" -o <OBJECT>") +endif() + unset(__IMPLICT_DLINK_FLAGS) set(CMAKE_CUDA_INFORMATION_LOADED 1) diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index e60a973..9220551 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -169,11 +169,14 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) endif() get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${_CUDA_NVCC_EXECUTABLE}" DIRECTORY) + set(CMAKE_CUDA_DEVICE_LINKER "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/nvlink${CMAKE_EXECUTABLE_SUFFIX}") + set(CMAKE_CUDA_FATBINARY "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/fatbinary${CMAKE_EXECUTABLE_SUFFIX}") get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY) - # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file. - # In a non-scattered installation this is equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT. + # In a non-scattered installation the following are equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT. # We first check for a non-scattered installation to prefer it over a scattered installation. + + # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file. if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/version.txt") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt") @@ -181,6 +184,15 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/cuda") endif() + + # CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT contains the linking stubs necessary for device linking and other low-level library files. + if(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub") + set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit") + elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub") + set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit") + else() + set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") + endif() endif() set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v") diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake index 336827b..fd8c2b7 100644 --- a/Modules/Compiler/Clang-CUDA.cmake +++ b/Modules/Compiler/Clang-CUDA.cmake @@ -13,6 +13,7 @@ __compiler_clang_cxx_standards(CUDA) set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE) set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda") set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S") +set(_CMAKE_CUDA_DEVICE_CODE "-fgpu-rdc -c") # RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default. set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}") diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake index 3187294..7c24373 100644 --- a/Modules/Compiler/NVIDIA-CUDA.cmake +++ b/Modules/Compiler/NVIDIA-CUDA.cmake @@ -6,6 +6,7 @@ set(CMAKE_CUDA_VERBOSE_COMPILE_FLAG "-Xcompiler=-v") set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu") set(_CMAKE_CUDA_PTX_FLAG "-ptx") +set(_CMAKE_CUDA_DEVICE_CODE "-dc") if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89) # The -forward-unknown-to-host-compiler flag was only |