From c63fe018353cf6afb30980c4cac7493be7cd0a82 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 19:40:02 +0300 Subject: CUDA: Clang separable compilation For NVCC the compiler takes care of device linking when passed the "-dlink" flag. Clang doesn't support such magic and requires the buildsystem to do the work that NVCC does behind the scenes. The implementation is based on Bazel's device linking documentation: https://github.com/tensorflow/tensorflow/blob/7cabcdf073abad8c46e9dda62bb8fa4682d2061e/third_party/nccl/build_defs.bzl.tpl#L259 Closes: #20726 --- .../dev/cuda-clang-separable-compilation.rst | 4 + Modules/CMakeCUDACompiler.cmake.in | 3 + Modules/CMakeCUDAInformation.cmake | 7 +- Modules/CMakeDetermineCUDACompiler.cmake | 16 +- Modules/Compiler/Clang-CUDA.cmake | 1 + Modules/Compiler/NVIDIA-CUDA.cmake | 1 + Source/cmLocalGenerator.cxx | 11 - Source/cmLocalGenerator.h | 2 +- Source/cmMakefileExecutableTargetGenerator.cxx | 53 +++-- Source/cmMakefileExecutableTargetGenerator.h | 4 + Source/cmMakefileLibraryTargetGenerator.cxx | 73 ++++--- Source/cmMakefileLibraryTargetGenerator.h | 5 + Source/cmMakefileTargetGenerator.cxx | 133 +++++++++++- Source/cmMakefileTargetGenerator.h | 7 +- Source/cmNinjaNormalTargetGenerator.cxx | 224 ++++++++++++++++++--- Source/cmNinjaNormalTargetGenerator.h | 15 +- Source/cmNinjaTargetGenerator.cxx | 9 +- Source/cmNinjaTargetGenerator.h | 3 +- Source/cmRulePlaceholderExpander.cxx | 10 + Source/cmRulePlaceholderExpander.h | 2 + Tests/Cuda/CMakeLists.txt | 7 +- Tests/CudaOnly/CMakeLists.txt | 36 ++-- 22 files changed, 502 insertions(+), 124 deletions(-) create mode 100644 Help/release/dev/cuda-clang-separable-compilation.rst diff --git a/Help/release/dev/cuda-clang-separable-compilation.rst b/Help/release/dev/cuda-clang-separable-compilation.rst new file mode 100644 index 0000000..8ff4cf4 --- /dev/null +++ b/Help/release/dev/cuda-clang-separable-compilation.rst @@ -0,0 +1,4 @@ +cuda-clang-separable-compilation +-------------------------------- + +* :prop_tgt:`CUDA_SEPARABLE_COMPILATION` is now supported when using Clang. diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in index 704ad09..871e18e 100644 --- a/Modules/CMakeCUDACompiler.cmake.in +++ b/Modules/CMakeCUDACompiler.cmake.in @@ -3,6 +3,8 @@ set(CMAKE_CUDA_HOST_COMPILER "@CMAKE_CUDA_HOST_COMPILER@") set(CMAKE_CUDA_HOST_LINK_LAUNCHER "@CMAKE_CUDA_HOST_LINK_LAUNCHER@") set(CMAKE_CUDA_COMPILER_ID "@CMAKE_CUDA_COMPILER_ID@") set(CMAKE_CUDA_COMPILER_VERSION "@CMAKE_CUDA_COMPILER_VERSION@") +set(CMAKE_CUDA_DEVICE_LINKER "@CMAKE_CUDA_DEVICE_LINKER@") +set(CMAKE_CUDA_FATBINARY "@CMAKE_CUDA_FATBINARY@") set(CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT "@CMAKE_CUDA_STANDARD_COMPUTED_DEFAULT@") set(CMAKE_CUDA_COMPILE_FEATURES "@CMAKE_CUDA_COMPILE_FEATURES@") set(CMAKE_CUDA03_COMPILE_FEATURES "@CMAKE_CUDA03_COMPILE_FEATURES@") @@ -44,6 +46,7 @@ if(CMAKE_CUDA_LIBRARY_ARCHITECTURE) endif() set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@") +set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@") set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake index f9f7574..58e6e29 100644 --- a/Modules/CMakeCUDAInformation.cmake +++ b/Modules/CMakeCUDAInformation.cmake @@ -145,7 +145,7 @@ endif() #Specify how to compile when separable compilation has been requested if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION) set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION - " ${_CMAKE_CUDA_EXTRA_FLAGS} ${_CMAKE_COMPILE_AS_CUDA_FLAG} -dc -o ") + " ${_CMAKE_CUDA_EXTRA_FLAGS} ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_DEVICE_CODE} -o ") endif() #Specify how to compile when whole compilation has been requested @@ -200,6 +200,11 @@ if(NOT CMAKE_CUDA_DEVICE_LINK_EXECUTABLE) " ${_CMAKE_CUDA_EXTRA_FLAGS} ${CMAKE_CUDA_COMPILE_OPTIONS_PIC} ${_CMAKE_CUDA_EXTRA_DEVICE_LINK_FLAGS} -shared -dlink -o ${__IMPLICT_DLINK_FLAGS}") endif() +# Used when device linking is handled by CMake. +if(NOT CMAKE_CUDA_DEVICE_LINK_COMPILE) + set(CMAKE_CUDA_DEVICE_LINK_COMPILE " ${_CMAKE_CUDA_EXTRA_FLAGS} -D__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ -D__NV_EXTRA_INITIALIZATION=\"\" -D__NV_EXTRA_FINALIZATION=\"\" -DREGISTERLINKBINARYFILE=\\\"\\\" -DFATBINFILE=\\\"\\\" ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c \"${CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT}/bin/crt/link.stub\" -o ") +endif() + unset(__IMPLICT_DLINK_FLAGS) set(CMAKE_CUDA_INFORMATION_LOADED 1) diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index e60a973..9220551 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -169,11 +169,14 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) endif() get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${_CUDA_NVCC_EXECUTABLE}" DIRECTORY) + set(CMAKE_CUDA_DEVICE_LINKER "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/nvlink${CMAKE_EXECUTABLE_SUFFIX}") + set(CMAKE_CUDA_FATBINARY "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/fatbinary${CMAKE_EXECUTABLE_SUFFIX}") get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY) - # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file. - # In a non-scattered installation this is equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT. + # In a non-scattered installation the following are equivalent to CMAKE_CUDA_COMPILER_TOOLKIT_ROOT. # We first check for a non-scattered installation to prefer it over a scattered installation. + + # CMAKE_CUDA_COMPILER_LIBRARY_ROOT contains the device library and version file. if(EXISTS "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}/version.txt") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") elseif(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/cuda/version.txt") @@ -181,6 +184,15 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/cuda/version.txt") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/cuda") endif() + + # CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT contains the linking stubs necessary for device linking and other low-level library files. + if(CMAKE_SYSROOT_LINK AND EXISTS "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub") + set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT_LINK}/usr/lib/nvidia-cuda-toolkit") + elseif(EXISTS "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub") + set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_SYSROOT}/usr/lib/nvidia-cuda-toolkit") + else() + set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") + endif() endif() set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v") diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake index 336827b..fd8c2b7 100644 --- a/Modules/Compiler/Clang-CUDA.cmake +++ b/Modules/Compiler/Clang-CUDA.cmake @@ -13,6 +13,7 @@ __compiler_clang_cxx_standards(CUDA) set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE) set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda") set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S") +set(_CMAKE_CUDA_DEVICE_CODE "-fgpu-rdc -c") # RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE__COMPILER. Override the default. set(CMAKE_CUDA_LINK_EXECUTABLE " -o ${__IMPLICT_LINKS}") diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake index 3187294..7c24373 100644 --- a/Modules/Compiler/NVIDIA-CUDA.cmake +++ b/Modules/Compiler/NVIDIA-CUDA.cmake @@ -6,6 +6,7 @@ set(CMAKE_CUDA_VERBOSE_COMPILE_FLAG "-Xcompiler=-v") set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu") set(_CMAKE_CUDA_PTX_FLAG "-ptx") +set(_CMAKE_CUDA_DEVICE_CODE "-dc") if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89) # The -forward-unknown-to-host-compiler flag was only diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx index 47931b0..4e6010c 100644 --- a/Source/cmLocalGenerator.cxx +++ b/Source/cmLocalGenerator.cxx @@ -1955,17 +1955,6 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags, } else if (lang == "CUDA") { target->AddCUDAArchitectureFlags(flags); target->AddCUDAToolkitFlags(flags); - - if (compiler == "Clang") { - bool separable = target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION"); - - if (separable) { - this->Makefile->IssueMessage( - MessageType::FATAL_ERROR, - "CUDA_SEPARABLE_COMPILATION isn't supported on Clang. " - "See CMake issue #20726."); - } - } } else if (lang == "ISPC") { target->AddISPCTargetFlags(flags); } diff --git a/Source/cmLocalGenerator.h b/Source/cmLocalGenerator.h index fad6136..22d3599 100644 --- a/Source/cmLocalGenerator.h +++ b/Source/cmLocalGenerator.h @@ -446,7 +446,7 @@ public: void GetTargetCompileFlags(cmGeneratorTarget* target, std::string const& config, std::string const& lang, std::string& flags, - std::string const& arch = std::string()); + std::string const& arch); std::vector> GetTargetCompileFlags( cmGeneratorTarget* target, std::string const& config, std::string const& lang, std::string const& arch = std::string()); diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx index 9b5c6e6..871878c 100644 --- a/Source/cmMakefileExecutableTargetGenerator.cxx +++ b/Source/cmMakefileExecutableTargetGenerator.cxx @@ -91,19 +91,12 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( std::vector commands; - // Get the language to use for linking this library. - std::string linkLanguage = "CUDA"; + // Get the name of the device object to generate. std::string const& objExt = this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION"); - - // Build list of dependencies. - std::vector depends; - this->AppendLinkDepends(depends, linkLanguage); - - // Get the name of the device object to generate. - std::string const targetOutputReal = + std::string const targetOutput = this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt; - this->DeviceLinkObject = targetOutputReal; + this->DeviceLinkObject = targetOutput; this->NumberOfProgressActions++; if (!this->NoRuleMessages) { @@ -111,7 +104,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( this->MakeEchoProgress(progress); // Add the link message. std::string buildEcho = - cmStrCat("Linking ", linkLanguage, " device code ", + cmStrCat("Linking CUDA device code ", this->LocalGenerator->ConvertToOutputFormat( this->LocalGenerator->MaybeConvertToRelativePath( this->LocalGenerator->GetCurrentBinaryDirectory(), @@ -121,6 +114,29 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress); } + if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") { + this->WriteDeviceLinkRule(commands, targetOutput); + } else { + this->WriteNvidiaDeviceExecutableRule(relink, commands, targetOutput); + } + + // Write the main driver rule to build everything in this target. + this->WriteTargetDriverRule(targetOutput, relink); +#else + static_cast(relink); +#endif +} + +void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule( + bool relink, std::vector& commands, + const std::string& targetOutput) +{ + const std::string linkLanguage = "CUDA"; + + // Build list of dependencies. + std::vector depends; + this->AppendLinkDepends(depends, linkLanguage); + // Build a list of compiler flags and linker flags. std::string langFlags; std::string linkFlags; @@ -136,7 +152,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( // may need to be cleaned. std::vector exeCleanFiles; exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath( - this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal)); + this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput)); // Determine whether a link script will be used. bool useLinkScript = this->GlobalGenerator->GetUseLinkScript(); @@ -195,7 +211,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( : cmOutputConverter::SHELL; std::string target = this->LocalGenerator->ConvertToOutputFormat( this->LocalGenerator->MaybeConvertToRelativePath( - this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal), + this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput), output); std::string targetFullPathCompilePDB = @@ -226,7 +242,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( this->LocalGenerator->CreateRulePlaceholderExpander()); // Expand placeholders in the commands. - rulePlaceholderExpander->SetTargetImpLib(targetOutputReal); + rulePlaceholderExpander->SetTargetImpLib(targetOutput); for (std::string& real_link_command : real_link_commands) { real_link_command = cmStrCat(launcher, real_link_command); rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator, @@ -255,17 +271,10 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( // Write the build rule. this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, - targetOutputReal, depends, commands, - false); - - // Write the main driver rule to build everything in this target. - this->WriteTargetDriverRule(targetOutputReal, relink); + targetOutput, depends, commands, false); // Clean all the possible executable names and symlinks. this->CleanFiles.insert(exeCleanFiles.begin(), exeCleanFiles.end()); -#else - static_cast(relink); -#endif } void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink) diff --git a/Source/cmMakefileExecutableTargetGenerator.h b/Source/cmMakefileExecutableTargetGenerator.h index 782692a..520f577 100644 --- a/Source/cmMakefileExecutableTargetGenerator.h +++ b/Source/cmMakefileExecutableTargetGenerator.h @@ -5,6 +5,7 @@ #include "cmConfigure.h" // IWYU pragma: keep #include +#include #include "cmMakefileTargetGenerator.h" @@ -23,6 +24,9 @@ public: protected: virtual void WriteExecutableRule(bool relink); virtual void WriteDeviceExecutableRule(bool relink); + virtual void WriteNvidiaDeviceExecutableRule( + bool relink, std::vector& commands, + const std::string& targetOutput); private: std::string DeviceLinkObject; diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx index 1c25fc4..b32ea6a 100644 --- a/Source/cmMakefileLibraryTargetGenerator.cxx +++ b/Source/cmMakefileLibraryTargetGenerator.cxx @@ -129,8 +129,7 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules() const bool requiresDeviceLinking = requireDeviceLinking( *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName()); if (requiresDeviceLinking) { - std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; - this->WriteDeviceLibraryRules(linkRuleVar, false); + this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", false); } std::string linkLanguage = @@ -156,8 +155,7 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink) const bool requiresDeviceLinking = requireDeviceLinking( *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName()); if (requiresDeviceLinking) { - std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; - this->WriteDeviceLibraryRules(linkRuleVar, relink); + this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink); } } @@ -191,8 +189,7 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink) const bool requiresDeviceLinking = requireDeviceLinking( *this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName()); if (requiresDeviceLinking) { - std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; - this->WriteDeviceLibraryRules(linkRuleVar, relink); + this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink); } } @@ -239,29 +236,13 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( // TODO: Merge the methods that call this method to avoid // code duplication. std::vector commands; - - // Get the language to use for linking this library. - std::string linkLanguage = "CUDA"; std::string const objExt = this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION"); - // Build list of dependencies. - std::vector depends; - this->AppendLinkDepends(depends, linkLanguage); - - // Add language-specific flags. - std::string langFlags; - this->LocalGenerator->AddLanguageFlagsForLinking( - langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName()); - - // Create set of linking flags. - std::string linkFlags; - this->GetDeviceLinkFlags(linkFlags, linkLanguage); - // Get the name of the device object to generate. - std::string const targetOutputReal = + std::string const targetOutput = this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt; - this->DeviceLinkObject = targetOutputReal; + this->DeviceLinkObject = targetOutput; this->NumberOfProgressActions++; if (!this->NoRuleMessages) { @@ -269,7 +250,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( this->MakeEchoProgress(progress); // Add the link message. std::string buildEcho = - cmStrCat("Linking ", linkLanguage, " device code ", + cmStrCat("Linking CUDA device code ", this->LocalGenerator->ConvertToOutputFormat( this->LocalGenerator->MaybeConvertToRelativePath( this->LocalGenerator->GetCurrentBinaryDirectory(), @@ -278,10 +259,41 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( this->LocalGenerator->AppendEcho( commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress); } + + if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") { + this->WriteDeviceLinkRule(commands, targetOutput); + } else { + this->WriteNvidiaDeviceLibraryRules(linkRuleVar, relink, commands, + targetOutput); + } + + // Write the main driver rule to build everything in this target. + this->WriteTargetDriverRule(targetOutput, relink); +} + +void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules( + const std::string& linkRuleVar, bool relink, + std::vector& commands, const std::string& targetOutput) +{ + std::string linkLanguage = "CUDA"; + + // Build list of dependencies. + std::vector depends; + this->AppendLinkDepends(depends, linkLanguage); + + // Add language-specific flags. + std::string langFlags; + this->LocalGenerator->AddLanguageFlagsForLinking( + langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName()); + + // Create set of linking flags. + std::string linkFlags; + this->GetDeviceLinkFlags(linkFlags, linkLanguage); + // Clean files associated with this library. std::set libCleanFiles; libCleanFiles.insert(this->LocalGenerator->MaybeConvertToRelativePath( - this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal)); + this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput)); // Determine whether a link script will be used. bool useLinkScript = this->GlobalGenerator->GetUseLinkScript(); @@ -335,7 +347,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( std::string target = this->LocalGenerator->ConvertToOutputFormat( this->LocalGenerator->MaybeConvertToRelativePath( - this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal), + this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput), output); std::string targetFullPathCompilePDB = @@ -364,7 +376,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( this->LocalGenerator->CreateRulePlaceholderExpander()); // Construct the main link rule and expand placeholders. - rulePlaceholderExpander->SetTargetImpLib(targetOutputReal); + rulePlaceholderExpander->SetTargetImpLib(targetOutput); std::string linkRule = this->GetLinkRule(linkRuleVar); cmExpandList(linkRule, real_link_commands); @@ -399,14 +411,11 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( commands1.clear(); // Compute the list of outputs. - std::vector outputs(1, targetOutputReal); + std::vector outputs(1, targetOutput); // Write the build rule. this->WriteMakeRule(*this->BuildFileStream, nullptr, outputs, depends, commands, false); - - // Write the main driver rule to build everything in this target. - this->WriteTargetDriverRule(targetOutputReal, relink); #else static_cast(linkRuleVar); static_cast(relink); diff --git a/Source/cmMakefileLibraryTargetGenerator.h b/Source/cmMakefileLibraryTargetGenerator.h index 6a38e18..cc989e7 100644 --- a/Source/cmMakefileLibraryTargetGenerator.h +++ b/Source/cmMakefileLibraryTargetGenerator.h @@ -5,6 +5,7 @@ #include "cmConfigure.h" // IWYU pragma: keep #include +#include #include "cmMakefileTargetGenerator.h" @@ -27,6 +28,10 @@ protected: void WriteModuleLibraryRules(bool relink); void WriteDeviceLibraryRules(const std::string& linkRule, bool relink); + void WriteNvidiaDeviceLibraryRules(const std::string& linkRuleVar, + bool relink, + std::vector& commands, + const std::string& targetOutput); void WriteLibraryRules(const std::string& linkRule, const std::string& extraFlags, bool relink); // MacOSX Framework support methods diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index e1fe0e5..5f97d86 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -2,10 +2,13 @@ file Copyright.txt or https://cmake.org/licensing for details. */ #include "cmMakefileTargetGenerator.h" +#include #include #include +#include #include #include +#include #include #include @@ -25,6 +28,7 @@ #include "cmMakefileExecutableTargetGenerator.h" #include "cmMakefileLibraryTargetGenerator.h" #include "cmMakefileUtilityTargetGenerator.h" +#include "cmMessageType.h" #include "cmOutputConverter.h" #include "cmPolicies.h" #include "cmProperty.h" @@ -1323,6 +1327,130 @@ void cmMakefileTargetGenerator::WriteObjectDependRules( } } +void cmMakefileTargetGenerator::WriteDeviceLinkRule( + std::vector& commands, const std::string& output) +{ + std::string architecturesStr = + this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES"); + + if (cmIsOff(architecturesStr)) { + this->Makefile->IssueMessage(MessageType::FATAL_ERROR, + "CUDA_SEPARABLE_COMPILATION on Clang " + "requires CUDA_ARCHITECTURES to be set."); + return; + } + + std::vector architectures = cmExpandedList(architecturesStr); + + // Ensure there are no duplicates. + const std::vector linkDeps = [&]() -> std::vector { + std::vector deps; + this->AppendTargetDepends(deps, true); + this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA"); + std::copy(this->Objects.begin(), this->Objects.end(), + std::back_inserter(deps)); + + std::unordered_set depsSet(deps.begin(), deps.end()); + deps.clear(); + std::copy(depsSet.begin(), depsSet.end(), std::back_inserter(deps)); + return deps; + }(); + + const std::string objectDir = this->GeneratorTarget->ObjectDirectory; + const std::string relObjectDir = + this->LocalGenerator->MaybeConvertToRelativePath( + this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir); + + // Construct a list of files associated with this executable that + // may need to be cleaned. + std::vector cleanFiles; + cleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath( + this->LocalGenerator->GetCurrentBinaryDirectory(), output)); + + std::string profiles; + std::vector fatbinaryDepends; + std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h"); + + // Link device code for each architecture. + for (const std::string& architectureKind : architectures) { + // Clang always generates real code, so strip the specifier. + const std::string architecture = + architectureKind.substr(0, architectureKind.find('-')); + const std::string cubin = + cmStrCat(relObjectDir, "sm_", architecture, ".cubin"); + + profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinaryDepends.emplace_back(cubin); + + std::string registerFileCmd; + + // The generated register file contains macros that when expanded register + // the device routines. Because the routines are the same for all + // architectures the register file will be the same too. Thus generate it + // only on the first invocation to reduce overhead. + if (fatbinaryDepends.size() == 1) { + std::string registerFileRel = + this->LocalGenerator->MaybeConvertToRelativePath( + this->LocalGenerator->GetCurrentBinaryDirectory(), registerFile); + registerFileCmd = + cmStrCat(" --register-link-binaries=", registerFileRel); + cleanFiles.push_back(registerFileRel); + } + + std::string command = cmStrCat( + this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"), + " -arch=sm_", architecture, registerFileCmd, " -o=$@ ", + cmJoin(linkDeps, " ")); + + this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, cubin, + linkDeps, { command }, false); + } + + // Combine all architectures into a single fatbinary. + const std::string fatbinaryCommand = + cmStrCat(this->Makefile->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"), + " -64 -cmdline=--compile-only -compress-all -link " + "--embedded-fatbin=$@", + profiles); + const std::string fatbinaryOutput = + cmStrCat(objectDir, "cmake_cuda_fatbin.h"); + const std::string fatbinaryOutputRel = + this->LocalGenerator->MaybeConvertToRelativePath( + this->LocalGenerator->GetCurrentBinaryDirectory(), fatbinaryOutput); + + this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, + fatbinaryOutputRel, fatbinaryDepends, + { fatbinaryCommand }, false); + + // Compile the stub that registers the kernels and contains the fatbinaries. + cmRulePlaceholderExpander::RuleVariables vars; + vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str(); + vars.CMTargetType = + cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str(); + + vars.Language = "CUDA"; + vars.Object = output.c_str(); + vars.Fatbinary = fatbinaryOutput.c_str(); + vars.RegisterFile = registerFile.c_str(); + + std::string flags = this->GetFlags("CUDA", this->GetConfigName()); + vars.Flags = flags.c_str(); + + std::string compileCmd = this->GetLinkRule("CMAKE_CUDA_DEVICE_LINK_COMPILE"); + std::unique_ptr rulePlaceholderExpander( + this->LocalGenerator->CreateRulePlaceholderExpander()); + rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator, + compileCmd, vars); + + commands.emplace_back(compileCmd); + this->LocalGenerator->WriteMakeRule( + *this->BuildFileStream, nullptr, output, + { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false); + + // Clean all the possible executable names and symlinks. + this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end()); +} + void cmMakefileTargetGenerator::GenerateCustomRuleFile( cmCustomCommandGenerator const& ccg) { @@ -1579,10 +1707,11 @@ void cmMakefileTargetGenerator::WriteTargetDriverRule( } void cmMakefileTargetGenerator::AppendTargetDepends( - std::vector& depends) + std::vector& depends, bool ignoreType) { // Static libraries never depend on anything for linking. - if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY) { + if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY && + !ignoreType) { return; } diff --git a/Source/cmMakefileTargetGenerator.h b/Source/cmMakefileTargetGenerator.h index 1740d54..cb804e0 100644 --- a/Source/cmMakefileTargetGenerator.h +++ b/Source/cmMakefileTargetGenerator.h @@ -104,6 +104,10 @@ protected: void WriteObjectDependRules(cmSourceFile const& source, std::vector& depends); + // CUDA device linking. + void WriteDeviceLinkRule(std::vector& commands, + const std::string& output); + // write the build rule for a custom command void GenerateCustomRuleFile(cmCustomCommandGenerator const& ccg); @@ -127,7 +131,8 @@ protected: void DriveCustomCommands(std::vector& depends); // append intertarget dependencies - void AppendTargetDepends(std::vector& depends); + void AppendTargetDepends(std::vector& depends, + bool ignoreType = false); // Append object file dependencies. void AppendObjectDepends(std::vector& depends); diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index 210b36e..ccb959b 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ #include "cmLocalGenerator.h" #include "cmLocalNinjaGenerator.h" #include "cmMakefile.h" +#include "cmMessageType.h" #include "cmNinjaLinkLineDeviceComputer.h" #include "cmNinjaTypes.h" #include "cmOSXBundleGenerator.h" @@ -178,6 +180,33 @@ std::string cmNinjaNormalTargetGenerator::LanguageLinkerDeviceRule( "_", config); } +std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceRule( + const std::string& config) const +{ + return cmStrCat( + this->TargetLinkLanguage(config), "_DEVICE_LINK__", + cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()), + '_', config); +} + +std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceCompileRule( + const std::string& config) const +{ + return cmStrCat( + this->TargetLinkLanguage(config), "_DEVICE_LINK_COMPILE__", + cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()), + '_', config); +} + +std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaFatbinaryRule( + const std::string& config) const +{ + return cmStrCat( + this->TargetLinkLanguage(config), "_FATBINARY__", + cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()), + '_', config); +} + struct cmNinjaRemoveNoOpCommands { bool operator()(std::string const& cmd) @@ -186,7 +215,7 @@ struct cmNinjaRemoveNoOpCommands } }; -void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule( +void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkRule( bool useResponseFile, const std::string& config) { cmNinjaRule rule(this->LanguageLinkerDeviceRule(config)); @@ -272,6 +301,55 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule( } } +void cmNinjaNormalTargetGenerator::WriteDeviceLinkRules( + const std::string& config) +{ + const cmMakefile* mf = this->GetMakefile(); + + cmNinjaRule rule(LanguageLinkerCudaDeviceRule(config)); + rule.Command = this->GetLocalGenerator()->BuildCommandLine( + { cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"), + " -arch=$ARCH $REGISTER -o=$out $in") }); + rule.Comment = "Rule for CUDA device linking."; + rule.Description = "Linking CUDA $out"; + this->GetGlobalGenerator()->AddRule(rule); + + cmRulePlaceholderExpander::RuleVariables vars; + vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str(); + vars.CMTargetType = + cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str(); + + vars.Language = "CUDA"; + vars.Object = "$out"; + vars.Fatbinary = "$FATBIN"; + vars.RegisterFile = "$REGISTER"; + + std::string flags = this->GetFlags("CUDA", config); + vars.Flags = flags.c_str(); + + std::string compileCmd = this->GetMakefile()->GetRequiredDefinition( + "CMAKE_CUDA_DEVICE_LINK_COMPILE"); + std::unique_ptr rulePlaceholderExpander( + this->GetLocalGenerator()->CreateRulePlaceholderExpander()); + rulePlaceholderExpander->ExpandRuleVariables(this->GetLocalGenerator(), + compileCmd, vars); + + rule.Name = LanguageLinkerCudaDeviceCompileRule(config); + rule.Command = this->GetLocalGenerator()->BuildCommandLine({ compileCmd }); + rule.Comment = "Rule for compiling CUDA device stubs."; + rule.Description = "Compiling CUDA device stub $out"; + this->GetGlobalGenerator()->AddRule(rule); + + rule.Name = LanguageLinkerCudaFatbinaryRule(config); + rule.Command = this->GetLocalGenerator()->BuildCommandLine( + { cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"), + " -64 -cmdline=--compile-only -compress-all -link " + "--embedded-fatbin=$out $PROFILES") }); + rule.Comment = "Rule for CUDA fatbinaries."; + rule.Description = "Creating fatbinary $out"; + this->GetGlobalGenerator()->AddRule(rule); +} + void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile, const std::string& config) { @@ -586,7 +664,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( // First and very important step is to make sure while inside this // step our link language is set to CUDA - std::string cudaLinkLanguage = "CUDA"; std::string const& objExt = this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION"); @@ -598,6 +675,118 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( std::string targetOutputReal = ConvertToNinjaPath(targetOutputDir + "cmake_device_link" + objExt); + if (firstForConfig) { + globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal); + } + this->DeviceLinkObject = targetOutputReal; + + // Write comments. + cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream()); + this->GetCommonFileStream() + << "# Device Link build statements for " + << cmState::GetTargetTypeName(genTarget->GetType()) << " target " + << this->GetTargetName() << "\n\n"; + + if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") { + std::string architecturesStr = + this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES"); + + if (cmIsOff(architecturesStr)) { + this->Makefile->IssueMessage(MessageType::FATAL_ERROR, + "CUDA_SEPARABLE_COMPILATION on Clang " + "requires CUDA_ARCHITECTURES to be set."); + return; + } + + this->WriteDeviceLinkRules(config); + this->WriteDeviceLinkStatements(config, cmExpandedList(architecturesStr), + targetOutputReal); + } else { + this->WriteNvidiaDeviceLinkStatement(config, fileConfig, targetOutputDir, + targetOutputReal); + } +} + +void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( + const std::string& config, const std::vector& architectures, + const std::string& output) +{ + // Ensure there are no duplicates. + const cmNinjaDeps explicitDeps = [&]() -> std::vector { + std::unordered_set depsSet; + const cmNinjaDeps linkDeps = + this->ComputeLinkDeps(this->TargetLinkLanguage(config), config, true); + const cmNinjaDeps objects = this->GetObjects(config); + depsSet.insert(linkDeps.begin(), linkDeps.end()); + depsSet.insert(objects.begin(), objects.end()); + + std::vector deps; + std::copy(depsSet.begin(), depsSet.end(), std::back_inserter(deps)); + return deps; + }(); + + const std::string objectDir = + cmStrCat(this->GeneratorTarget->GetSupportDirectory(), + this->GetGlobalGenerator()->ConfigDirectory(config)); + const std::string ninjaOutputDir = this->ConvertToNinjaPath(objectDir); + + cmNinjaBuild fatbinary(LanguageLinkerCudaFatbinaryRule(config)); + + // Link device code for each architecture. + for (const std::string& architectureKind : architectures) { + // Clang always generates real code, so strip the specifier. + const std::string architecture = + architectureKind.substr(0, architectureKind.find('-')); + const std::string cubin = + cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin"); + + fatbinary.Variables["PROFILES"] += + cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinary.ExplicitDeps.emplace_back(cubin); + + cmNinjaBuild dlink(LanguageLinkerCudaDeviceRule(config)); + dlink.ExplicitDeps = explicitDeps; + dlink.Outputs = { cubin }; + dlink.Variables["ARCH"] = cmStrCat("sm_", architecture); + + // The generated register file contains macros that when expanded register + // the device routines. Because the routines are the same for all + // architectures the register file will be the same too. Thus generate it + // only on the first invocation to reduce overhead. + if (fatbinary.ExplicitDeps.size() == 1) { + dlink.Variables["REGISTER"] = cmStrCat( + "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h"); + } + + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink); + } + + // Combine all architectures into a single fatbinary. + fatbinary.Outputs = { cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h") }; + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), + fatbinary); + + // Compile the stub that registers the kernels and contains the fatbinaries. + cmNinjaBuild dcompile(LanguageLinkerCudaDeviceCompileRule(config)); + dcompile.Outputs = { output }; + dcompile.ExplicitDeps = { cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h") }; + dcompile.Variables["FATBIN"] = + this->GetLocalGenerator()->ConvertToOutputFormat( + cmStrCat(objectDir, "/cmake_cuda_fatbin.h"), cmOutputConverter::SHELL); + dcompile.Variables["REGISTER"] = + this->GetLocalGenerator()->ConvertToOutputFormat( + cmStrCat(objectDir, "/cmake_cuda_register.h"), cmOutputConverter::SHELL); + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), + dcompile); +} + +void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkStatement( + const std::string& config, const std::string& fileConfig, + const std::string& outputDir, const std::string& output) +{ + cmGeneratorTarget* genTarget = this->GetGeneratorTarget(); + cmGlobalNinjaGenerator* globalGen = this->GetGlobalGenerator(); + std::string targetOutputImplib = ConvertToNinjaPath( genTarget->GetFullPath(config, cmStateEnums::ImportLibraryArtifact)); @@ -606,8 +795,8 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( cmStrCat(this->GetLocalGenerator()->GetTargetDirectory(genTarget), globalGen->ConfigDirectory(fileConfig), "/"); targetOutputFileConfigDir = - globalGen->ExpandCFGIntDir(targetOutputDir, fileConfig); - if (targetOutputDir == targetOutputFileConfigDir) { + globalGen->ExpandCFGIntDir(outputDir, fileConfig); + if (outputDir == targetOutputFileConfigDir) { return; } @@ -623,27 +812,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( } } - if (firstForConfig) { - globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal); - } - this->DeviceLinkObject = targetOutputReal; - - // Write comments. - cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream()); - const cmStateEnums::TargetType targetType = genTarget->GetType(); - this->GetCommonFileStream() << "# Device Link build statements for " - << cmState::GetTargetTypeName(targetType) - << " target " << this->GetTargetName() << "\n\n"; - // Compute the comment. cmNinjaBuild build(this->LanguageLinkerDeviceRule(config)); build.Comment = - cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', targetOutputReal); + cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', output); cmNinjaVars& vars = build.Variables; // Compute outputs. - build.Outputs.push_back(targetOutputReal); + build.Outputs.push_back(output); // Compute specific libraries to link with. build.ExplicitDeps = this->GetObjects(config); build.ImplicitDeps = @@ -659,7 +836,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator(); vars["TARGET_FILE"] = - localGen.ConvertToOutputFormat(targetOutputReal, cmOutputConverter::SHELL); + localGen.ConvertToOutputFormat(output, cmOutputConverter::SHELL); std::unique_ptr linkLineComputer( new cmNinjaLinkLineDeviceComputer( @@ -683,8 +860,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( // Compute language specific link flags. std::string langFlags; - localGen.AddLanguageFlagsForLinking(langFlags, genTarget, cudaLinkLanguage, - config); + localGen.AddLanguageFlagsForLinking(langFlags, genTarget, "CUDA", config); vars["LANGUAGE_COMPILE_FLAGS"] = langFlags; auto const tgtNames = this->TargetNames(config); @@ -692,7 +868,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( vars["SONAME_FLAG"] = this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage(config)); vars["SONAME"] = tgtNames.SharedObject; - if (targetType == cmStateEnums::SHARED_LIBRARY) { + if (genTarget->GetType() == cmStateEnums::SHARED_LIBRARY) { std::string install_dir = this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(config); if (!install_dir.empty()) { @@ -731,7 +907,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( // do not check if the user has explicitly forced a response file. int const commandLineLengthLimit = static_cast(cmSystemTools::CalculateCommandLineLengthLimit()) - - globalGen->GetRuleCmdLength(this->LanguageLinkerDeviceRule(config)); + globalGen->GetRuleCmdLength(build.Rule); build.RspFile = this->ConvertToNinjaPath( cmStrCat("CMakeFiles/", genTarget->GetName(), @@ -746,7 +922,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement( bool usedResponseFile = false; globalGen->WriteBuild(this->GetCommonFileStream(), build, commandLineLengthLimit, &usedResponseFile); - this->WriteDeviceLinkRule(usedResponseFile, config); + this->WriteNvidiaDeviceLinkRule(usedResponseFile, config); } void cmNinjaNormalTargetGenerator::WriteLinkStatement( diff --git a/Source/cmNinjaNormalTargetGenerator.h b/Source/cmNinjaNormalTargetGenerator.h index 25e40d0..ffc405c 100644 --- a/Source/cmNinjaNormalTargetGenerator.h +++ b/Source/cmNinjaNormalTargetGenerator.h @@ -21,18 +21,31 @@ public: private: std::string LanguageLinkerRule(const std::string& config) const; std::string LanguageLinkerDeviceRule(const std::string& config) const; + std::string LanguageLinkerCudaDeviceRule(const std::string& config) const; + std::string LanguageLinkerCudaDeviceCompileRule( + const std::string& config) const; + std::string LanguageLinkerCudaFatbinaryRule(const std::string& config) const; const char* GetVisibleTypeName() const; void WriteLanguagesRules(const std::string& config); void WriteLinkRule(bool useResponseFile, const std::string& config); - void WriteDeviceLinkRule(bool useResponseFile, const std::string& config); + void WriteDeviceLinkRules(const std::string& config); + void WriteNvidiaDeviceLinkRule(bool useResponseFile, + const std::string& config); void WriteLinkStatement(const std::string& config, const std::string& fileConfig, bool firstForConfig); void WriteDeviceLinkStatement(const std::string& config, const std::string& fileConfig, bool firstForConfig); + void WriteDeviceLinkStatements(const std::string& config, + const std::vector& architectures, + const std::string& output); + void WriteNvidiaDeviceLinkStatement(const std::string& config, + const std::string& fileConfig, + const std::string& outputDir, + const std::string& output); void WriteObjectLibStatement(const std::string& config); diff --git a/Source/cmNinjaTargetGenerator.cxx b/Source/cmNinjaTargetGenerator.cxx index accdcf1..04d84a0 100644 --- a/Source/cmNinjaTargetGenerator.cxx +++ b/Source/cmNinjaTargetGenerator.cxx @@ -346,11 +346,13 @@ std::string cmNinjaTargetGenerator::ComputeIncludes( } cmNinjaDeps cmNinjaTargetGenerator::ComputeLinkDeps( - const std::string& linkLanguage, const std::string& config) const + const std::string& linkLanguage, const std::string& config, + bool ignoreType) const { // Static libraries never depend on other targets for linking. - if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY || - this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY) { + if (!ignoreType && + (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY || + this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY)) { return cmNinjaDeps(); } @@ -1009,6 +1011,7 @@ void cmNinjaTargetGenerator::WriteObjectBuildStatements( { std::vector objectSources; this->GeneratorTarget->GetObjectSources(objectSources, config); + for (cmSourceFile const* sf : objectSources) { this->WriteObjectBuildStatement(sf, config, fileConfig, firstForConfig); } diff --git a/Source/cmNinjaTargetGenerator.h b/Source/cmNinjaTargetGenerator.h index 9d9ce60..a27c9b4 100644 --- a/Source/cmNinjaTargetGenerator.h +++ b/Source/cmNinjaTargetGenerator.h @@ -113,7 +113,8 @@ protected: /// @return the list of link dependency for the given target @a target. cmNinjaDeps ComputeLinkDeps(const std::string& linkLanguage, - const std::string& config) const; + const std::string& config, + bool ignoreType = false) const; /// @return the source file path for the given @a source. std::string GetSourceFilePath(cmSourceFile const* source) const; diff --git a/Source/cmRulePlaceholderExpander.cxx b/Source/cmRulePlaceholderExpander.cxx index 6f40ec6..f5f9c67 100644 --- a/Source/cmRulePlaceholderExpander.cxx +++ b/Source/cmRulePlaceholderExpander.cxx @@ -141,6 +141,16 @@ std::string cmRulePlaceholderExpander::ExpandRuleVariable( return replaceValues.DependencyFile; } } + if (replaceValues.Fatbinary) { + if (variable == "FATBINARY") { + return replaceValues.Fatbinary; + } + } + if (replaceValues.RegisterFile) { + if (variable == "REGISTER_FILE") { + return replaceValues.RegisterFile; + } + } if (replaceValues.Target) { if (variable == "TARGET_QUOTED") { diff --git a/Source/cmRulePlaceholderExpander.h b/Source/cmRulePlaceholderExpander.h index dfce8bb..c8d107d 100644 --- a/Source/cmRulePlaceholderExpander.h +++ b/Source/cmRulePlaceholderExpander.h @@ -64,6 +64,8 @@ public: const char* SwiftOutputFileMap; const char* SwiftSources; const char* ISPCHeader; + const char* Fatbinary; + const char* RegisterFile; }; // Expand rule variables in CMake of the type found in language rules diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt index 35ceb33..be5ccac 100644 --- a/Tests/Cuda/CMakeLists.txt +++ b/Tests/Cuda/CMakeLists.txt @@ -17,13 +17,12 @@ add_cuda_test_macro(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly) add_cuda_test_macro(Cuda.Toolkit Toolkit) add_cuda_test_macro(Cuda.IncludePathNoToolkit IncludePathNoToolkit) add_cuda_test_macro(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit) +add_cuda_test_macro(Cuda.Complex CudaComplex) +add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags) -# Separable compilation is currently only supported on NVCC. Disable tests -# using it for other compilers. if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") - add_cuda_test_macro(Cuda.Complex CudaComplex) + # Clang lacks __CUDACC_VER*__ defines. add_cuda_test_macro(Cuda.ProperDeviceLibraries ProperDeviceLibraries) - add_cuda_test_macro(Cuda.ProperLinkFlags ProperLinkFlags) endif() # The CUDA only ships the shared version of the toolkit libraries diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index 7376a73..033f197 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -12,33 +12,31 @@ add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusT add_cuda_test_macro(CudaOnly.Standard98 CudaOnlyStandard98) add_cuda_test_macro(CudaOnly.Toolkit CudaOnlyToolkit) add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs) +add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) +add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) +add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") + # Clang doesn't have flags for selecting the runtime. add_cuda_test_macro(CudaOnly.SharedRuntimeViaCUDAFlags CudaOnlySharedRuntimeViaCUDAFlags) - # Separable compilation is currently only supported on NVCC. Disable tests - # using it for other compilers. - add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) - add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) - add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) - - add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND - ${CMAKE_CTEST_COMMAND} -C $ - --build-and-test - "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/" - "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/" - ${build_generator_args} - --build-project DontResolveDeviceSymbols - --build-options ${build_options} - --test-command ${CMAKE_CTEST_COMMAND} -V -C $ - ) - set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND - PROPERTY LABELS "CUDA") - # Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode. add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) endif() +add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND + ${CMAKE_CTEST_COMMAND} -C $ + --build-and-test + "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/" + "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/" + ${build_generator_args} + --build-project DontResolveDeviceSymbols + --build-options ${build_options} + --test-command ${CMAKE_CTEST_COMMAND} -V -C $ +) +set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND + PROPERTY LABELS "CUDA") + # The CUDA only ships the shared version of the toolkit libraries # on windows if(NOT WIN32) -- cgit v0.12