summaryrefslogtreecommitdiffstats
path: root/Source
diff options
context:
space:
mode:
authorRaul Tambre <raul@tambre.ee>2020-09-05 16:40:02 (GMT)
committerBrad King <brad.king@kitware.com>2020-09-24 19:19:54 (GMT)
commitc63fe018353cf6afb30980c4cac7493be7cd0a82 (patch)
tree68d2daf0cd8ab91a9feaa49392607c6cfecd2ac4 /Source
parentc98ec731f90eb0180c89108b7d2e42263b66d1ed (diff)
downloadCMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.zip
CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.gz
CMake-c63fe018353cf6afb30980c4cac7493be7cd0a82.tar.bz2
CUDA: Clang separable compilation
For NVCC the compiler takes care of device linking when passed the "-dlink" flag. Clang doesn't support such magic and requires the buildsystem to do the work that NVCC does behind the scenes. The implementation is based on Bazel's device linking documentation: https://github.com/tensorflow/tensorflow/blob/7cabcdf073abad8c46e9dda62bb8fa4682d2061e/third_party/nccl/build_defs.bzl.tpl#L259 Closes: #20726
Diffstat (limited to 'Source')
-rw-r--r--Source/cmLocalGenerator.cxx11
-rw-r--r--Source/cmLocalGenerator.h2
-rw-r--r--Source/cmMakefileExecutableTargetGenerator.cxx53
-rw-r--r--Source/cmMakefileExecutableTargetGenerator.h4
-rw-r--r--Source/cmMakefileLibraryTargetGenerator.cxx73
-rw-r--r--Source/cmMakefileLibraryTargetGenerator.h5
-rw-r--r--Source/cmMakefileTargetGenerator.cxx133
-rw-r--r--Source/cmMakefileTargetGenerator.h7
-rw-r--r--Source/cmNinjaNormalTargetGenerator.cxx224
-rw-r--r--Source/cmNinjaNormalTargetGenerator.h15
-rw-r--r--Source/cmNinjaTargetGenerator.cxx9
-rw-r--r--Source/cmNinjaTargetGenerator.h3
-rw-r--r--Source/cmRulePlaceholderExpander.cxx10
-rw-r--r--Source/cmRulePlaceholderExpander.h2
14 files changed, 453 insertions, 98 deletions
diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx
index 47931b0..4e6010c 100644
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -1955,17 +1955,6 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
} else if (lang == "CUDA") {
target->AddCUDAArchitectureFlags(flags);
target->AddCUDAToolkitFlags(flags);
-
- if (compiler == "Clang") {
- bool separable = target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION");
-
- if (separable) {
- this->Makefile->IssueMessage(
- MessageType::FATAL_ERROR,
- "CUDA_SEPARABLE_COMPILATION isn't supported on Clang. "
- "See CMake issue #20726.");
- }
- }
} else if (lang == "ISPC") {
target->AddISPCTargetFlags(flags);
}
diff --git a/Source/cmLocalGenerator.h b/Source/cmLocalGenerator.h
index fad6136..22d3599 100644
--- a/Source/cmLocalGenerator.h
+++ b/Source/cmLocalGenerator.h
@@ -446,7 +446,7 @@ public:
void GetTargetCompileFlags(cmGeneratorTarget* target,
std::string const& config,
std::string const& lang, std::string& flags,
- std::string const& arch = std::string());
+ std::string const& arch);
std::vector<BT<std::string>> GetTargetCompileFlags(
cmGeneratorTarget* target, std::string const& config,
std::string const& lang, std::string const& arch = std::string());
diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx
index 9b5c6e6..871878c 100644
--- a/Source/cmMakefileExecutableTargetGenerator.cxx
+++ b/Source/cmMakefileExecutableTargetGenerator.cxx
@@ -91,19 +91,12 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
std::vector<std::string> commands;
- // Get the language to use for linking this library.
- std::string linkLanguage = "CUDA";
+ // Get the name of the device object to generate.
std::string const& objExt =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
-
- // Build list of dependencies.
- std::vector<std::string> depends;
- this->AppendLinkDepends(depends, linkLanguage);
-
- // Get the name of the device object to generate.
- std::string const targetOutputReal =
+ std::string const targetOutput =
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
- this->DeviceLinkObject = targetOutputReal;
+ this->DeviceLinkObject = targetOutput;
this->NumberOfProgressActions++;
if (!this->NoRuleMessages) {
@@ -111,7 +104,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
this->MakeEchoProgress(progress);
// Add the link message.
std::string buildEcho =
- cmStrCat("Linking ", linkLanguage, " device code ",
+ cmStrCat("Linking CUDA device code ",
this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(),
@@ -121,6 +114,29 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
}
+ if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
+ this->WriteDeviceLinkRule(commands, targetOutput);
+ } else {
+ this->WriteNvidiaDeviceExecutableRule(relink, commands, targetOutput);
+ }
+
+ // Write the main driver rule to build everything in this target.
+ this->WriteTargetDriverRule(targetOutput, relink);
+#else
+ static_cast<void>(relink);
+#endif
+}
+
+void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
+ bool relink, std::vector<std::string>& commands,
+ const std::string& targetOutput)
+{
+ const std::string linkLanguage = "CUDA";
+
+ // Build list of dependencies.
+ std::vector<std::string> depends;
+ this->AppendLinkDepends(depends, linkLanguage);
+
// Build a list of compiler flags and linker flags.
std::string langFlags;
std::string linkFlags;
@@ -136,7 +152,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
// may need to be cleaned.
std::vector<std::string> exeCleanFiles;
exeCleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
- this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
+ this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
// Determine whether a link script will be used.
bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
@@ -195,7 +211,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
: cmOutputConverter::SHELL;
std::string target = this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath(
- this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
+ this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
output);
std::string targetFullPathCompilePDB =
@@ -226,7 +242,7 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
this->LocalGenerator->CreateRulePlaceholderExpander());
// Expand placeholders in the commands.
- rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
+ rulePlaceholderExpander->SetTargetImpLib(targetOutput);
for (std::string& real_link_command : real_link_commands) {
real_link_command = cmStrCat(launcher, real_link_command);
rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
@@ -255,17 +271,10 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
// Write the build rule.
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
- targetOutputReal, depends, commands,
- false);
-
- // Write the main driver rule to build everything in this target.
- this->WriteTargetDriverRule(targetOutputReal, relink);
+ targetOutput, depends, commands, false);
// Clean all the possible executable names and symlinks.
this->CleanFiles.insert(exeCleanFiles.begin(), exeCleanFiles.end());
-#else
- static_cast<void>(relink);
-#endif
}
void cmMakefileExecutableTargetGenerator::WriteExecutableRule(bool relink)
diff --git a/Source/cmMakefileExecutableTargetGenerator.h b/Source/cmMakefileExecutableTargetGenerator.h
index 782692a..520f577 100644
--- a/Source/cmMakefileExecutableTargetGenerator.h
+++ b/Source/cmMakefileExecutableTargetGenerator.h
@@ -5,6 +5,7 @@
#include "cmConfigure.h" // IWYU pragma: keep
#include <string>
+#include <vector>
#include "cmMakefileTargetGenerator.h"
@@ -23,6 +24,9 @@ public:
protected:
virtual void WriteExecutableRule(bool relink);
virtual void WriteDeviceExecutableRule(bool relink);
+ virtual void WriteNvidiaDeviceExecutableRule(
+ bool relink, std::vector<std::string>& commands,
+ const std::string& targetOutput);
private:
std::string DeviceLinkObject;
diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx
index 1c25fc4..b32ea6a 100644
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -129,8 +129,7 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
const bool requiresDeviceLinking = requireDeviceLinking(
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
if (requiresDeviceLinking) {
- std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
- this->WriteDeviceLibraryRules(linkRuleVar, false);
+ this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", false);
}
std::string linkLanguage =
@@ -156,8 +155,7 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink)
const bool requiresDeviceLinking = requireDeviceLinking(
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
if (requiresDeviceLinking) {
- std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
- this->WriteDeviceLibraryRules(linkRuleVar, relink);
+ this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
}
}
@@ -191,8 +189,7 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink)
const bool requiresDeviceLinking = requireDeviceLinking(
*this->GeneratorTarget, *this->LocalGenerator, this->GetConfigName());
if (requiresDeviceLinking) {
- std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
- this->WriteDeviceLibraryRules(linkRuleVar, relink);
+ this->WriteDeviceLibraryRules("CMAKE_CUDA_DEVICE_LINK_LIBRARY", relink);
}
}
@@ -239,29 +236,13 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
// TODO: Merge the methods that call this method to avoid
// code duplication.
std::vector<std::string> commands;
-
- // Get the language to use for linking this library.
- std::string linkLanguage = "CUDA";
std::string const objExt =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
- // Build list of dependencies.
- std::vector<std::string> depends;
- this->AppendLinkDepends(depends, linkLanguage);
-
- // Add language-specific flags.
- std::string langFlags;
- this->LocalGenerator->AddLanguageFlagsForLinking(
- langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
-
- // Create set of linking flags.
- std::string linkFlags;
- this->GetDeviceLinkFlags(linkFlags, linkLanguage);
-
// Get the name of the device object to generate.
- std::string const targetOutputReal =
+ std::string const targetOutput =
this->GeneratorTarget->ObjectDirectory + "cmake_device_link" + objExt;
- this->DeviceLinkObject = targetOutputReal;
+ this->DeviceLinkObject = targetOutput;
this->NumberOfProgressActions++;
if (!this->NoRuleMessages) {
@@ -269,7 +250,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
this->MakeEchoProgress(progress);
// Add the link message.
std::string buildEcho =
- cmStrCat("Linking ", linkLanguage, " device code ",
+ cmStrCat("Linking CUDA device code ",
this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath(
this->LocalGenerator->GetCurrentBinaryDirectory(),
@@ -278,10 +259,41 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
this->LocalGenerator->AppendEcho(
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
}
+
+ if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
+ this->WriteDeviceLinkRule(commands, targetOutput);
+ } else {
+ this->WriteNvidiaDeviceLibraryRules(linkRuleVar, relink, commands,
+ targetOutput);
+ }
+
+ // Write the main driver rule to build everything in this target.
+ this->WriteTargetDriverRule(targetOutput, relink);
+}
+
+void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
+ const std::string& linkRuleVar, bool relink,
+ std::vector<std::string>& commands, const std::string& targetOutput)
+{
+ std::string linkLanguage = "CUDA";
+
+ // Build list of dependencies.
+ std::vector<std::string> depends;
+ this->AppendLinkDepends(depends, linkLanguage);
+
+ // Add language-specific flags.
+ std::string langFlags;
+ this->LocalGenerator->AddLanguageFlagsForLinking(
+ langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
+
+ // Create set of linking flags.
+ std::string linkFlags;
+ this->GetDeviceLinkFlags(linkFlags, linkLanguage);
+
// Clean files associated with this library.
std::set<std::string> libCleanFiles;
libCleanFiles.insert(this->LocalGenerator->MaybeConvertToRelativePath(
- this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal));
+ this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput));
// Determine whether a link script will be used.
bool useLinkScript = this->GlobalGenerator->GetUseLinkScript();
@@ -335,7 +347,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
std::string target = this->LocalGenerator->ConvertToOutputFormat(
this->LocalGenerator->MaybeConvertToRelativePath(
- this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutputReal),
+ this->LocalGenerator->GetCurrentBinaryDirectory(), targetOutput),
output);
std::string targetFullPathCompilePDB =
@@ -364,7 +376,7 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
this->LocalGenerator->CreateRulePlaceholderExpander());
// Construct the main link rule and expand placeholders.
- rulePlaceholderExpander->SetTargetImpLib(targetOutputReal);
+ rulePlaceholderExpander->SetTargetImpLib(targetOutput);
std::string linkRule = this->GetLinkRule(linkRuleVar);
cmExpandList(linkRule, real_link_commands);
@@ -399,14 +411,11 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
commands1.clear();
// Compute the list of outputs.
- std::vector<std::string> outputs(1, targetOutputReal);
+ std::vector<std::string> outputs(1, targetOutput);
// Write the build rule.
this->WriteMakeRule(*this->BuildFileStream, nullptr, outputs, depends,
commands, false);
-
- // Write the main driver rule to build everything in this target.
- this->WriteTargetDriverRule(targetOutputReal, relink);
#else
static_cast<void>(linkRuleVar);
static_cast<void>(relink);
diff --git a/Source/cmMakefileLibraryTargetGenerator.h b/Source/cmMakefileLibraryTargetGenerator.h
index 6a38e18..cc989e7 100644
--- a/Source/cmMakefileLibraryTargetGenerator.h
+++ b/Source/cmMakefileLibraryTargetGenerator.h
@@ -5,6 +5,7 @@
#include "cmConfigure.h" // IWYU pragma: keep
#include <string>
+#include <vector>
#include "cmMakefileTargetGenerator.h"
@@ -27,6 +28,10 @@ protected:
void WriteModuleLibraryRules(bool relink);
void WriteDeviceLibraryRules(const std::string& linkRule, bool relink);
+ void WriteNvidiaDeviceLibraryRules(const std::string& linkRuleVar,
+ bool relink,
+ std::vector<std::string>& commands,
+ const std::string& targetOutput);
void WriteLibraryRules(const std::string& linkRule,
const std::string& extraFlags, bool relink);
// MacOSX Framework support methods
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx
index e1fe0e5..5f97d86 100644
--- a/Source/cmMakefileTargetGenerator.cxx
+++ b/Source/cmMakefileTargetGenerator.cxx
@@ -2,10 +2,13 @@
file Copyright.txt or https://cmake.org/licensing for details. */
#include "cmMakefileTargetGenerator.h"
+#include <algorithm>
#include <cassert>
#include <cstdio>
+#include <iterator>
#include <sstream>
#include <unordered_map>
+#include <unordered_set>
#include <utility>
#include <cm/memory>
@@ -25,6 +28,7 @@
#include "cmMakefileExecutableTargetGenerator.h"
#include "cmMakefileLibraryTargetGenerator.h"
#include "cmMakefileUtilityTargetGenerator.h"
+#include "cmMessageType.h"
#include "cmOutputConverter.h"
#include "cmPolicies.h"
#include "cmProperty.h"
@@ -1323,6 +1327,130 @@ void cmMakefileTargetGenerator::WriteObjectDependRules(
}
}
+void cmMakefileTargetGenerator::WriteDeviceLinkRule(
+ std::vector<std::string>& commands, const std::string& output)
+{
+ std::string architecturesStr =
+ this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
+
+ if (cmIsOff(architecturesStr)) {
+ this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
+ "CUDA_SEPARABLE_COMPILATION on Clang "
+ "requires CUDA_ARCHITECTURES to be set.");
+ return;
+ }
+
+ std::vector<std::string> architectures = cmExpandedList(architecturesStr);
+
+ // Ensure there are no duplicates.
+ const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
+ std::vector<std::string> deps;
+ this->AppendTargetDepends(deps, true);
+ this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
+ std::copy(this->Objects.begin(), this->Objects.end(),
+ std::back_inserter(deps));
+
+ std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
+ deps.clear();
+ std::copy(depsSet.begin(), depsSet.end(), std::back_inserter(deps));
+ return deps;
+ }();
+
+ const std::string objectDir = this->GeneratorTarget->ObjectDirectory;
+ const std::string relObjectDir =
+ this->LocalGenerator->MaybeConvertToRelativePath(
+ this->LocalGenerator->GetCurrentBinaryDirectory(), objectDir);
+
+ // Construct a list of files associated with this executable that
+ // may need to be cleaned.
+ std::vector<std::string> cleanFiles;
+ cleanFiles.push_back(this->LocalGenerator->MaybeConvertToRelativePath(
+ this->LocalGenerator->GetCurrentBinaryDirectory(), output));
+
+ std::string profiles;
+ std::vector<std::string> fatbinaryDepends;
+ std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
+
+ // Link device code for each architecture.
+ for (const std::string& architectureKind : architectures) {
+ // Clang always generates real code, so strip the specifier.
+ const std::string architecture =
+ architectureKind.substr(0, architectureKind.find('-'));
+ const std::string cubin =
+ cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
+
+ profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+ fatbinaryDepends.emplace_back(cubin);
+
+ std::string registerFileCmd;
+
+ // The generated register file contains macros that when expanded register
+ // the device routines. Because the routines are the same for all
+ // architectures the register file will be the same too. Thus generate it
+ // only on the first invocation to reduce overhead.
+ if (fatbinaryDepends.size() == 1) {
+ std::string registerFileRel =
+ this->LocalGenerator->MaybeConvertToRelativePath(
+ this->LocalGenerator->GetCurrentBinaryDirectory(), registerFile);
+ registerFileCmd =
+ cmStrCat(" --register-link-binaries=", registerFileRel);
+ cleanFiles.push_back(registerFileRel);
+ }
+
+ std::string command = cmStrCat(
+ this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
+ " -arch=sm_", architecture, registerFileCmd, " -o=$@ ",
+ cmJoin(linkDeps, " "));
+
+ this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, cubin,
+ linkDeps, { command }, false);
+ }
+
+ // Combine all architectures into a single fatbinary.
+ const std::string fatbinaryCommand =
+ cmStrCat(this->Makefile->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
+ " -64 -cmdline=--compile-only -compress-all -link "
+ "--embedded-fatbin=$@",
+ profiles);
+ const std::string fatbinaryOutput =
+ cmStrCat(objectDir, "cmake_cuda_fatbin.h");
+ const std::string fatbinaryOutputRel =
+ this->LocalGenerator->MaybeConvertToRelativePath(
+ this->LocalGenerator->GetCurrentBinaryDirectory(), fatbinaryOutput);
+
+ this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
+ fatbinaryOutputRel, fatbinaryDepends,
+ { fatbinaryCommand }, false);
+
+ // Compile the stub that registers the kernels and contains the fatbinaries.
+ cmRulePlaceholderExpander::RuleVariables vars;
+ vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
+ vars.CMTargetType =
+ cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str();
+
+ vars.Language = "CUDA";
+ vars.Object = output.c_str();
+ vars.Fatbinary = fatbinaryOutput.c_str();
+ vars.RegisterFile = registerFile.c_str();
+
+ std::string flags = this->GetFlags("CUDA", this->GetConfigName());
+ vars.Flags = flags.c_str();
+
+ std::string compileCmd = this->GetLinkRule("CMAKE_CUDA_DEVICE_LINK_COMPILE");
+ std::unique_ptr<cmRulePlaceholderExpander> rulePlaceholderExpander(
+ this->LocalGenerator->CreateRulePlaceholderExpander());
+ rulePlaceholderExpander->ExpandRuleVariables(this->LocalGenerator,
+ compileCmd, vars);
+
+ commands.emplace_back(compileCmd);
+ this->LocalGenerator->WriteMakeRule(
+ *this->BuildFileStream, nullptr, output,
+ { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
+
+ // Clean all the possible executable names and symlinks.
+ this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
+}
+
void cmMakefileTargetGenerator::GenerateCustomRuleFile(
cmCustomCommandGenerator const& ccg)
{
@@ -1579,10 +1707,11 @@ void cmMakefileTargetGenerator::WriteTargetDriverRule(
}
void cmMakefileTargetGenerator::AppendTargetDepends(
- std::vector<std::string>& depends)
+ std::vector<std::string>& depends, bool ignoreType)
{
// Static libraries never depend on anything for linking.
- if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY) {
+ if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
+ !ignoreType) {
return;
}
diff --git a/Source/cmMakefileTargetGenerator.h b/Source/cmMakefileTargetGenerator.h
index 1740d54..cb804e0 100644
--- a/Source/cmMakefileTargetGenerator.h
+++ b/Source/cmMakefileTargetGenerator.h
@@ -104,6 +104,10 @@ protected:
void WriteObjectDependRules(cmSourceFile const& source,
std::vector<std::string>& depends);
+ // CUDA device linking.
+ void WriteDeviceLinkRule(std::vector<std::string>& commands,
+ const std::string& output);
+
// write the build rule for a custom command
void GenerateCustomRuleFile(cmCustomCommandGenerator const& ccg);
@@ -127,7 +131,8 @@ protected:
void DriveCustomCommands(std::vector<std::string>& depends);
// append intertarget dependencies
- void AppendTargetDepends(std::vector<std::string>& depends);
+ void AppendTargetDepends(std::vector<std::string>& depends,
+ bool ignoreType = false);
// Append object file dependencies.
void AppendObjectDepends(std::vector<std::string>& depends);
diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx
index 210b36e..ccb959b 100644
--- a/Source/cmNinjaNormalTargetGenerator.cxx
+++ b/Source/cmNinjaNormalTargetGenerator.cxx
@@ -8,6 +8,7 @@
#include <map>
#include <set>
#include <sstream>
+#include <unordered_set>
#include <utility>
#include <cm/memory>
@@ -25,6 +26,7 @@
#include "cmLocalGenerator.h"
#include "cmLocalNinjaGenerator.h"
#include "cmMakefile.h"
+#include "cmMessageType.h"
#include "cmNinjaLinkLineDeviceComputer.h"
#include "cmNinjaTypes.h"
#include "cmOSXBundleGenerator.h"
@@ -178,6 +180,33 @@ std::string cmNinjaNormalTargetGenerator::LanguageLinkerDeviceRule(
"_", config);
}
+std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceRule(
+ const std::string& config) const
+{
+ return cmStrCat(
+ this->TargetLinkLanguage(config), "_DEVICE_LINK__",
+ cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()),
+ '_', config);
+}
+
+std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaDeviceCompileRule(
+ const std::string& config) const
+{
+ return cmStrCat(
+ this->TargetLinkLanguage(config), "_DEVICE_LINK_COMPILE__",
+ cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()),
+ '_', config);
+}
+
+std::string cmNinjaNormalTargetGenerator::LanguageLinkerCudaFatbinaryRule(
+ const std::string& config) const
+{
+ return cmStrCat(
+ this->TargetLinkLanguage(config), "_FATBINARY__",
+ cmGlobalNinjaGenerator::EncodeRuleName(this->GeneratorTarget->GetName()),
+ '_', config);
+}
+
struct cmNinjaRemoveNoOpCommands
{
bool operator()(std::string const& cmd)
@@ -186,7 +215,7 @@ struct cmNinjaRemoveNoOpCommands
}
};
-void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(
+void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkRule(
bool useResponseFile, const std::string& config)
{
cmNinjaRule rule(this->LanguageLinkerDeviceRule(config));
@@ -272,6 +301,55 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(
}
}
+void cmNinjaNormalTargetGenerator::WriteDeviceLinkRules(
+ const std::string& config)
+{
+ const cmMakefile* mf = this->GetMakefile();
+
+ cmNinjaRule rule(LanguageLinkerCudaDeviceRule(config));
+ rule.Command = this->GetLocalGenerator()->BuildCommandLine(
+ { cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
+ " -arch=$ARCH $REGISTER -o=$out $in") });
+ rule.Comment = "Rule for CUDA device linking.";
+ rule.Description = "Linking CUDA $out";
+ this->GetGlobalGenerator()->AddRule(rule);
+
+ cmRulePlaceholderExpander::RuleVariables vars;
+ vars.CMTargetName = this->GetGeneratorTarget()->GetName().c_str();
+ vars.CMTargetType =
+ cmState::GetTargetTypeName(this->GetGeneratorTarget()->GetType()).c_str();
+
+ vars.Language = "CUDA";
+ vars.Object = "$out";
+ vars.Fatbinary = "$FATBIN";
+ vars.RegisterFile = "$REGISTER";
+
+ std::string flags = this->GetFlags("CUDA", config);
+ vars.Flags = flags.c_str();
+
+ std::string compileCmd = this->GetMakefile()->GetRequiredDefinition(
+ "CMAKE_CUDA_DEVICE_LINK_COMPILE");
+ std::unique_ptr<cmRulePlaceholderExpander> rulePlaceholderExpander(
+ this->GetLocalGenerator()->CreateRulePlaceholderExpander());
+ rulePlaceholderExpander->ExpandRuleVariables(this->GetLocalGenerator(),
+ compileCmd, vars);
+
+ rule.Name = LanguageLinkerCudaDeviceCompileRule(config);
+ rule.Command = this->GetLocalGenerator()->BuildCommandLine({ compileCmd });
+ rule.Comment = "Rule for compiling CUDA device stubs.";
+ rule.Description = "Compiling CUDA device stub $out";
+ this->GetGlobalGenerator()->AddRule(rule);
+
+ rule.Name = LanguageLinkerCudaFatbinaryRule(config);
+ rule.Command = this->GetLocalGenerator()->BuildCommandLine(
+ { cmStrCat(mf->GetRequiredDefinition("CMAKE_CUDA_FATBINARY"),
+ " -64 -cmdline=--compile-only -compress-all -link "
+ "--embedded-fatbin=$out $PROFILES") });
+ rule.Comment = "Rule for CUDA fatbinaries.";
+ rule.Description = "Creating fatbinary $out";
+ this->GetGlobalGenerator()->AddRule(rule);
+}
+
void cmNinjaNormalTargetGenerator::WriteLinkRule(bool useResponseFile,
const std::string& config)
{
@@ -586,7 +664,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
// First and very important step is to make sure while inside this
// step our link language is set to CUDA
- std::string cudaLinkLanguage = "CUDA";
std::string const& objExt =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_OUTPUT_EXTENSION");
@@ -598,6 +675,118 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
std::string targetOutputReal =
ConvertToNinjaPath(targetOutputDir + "cmake_device_link" + objExt);
+ if (firstForConfig) {
+ globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
+ }
+ this->DeviceLinkObject = targetOutputReal;
+
+ // Write comments.
+ cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
+ this->GetCommonFileStream()
+ << "# Device Link build statements for "
+ << cmState::GetTargetTypeName(genTarget->GetType()) << " target "
+ << this->GetTargetName() << "\n\n";
+
+ if (this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID") == "Clang") {
+ std::string architecturesStr =
+ this->GeneratorTarget->GetSafeProperty("CUDA_ARCHITECTURES");
+
+ if (cmIsOff(architecturesStr)) {
+ this->Makefile->IssueMessage(MessageType::FATAL_ERROR,
+ "CUDA_SEPARABLE_COMPILATION on Clang "
+ "requires CUDA_ARCHITECTURES to be set.");
+ return;
+ }
+
+ this->WriteDeviceLinkRules(config);
+ this->WriteDeviceLinkStatements(config, cmExpandedList(architecturesStr),
+ targetOutputReal);
+ } else {
+ this->WriteNvidiaDeviceLinkStatement(config, fileConfig, targetOutputDir,
+ targetOutputReal);
+ }
+}
+
+void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
+ const std::string& config, const std::vector<std::string>& architectures,
+ const std::string& output)
+{
+ // Ensure there are no duplicates.
+ const cmNinjaDeps explicitDeps = [&]() -> std::vector<std::string> {
+ std::unordered_set<std::string> depsSet;
+ const cmNinjaDeps linkDeps =
+ this->ComputeLinkDeps(this->TargetLinkLanguage(config), config, true);
+ const cmNinjaDeps objects = this->GetObjects(config);
+ depsSet.insert(linkDeps.begin(), linkDeps.end());
+ depsSet.insert(objects.begin(), objects.end());
+
+ std::vector<std::string> deps;
+ std::copy(depsSet.begin(), depsSet.end(), std::back_inserter(deps));
+ return deps;
+ }();
+
+ const std::string objectDir =
+ cmStrCat(this->GeneratorTarget->GetSupportDirectory(),
+ this->GetGlobalGenerator()->ConfigDirectory(config));
+ const std::string ninjaOutputDir = this->ConvertToNinjaPath(objectDir);
+
+ cmNinjaBuild fatbinary(LanguageLinkerCudaFatbinaryRule(config));
+
+ // Link device code for each architecture.
+ for (const std::string& architectureKind : architectures) {
+ // Clang always generates real code, so strip the specifier.
+ const std::string architecture =
+ architectureKind.substr(0, architectureKind.find('-'));
+ const std::string cubin =
+ cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
+
+ fatbinary.Variables["PROFILES"] +=
+ cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+ fatbinary.ExplicitDeps.emplace_back(cubin);
+
+ cmNinjaBuild dlink(LanguageLinkerCudaDeviceRule(config));
+ dlink.ExplicitDeps = explicitDeps;
+ dlink.Outputs = { cubin };
+ dlink.Variables["ARCH"] = cmStrCat("sm_", architecture);
+
+ // The generated register file contains macros that when expanded register
+ // the device routines. Because the routines are the same for all
+ // architectures the register file will be the same too. Thus generate it
+ // only on the first invocation to reduce overhead.
+ if (fatbinary.ExplicitDeps.size() == 1) {
+ dlink.Variables["REGISTER"] = cmStrCat(
+ "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
+ }
+
+ this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
+ }
+
+ // Combine all architectures into a single fatbinary.
+ fatbinary.Outputs = { cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h") };
+ this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(),
+ fatbinary);
+
+ // Compile the stub that registers the kernels and contains the fatbinaries.
+ cmNinjaBuild dcompile(LanguageLinkerCudaDeviceCompileRule(config));
+ dcompile.Outputs = { output };
+ dcompile.ExplicitDeps = { cmStrCat(ninjaOutputDir, "/cmake_cuda_fatbin.h") };
+ dcompile.Variables["FATBIN"] =
+ this->GetLocalGenerator()->ConvertToOutputFormat(
+ cmStrCat(objectDir, "/cmake_cuda_fatbin.h"), cmOutputConverter::SHELL);
+ dcompile.Variables["REGISTER"] =
+ this->GetLocalGenerator()->ConvertToOutputFormat(
+ cmStrCat(objectDir, "/cmake_cuda_register.h"), cmOutputConverter::SHELL);
+ this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(),
+ dcompile);
+}
+
+void cmNinjaNormalTargetGenerator::WriteNvidiaDeviceLinkStatement(
+ const std::string& config, const std::string& fileConfig,
+ const std::string& outputDir, const std::string& output)
+{
+ cmGeneratorTarget* genTarget = this->GetGeneratorTarget();
+ cmGlobalNinjaGenerator* globalGen = this->GetGlobalGenerator();
+
std::string targetOutputImplib = ConvertToNinjaPath(
genTarget->GetFullPath(config, cmStateEnums::ImportLibraryArtifact));
@@ -606,8 +795,8 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
cmStrCat(this->GetLocalGenerator()->GetTargetDirectory(genTarget),
globalGen->ConfigDirectory(fileConfig), "/");
targetOutputFileConfigDir =
- globalGen->ExpandCFGIntDir(targetOutputDir, fileConfig);
- if (targetOutputDir == targetOutputFileConfigDir) {
+ globalGen->ExpandCFGIntDir(outputDir, fileConfig);
+ if (outputDir == targetOutputFileConfigDir) {
return;
}
@@ -623,27 +812,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
}
}
- if (firstForConfig) {
- globalGen->GetByproductsForCleanTarget(config).push_back(targetOutputReal);
- }
- this->DeviceLinkObject = targetOutputReal;
-
- // Write comments.
- cmGlobalNinjaGenerator::WriteDivider(this->GetCommonFileStream());
- const cmStateEnums::TargetType targetType = genTarget->GetType();
- this->GetCommonFileStream() << "# Device Link build statements for "
- << cmState::GetTargetTypeName(targetType)
- << " target " << this->GetTargetName() << "\n\n";
-
// Compute the comment.
cmNinjaBuild build(this->LanguageLinkerDeviceRule(config));
build.Comment =
- cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', targetOutputReal);
+ cmStrCat("Link the ", this->GetVisibleTypeName(), ' ', output);
cmNinjaVars& vars = build.Variables;
// Compute outputs.
- build.Outputs.push_back(targetOutputReal);
+ build.Outputs.push_back(output);
// Compute specific libraries to link with.
build.ExplicitDeps = this->GetObjects(config);
build.ImplicitDeps =
@@ -659,7 +836,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
cmLocalNinjaGenerator& localGen = *this->GetLocalGenerator();
vars["TARGET_FILE"] =
- localGen.ConvertToOutputFormat(targetOutputReal, cmOutputConverter::SHELL);
+ localGen.ConvertToOutputFormat(output, cmOutputConverter::SHELL);
std::unique_ptr<cmLinkLineComputer> linkLineComputer(
new cmNinjaLinkLineDeviceComputer(
@@ -683,8 +860,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
// Compute language specific link flags.
std::string langFlags;
- localGen.AddLanguageFlagsForLinking(langFlags, genTarget, cudaLinkLanguage,
- config);
+ localGen.AddLanguageFlagsForLinking(langFlags, genTarget, "CUDA", config);
vars["LANGUAGE_COMPILE_FLAGS"] = langFlags;
auto const tgtNames = this->TargetNames(config);
@@ -692,7 +868,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
vars["SONAME_FLAG"] =
this->GetMakefile()->GetSONameFlag(this->TargetLinkLanguage(config));
vars["SONAME"] = tgtNames.SharedObject;
- if (targetType == cmStateEnums::SHARED_LIBRARY) {
+ if (genTarget->GetType() == cmStateEnums::SHARED_LIBRARY) {
std::string install_dir =
this->GetGeneratorTarget()->GetInstallNameDirForBuildTree(config);
if (!install_dir.empty()) {
@@ -731,7 +907,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
// do not check if the user has explicitly forced a response file.
int const commandLineLengthLimit =
static_cast<int>(cmSystemTools::CalculateCommandLineLengthLimit()) -
- globalGen->GetRuleCmdLength(this->LanguageLinkerDeviceRule(config));
+ globalGen->GetRuleCmdLength(build.Rule);
build.RspFile = this->ConvertToNinjaPath(
cmStrCat("CMakeFiles/", genTarget->GetName(),
@@ -746,7 +922,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement(
bool usedResponseFile = false;
globalGen->WriteBuild(this->GetCommonFileStream(), build,
commandLineLengthLimit, &usedResponseFile);
- this->WriteDeviceLinkRule(usedResponseFile, config);
+ this->WriteNvidiaDeviceLinkRule(usedResponseFile, config);
}
void cmNinjaNormalTargetGenerator::WriteLinkStatement(
diff --git a/Source/cmNinjaNormalTargetGenerator.h b/Source/cmNinjaNormalTargetGenerator.h
index 25e40d0..ffc405c 100644
--- a/Source/cmNinjaNormalTargetGenerator.h
+++ b/Source/cmNinjaNormalTargetGenerator.h
@@ -21,18 +21,31 @@ public:
private:
std::string LanguageLinkerRule(const std::string& config) const;
std::string LanguageLinkerDeviceRule(const std::string& config) const;
+ std::string LanguageLinkerCudaDeviceRule(const std::string& config) const;
+ std::string LanguageLinkerCudaDeviceCompileRule(
+ const std::string& config) const;
+ std::string LanguageLinkerCudaFatbinaryRule(const std::string& config) const;
const char* GetVisibleTypeName() const;
void WriteLanguagesRules(const std::string& config);
void WriteLinkRule(bool useResponseFile, const std::string& config);
- void WriteDeviceLinkRule(bool useResponseFile, const std::string& config);
+ void WriteDeviceLinkRules(const std::string& config);
+ void WriteNvidiaDeviceLinkRule(bool useResponseFile,
+ const std::string& config);
void WriteLinkStatement(const std::string& config,
const std::string& fileConfig, bool firstForConfig);
void WriteDeviceLinkStatement(const std::string& config,
const std::string& fileConfig,
bool firstForConfig);
+ void WriteDeviceLinkStatements(const std::string& config,
+ const std::vector<std::string>& architectures,
+ const std::string& output);
+ void WriteNvidiaDeviceLinkStatement(const std::string& config,
+ const std::string& fileConfig,
+ const std::string& outputDir,
+ const std::string& output);
void WriteObjectLibStatement(const std::string& config);
diff --git a/Source/cmNinjaTargetGenerator.cxx b/Source/cmNinjaTargetGenerator.cxx
index accdcf1..04d84a0 100644
--- a/Source/cmNinjaTargetGenerator.cxx
+++ b/Source/cmNinjaTargetGenerator.cxx
@@ -346,11 +346,13 @@ std::string cmNinjaTargetGenerator::ComputeIncludes(
}
cmNinjaDeps cmNinjaTargetGenerator::ComputeLinkDeps(
- const std::string& linkLanguage, const std::string& config) const
+ const std::string& linkLanguage, const std::string& config,
+ bool ignoreType) const
{
// Static libraries never depend on other targets for linking.
- if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY ||
- this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY) {
+ if (!ignoreType &&
+ (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY ||
+ this->GeneratorTarget->GetType() == cmStateEnums::OBJECT_LIBRARY)) {
return cmNinjaDeps();
}
@@ -1009,6 +1011,7 @@ void cmNinjaTargetGenerator::WriteObjectBuildStatements(
{
std::vector<cmSourceFile const*> objectSources;
this->GeneratorTarget->GetObjectSources(objectSources, config);
+
for (cmSourceFile const* sf : objectSources) {
this->WriteObjectBuildStatement(sf, config, fileConfig, firstForConfig);
}
diff --git a/Source/cmNinjaTargetGenerator.h b/Source/cmNinjaTargetGenerator.h
index 9d9ce60..a27c9b4 100644
--- a/Source/cmNinjaTargetGenerator.h
+++ b/Source/cmNinjaTargetGenerator.h
@@ -113,7 +113,8 @@ protected:
/// @return the list of link dependency for the given target @a target.
cmNinjaDeps ComputeLinkDeps(const std::string& linkLanguage,
- const std::string& config) const;
+ const std::string& config,
+ bool ignoreType = false) const;
/// @return the source file path for the given @a source.
std::string GetSourceFilePath(cmSourceFile const* source) const;
diff --git a/Source/cmRulePlaceholderExpander.cxx b/Source/cmRulePlaceholderExpander.cxx
index 6f40ec6..f5f9c67 100644
--- a/Source/cmRulePlaceholderExpander.cxx
+++ b/Source/cmRulePlaceholderExpander.cxx
@@ -141,6 +141,16 @@ std::string cmRulePlaceholderExpander::ExpandRuleVariable(
return replaceValues.DependencyFile;
}
}
+ if (replaceValues.Fatbinary) {
+ if (variable == "FATBINARY") {
+ return replaceValues.Fatbinary;
+ }
+ }
+ if (replaceValues.RegisterFile) {
+ if (variable == "REGISTER_FILE") {
+ return replaceValues.RegisterFile;
+ }
+ }
if (replaceValues.Target) {
if (variable == "TARGET_QUOTED") {
diff --git a/Source/cmRulePlaceholderExpander.h b/Source/cmRulePlaceholderExpander.h
index dfce8bb..c8d107d 100644
--- a/Source/cmRulePlaceholderExpander.h
+++ b/Source/cmRulePlaceholderExpander.h
@@ -64,6 +64,8 @@ public:
const char* SwiftOutputFileMap;
const char* SwiftSources;
const char* ISPCHeader;
+ const char* Fatbinary;
+ const char* RegisterFile;
};
// Expand rule variables in CMake of the type found in language rules