summary refs log tree commit diff stats
path: root/Source
diff options
context:
space:
mode:
author Robert Maynard <rmaynard@nvidia.com> 2022-04-22 16:51:26 (GMT)
committer Robert Maynard <rmaynard@nvidia.com> 2022-07-22 14:34:45 (GMT)
commit 96bc59b1ca01be231347404d178445263687dd22 (patch)
tree d9c015f30a1e43f0d5ded6dc75a638471f085ed6 /Source
parent 1527d48cd0071e3e1737b51db3738f7f76ddbf80 (diff)
download CMake-96bc59b1ca01be231347404d178445263687dd22.zip
CMake-96bc59b1ca01be231347404d178445263687dd22.tar.gz
CMake-96bc59b1ca01be231347404d178445263687dd22.tar.bz2
CUDA: Add Device LTO support for nvcc
Fixes #22200
Diffstat (limited to 'Source')
-rw-r--r-- Source/cmGeneratorTarget.cxx 30
-rw-r--r-- Source/cmGeneratorTarget.h 5
-rw-r--r-- Source/cmGhsMultiTargetGenerator.cxx 4
-rw-r--r-- Source/cmGlobalXCodeGenerator.cxx 4
-rw-r--r-- Source/cmLinkLineDeviceComputer.cxx 20
-rw-r--r-- Source/cmLinkLineDeviceComputer.h 1
-rw-r--r-- Source/cmLocalGenerator.cxx 25
-rw-r--r-- Source/cmLocalGenerator.h 13
-rw-r--r-- Source/cmLocalVisualStudio7Generator.cxx 3
-rw-r--r-- Source/cmMakefileExecutableTargetGenerator.cxx 17
-rw-r--r-- Source/cmMakefileLibraryTargetGenerator.cxx 13
-rw-r--r-- Source/cmVisualStudio10TargetGenerator.cxx 11
12 files changed, 114 insertions, 32 deletions
diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx
index 433c1d5..dace055 100644
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -916,11 +916,19 @@ bool cmGeneratorTarget::IsIPOEnabled(std::string const& lang,
return false;
}
- if (lang != "C" && lang != "CXX" && lang != "Fortran") {
+ if (lang != "C" && lang != "CXX" && lang != "CUDA" && lang != "Fortran") {
// We do not define IPO behavior for other languages.
return false;
}
+ if (lang == "CUDA") {
+ // CUDA IPO requires both CUDA_ARCHITECTURES and CUDA_SEPARABLE_COMPILATION
+ if (cmIsOff(this->GetSafeProperty("CUDA_ARCHITECTURES")) ||
+ cmIsOff(this->GetSafeProperty("CUDA_SEPARABLE_COMPILATION"))) {
+ return false;
+ }
+ }
+
cmPolicies::PolicyStatus cmp0069 = this->GetPolicyStatusCMP0069();
if (cmp0069 == cmPolicies::OLD || cmp0069 == cmPolicies::WARN) {
@@ -3428,7 +3436,9 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags,
"EXPLICIT_LANGUAGE");
}
-void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
+void cmGeneratorTarget::AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
+ const std::string& config,
+ std::string& flags) const
{
std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES");
@@ -3460,6 +3470,7 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
std::string const& compiler =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
+ const bool ipoEnabled = this->IsIPOEnabled("CUDA", config);
// Check for special modes: `all`, `all-major`.
if (property == "all" || property == "all-major") {
@@ -3539,6 +3550,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
}
if (compiler == "NVIDIA") {
+ if (ipoEnabled && compileOrLink == cmBuildStep::Link) {
+ if (cmValue cudaIPOFlags =
+ this->Makefile->GetDefinition("CMAKE_CUDA_LINK_OPTIONS_IPO")) {
+ flags += cudaIPOFlags;
+ }
+ }
+
for (CudaArchitecture& architecture : architectures) {
flags +=
" --generate-code=arch=compute_" + architecture.name + ",code=[";
@@ -3551,7 +3569,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
}
}
- if (architecture.real) {
+ if (ipoEnabled) {
+ if (compileOrLink == cmBuildStep::Compile) {
+ flags += "lto_" + architecture.name;
+ } else if (compileOrLink == cmBuildStep::Link) {
+ flags += "sm_" + architecture.name;
+ }
+ } else if (architecture.real) {
flags += "sm_" + architecture.name;
}
diff --git a/Source/cmGeneratorTarget.h b/Source/cmGeneratorTarget.h
index 349afa7..25e6a81 100644
--- a/Source/cmGeneratorTarget.h
+++ b/Source/cmGeneratorTarget.h
@@ -23,6 +23,7 @@
#include "cmStateTypes.h"
#include "cmValue.h"
+enum class cmBuildStep;
class cmComputeLinkInformation;
class cmCustomCommand;
class cmGlobalGenerator;
@@ -471,7 +472,9 @@ public:
void AddExplicitLanguageFlags(std::string& flags,
cmSourceFile const& sf) const;
- void AddCUDAArchitectureFlags(std::string& flags) const;
+ void AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
+ const std::string& config,
+ std::string& flags) const;
void AddCUDAToolkitFlags(std::string& flags) const;
void AddHIPArchitectureFlags(std::string& flags) const;
diff --git a/Source/cmGhsMultiTargetGenerator.cxx b/Source/cmGhsMultiTargetGenerator.cxx
index bf019c3..138d3f1 100644
--- a/Source/cmGhsMultiTargetGenerator.cxx
+++ b/Source/cmGhsMultiTargetGenerator.cxx
@@ -183,8 +183,8 @@ void cmGhsMultiTargetGenerator::SetCompilerFlags(std::string const& config,
auto i = this->FlagsByLanguage.find(language);
if (i == this->FlagsByLanguage.end()) {
std::string flags;
- this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
- language, config);
+ this->LocalGenerator->AddLanguageFlags(
+ flags, this->GeneratorTarget, cmBuildStep::Compile, language, config);
this->LocalGenerator->AddCMP0018Flags(flags, this->GeneratorTarget,
language, config);
this->LocalGenerator->AddVisibilityPresetFlags(
diff --git a/Source/cmGlobalXCodeGenerator.cxx b/Source/cmGlobalXCodeGenerator.cxx
index 456f5bc..70a379e 100644
--- a/Source/cmGlobalXCodeGenerator.cxx
+++ b/Source/cmGlobalXCodeGenerator.cxx
@@ -2368,8 +2368,8 @@ void cmGlobalXCodeGenerator::CreateBuildSettings(cmGeneratorTarget* gtgt,
std::string& flags = cflags[lang];
// Add language-specific flags.
- this->CurrentLocalGenerator->AddLanguageFlags(flags, gtgt, lang,
- configName);
+ this->CurrentLocalGenerator->AddLanguageFlags(
+ flags, gtgt, cmBuildStep::Compile, lang, configName);
if (gtgt->IsIPOEnabled(lang, configName)) {
this->CurrentLocalGenerator->AppendFeatureOptions(flags, lang, "IPO");
diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx
index 719b834..b06dc3d 100644
--- a/Source/cmLinkLineDeviceComputer.cxx
+++ b/Source/cmLinkLineDeviceComputer.cxx
@@ -68,6 +68,26 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
});
}
+bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinkingIPOFlag(
+ cmComputeLinkInformation& cli)
+{
+ // Determine if any linked item might require device linking with IPO.
+ // For this we only consider static library targets.
+ using ItemVector = cmComputeLinkInformation::ItemVector;
+ ItemVector const& items = cli.GetItems();
+ std::string config = cli.GetConfig();
+ return std::any_of(
+ items.begin(), items.end(),
+ [config](cmComputeLinkInformation::Item const& item) -> bool {
+ return item.Target &&
+ item.Target->GetType() == cmStateEnums::STATIC_LIBRARY &&
+ // this dependency requires us to device link it
+ !item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS") &&
+ item.Target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION") &&
+ item.Target->IsIPOEnabled("CUDA", config);
+ });
+}
+
void cmLinkLineDeviceComputer::ComputeLinkLibraries(
cmComputeLinkInformation& cli, std::string const& stdLibString,
std::vector<BT<std::string>>& linkLibraries)
diff --git a/Source/cmLinkLineDeviceComputer.h b/Source/cmLinkLineDeviceComputer.h
index dee625b..0916307 100644
--- a/Source/cmLinkLineDeviceComputer.h
+++ b/Source/cmLinkLineDeviceComputer.h
@@ -30,6 +30,7 @@ public:
delete;
bool ComputeRequiresDeviceLinking(cmComputeLinkInformation& cli);
+ bool ComputeRequiresDeviceLinkingIPOFlag(cmComputeLinkInformation& cli);
void ComputeLinkLibraries(
cmComputeLinkInformation& cli, std::string const& stdLibString,
diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx
index 67c8bf2..7b823da 100644
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -36,6 +36,7 @@
#include "cmInstallScriptGenerator.h"
#include "cmInstallTargetGenerator.h"
#include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
#include "cmMakefile.h"
#include "cmRange.h"
#include "cmRulePlaceholderExpander.h"
@@ -1381,7 +1382,7 @@ std::vector<BT<std::string>> cmLocalGenerator::GetStaticLibraryFlags(
}
void cmLocalGenerator::GetDeviceLinkFlags(
- cmLinkLineComputer& linkLineComputer, const std::string& config,
+ cmLinkLineDeviceComputer& linkLineComputer, const std::string& config,
std::string& linkLibs, std::string& linkFlags, std::string& frameworkPath,
std::string& linkPath, cmGeneratorTarget* target)
{
@@ -1389,6 +1390,18 @@ void cmLocalGenerator::GetDeviceLinkFlags(
cmComputeLinkInformation* pcli = target->GetLinkInformation(config);
+ auto linklang = linkLineComputer.GetLinkerLanguage(target, config);
+ auto ipoEnabled = target->IsIPOEnabled(linklang, config);
+ if (!ipoEnabled) {
+ ipoEnabled = linkLineComputer.ComputeRequiresDeviceLinkingIPOFlag(*pcli);
+ }
+ if (ipoEnabled) {
+ if (cmValue cudaIPOFlags = this->Makefile->GetDefinition(
+ "CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO")) {
+ linkFlags += cudaIPOFlags;
+ }
+ }
+
if (pcli) {
// Compute the required device link libraries when
// resolving gpu lang device symbols
@@ -1396,6 +1409,8 @@ void cmLocalGenerator::GetDeviceLinkFlags(
linkPath);
}
+ // Link deps needing IPO are detected above (ComputeRequiresDeviceLinkingIPOFlag).
+
std::vector<std::string> linkOpts;
target->GetLinkOptions(linkOpts, config, "CUDA");
// LINK_OPTIONS are escaped.
@@ -1590,7 +1605,8 @@ std::vector<BT<std::string>> cmLocalGenerator::GetTargetCompileFlags(
cmMakefile* mf = this->GetMakefile();
// Add language-specific flags.
- this->AddLanguageFlags(compileFlags, target, lang, config);
+ this->AddLanguageFlags(compileFlags, target, cmBuildStep::Compile, lang,
+ config);
if (target->IsIPOEnabled(lang, config)) {
this->AppendFeatureOptions(compileFlags, lang, "IPO");
@@ -1903,6 +1919,7 @@ void cmLocalGenerator::AddArchitectureFlags(std::string& flags,
void cmLocalGenerator::AddLanguageFlags(std::string& flags,
cmGeneratorTarget const* target,
+ cmBuildStep compileOrLink,
const std::string& lang,
const std::string& config)
{
@@ -1926,7 +1943,7 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
}
}
} else if (lang == "CUDA") {
- target->AddCUDAArchitectureFlags(flags);
+ target->AddCUDAArchitectureFlags(compileOrLink, config, flags);
target->AddCUDAToolkitFlags(flags);
} else if (lang == "ISPC") {
target->AddISPCTargetFlags(flags);
@@ -2038,7 +2055,7 @@ void cmLocalGenerator::AddLanguageFlagsForLinking(
this->AddCompilerRequirementFlag(flags, target, lang, config);
}
- this->AddLanguageFlags(flags, target, lang, config);
+ this->AddLanguageFlags(flags, target, cmBuildStep::Link, lang, config);
if (target->IsIPOEnabled(lang, config)) {
this->AppendFeatureOptions(flags, lang, "IPO");
diff --git a/Source/cmLocalGenerator.h b/Source/cmLocalGenerator.h
index 7cae1fc..0529431 100644
--- a/Source/cmLocalGenerator.h
+++ b/Source/cmLocalGenerator.h
@@ -35,6 +35,7 @@ class cmGeneratorTarget;
class cmGlobalGenerator;
class cmImplicitDependsList;
class cmLinkLineComputer;
+class cmLinkLineDeviceComputer;
class cmMakefile;
class cmRulePlaceholderExpander;
class cmSourceFile;
@@ -59,6 +60,13 @@ enum class cmDependencyScannerKind
Compiler
};
+/** What to compute language flags for */
+enum class cmBuildStep
+{
+ Compile,
+ Link
+};
+
/** Target and source file which have a specific output. */
struct cmSourcesWithOutput
{
@@ -143,7 +151,8 @@ public:
const std::string& filterArch = std::string());
void AddLanguageFlags(std::string& flags, cmGeneratorTarget const* target,
- const std::string& lang, const std::string& config);
+ cmBuildStep compileOrLink, const std::string& lang,
+ const std::string& config);
void AddLanguageFlagsForLinking(std::string& flags,
cmGeneratorTarget const* target,
const std::string& lang,
@@ -476,7 +485,7 @@ public:
/** Fill out these strings for the given target. Libraries to link,
* flags, and linkflags. */
- void GetDeviceLinkFlags(cmLinkLineComputer& linkLineComputer,
+ void GetDeviceLinkFlags(cmLinkLineDeviceComputer& linkLineComputer,
const std::string& config, std::string& linkLibs,
std::string& linkFlags, std::string& frameworkPath,
std::string& linkPath, cmGeneratorTarget* target);
diff --git a/Source/cmLocalVisualStudio7Generator.cxx b/Source/cmLocalVisualStudio7Generator.cxx
index f65add1..0451d96 100644
--- a/Source/cmLocalVisualStudio7Generator.cxx
+++ b/Source/cmLocalVisualStudio7Generator.cxx
@@ -680,7 +680,8 @@ void cmLocalVisualStudio7Generator::WriteConfiguration(
langForClCompile = linkLanguage;
if (langForClCompile == "C" || langForClCompile == "CXX" ||
langForClCompile == "Fortran") {
- this->AddLanguageFlags(flags, target, langForClCompile, configName);
+ this->AddLanguageFlags(flags, target, cmBuildStep::Compile,
+ langForClCompile, configName);
}
// set the correct language
if (linkLanguage == "C") {
diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx
index 74574f7..54f03b9 100644
--- a/Source/cmMakefileExecutableTargetGenerator.cxx
+++ b/Source/cmMakefileExecutableTargetGenerator.cxx
@@ -136,17 +136,11 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
std::vector<std::string> depends;
this->AppendLinkDepends(depends, linkLanguage);
- // Build a list of compiler flags and linker flags.
- std::string langFlags;
- std::string linkFlags;
-
// Add language feature flags.
+ std::string langFlags;
this->LocalGenerator->AddLanguageFlagsForLinking(
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
- // Add device-specific linker flags.
- this->GetDeviceLinkFlags(linkFlags, linkLanguage);
-
// Construct a list of files associated with this executable that
// may need to be cleaned.
std::vector<std::string> exeCleanFiles;
@@ -173,13 +167,20 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
// Set path conversion for link script shells.
this->LocalGenerator->SetLinkScriptShell(useLinkScript);
- std::unique_ptr<cmLinkLineComputer> linkLineComputer(
+ std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
new cmLinkLineDeviceComputer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
linkLineComputer->SetForResponse(useResponseFileForLibs);
linkLineComputer->SetRelink(relink);
+ // Create set of linking flags.
+ std::string linkFlags;
+ std::string ignored_;
+ this->LocalGenerator->GetDeviceLinkFlags(
+ *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
+ ignored_, this->GeneratorTarget);
+
// Collect up flags to link in needed libraries.
std::string linkLibs;
this->CreateLinkLibs(
diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx
index 3f7d87d..45ef8c8 100644
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -287,10 +287,6 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
this->LocalGenerator->AddLanguageFlagsForLinking(
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
- // Create set of linking flags.
- std::string linkFlags;
- this->GetDeviceLinkFlags(linkFlags, linkLanguage);
-
// Clean files associated with this library.
std::set<std::string> libCleanFiles;
libCleanFiles.insert(
@@ -315,13 +311,20 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
// Collect up flags to link in needed libraries.
std::string linkLibs;
- std::unique_ptr<cmLinkLineComputer> linkLineComputer(
+ std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
new cmLinkLineDeviceComputer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
linkLineComputer->SetForResponse(useResponseFileForLibs);
linkLineComputer->SetRelink(relink);
+ // Create set of linking flags.
+ std::string linkFlags;
+ std::string ignored_;
+ this->LocalGenerator->GetDeviceLinkFlags(
+ *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
+ ignored_, this->GeneratorTarget);
+
this->CreateLinkLibs(
linkLineComputer.get(), linkLibs, useResponseFileForLibs, depends,
cmMakefileTargetGenerator::ResponseFlagFor::DeviceLink);
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index a7460e8..020691d 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -3300,6 +3300,7 @@ bool cmVisualStudio10TargetGenerator::ComputeClOptions(
this->LangForClCompile = langForClCompile;
if (!langForClCompile.empty()) {
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
+ cmBuildStep::Compile,
langForClCompile, configName);
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget,
langForClCompile, configName);
@@ -3675,8 +3676,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
// Get compile flags for CUDA in this directory.
std::string flags;
- this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
- configName);
+ this->LocalGenerator->AddLanguageFlags(
+ flags, this->GeneratorTarget, cmBuildStep::Compile, "CUDA", configName);
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget, "CUDA",
configName);
@@ -3947,7 +3948,8 @@ bool cmVisualStudio10TargetGenerator::ComputeMasmOptions(
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
- "ASM_MASM", configName);
+ cmBuildStep::Compile, "ASM_MASM",
+ configName);
masmOptions.Parse(flags);
@@ -3999,7 +4001,8 @@ bool cmVisualStudio10TargetGenerator::ComputeNasmOptions(
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
- "ASM_NASM", configName);
+ cmBuildStep::Compile, "ASM_NASM",
+ configName);
flags += " -f";
flags += this->Makefile->GetSafeDefinition("CMAKE_ASM_NASM_OBJECT_FORMAT");
nasmOptions.Parse(flags);