summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Help/release/dev/cuda-device-lto.rst7
-rw-r--r--Modules/CheckIPOSupported.cmake40
-rw-r--r--Modules/Compiler/Clang-CUDA.cmake4
-rw-r--r--Modules/Compiler/NVIDIA-CUDA.cmake8
-rw-r--r--Source/cmGeneratorTarget.cxx30
-rw-r--r--Source/cmGeneratorTarget.h5
-rw-r--r--Source/cmGhsMultiTargetGenerator.cxx4
-rw-r--r--Source/cmGlobalXCodeGenerator.cxx4
-rw-r--r--Source/cmLinkLineDeviceComputer.cxx21
-rw-r--r--Source/cmLinkLineDeviceComputer.h1
-rw-r--r--Source/cmLocalGenerator.cxx25
-rw-r--r--Source/cmLocalGenerator.h13
-rw-r--r--Source/cmLocalVisualStudio7Generator.cxx3
-rw-r--r--Source/cmMakefileExecutableTargetGenerator.cxx17
-rw-r--r--Source/cmMakefileLibraryTargetGenerator.cxx13
-rw-r--r--Source/cmNinjaNormalTargetGenerator.cxx1
-rw-r--r--Source/cmVisualStudio10TargetGenerator.cxx11
-rw-r--r--Tests/CMakeLists.txt5
-rw-r--r--Tests/CudaOnly/CMakeLists.txt24
-rw-r--r--Tests/CudaOnly/DeviceLTO/CMakeLists.txt37
-rw-r--r--Tests/CudaOnly/DeviceLTO/file1.cu17
-rw-r--r--Tests/CudaOnly/DeviceLTO/file2.cu5
-rw-r--r--Tests/CudaOnly/DeviceLTO/file3.cu4
-rw-r--r--Tests/CudaOnly/DeviceLTO/main.cu62
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt32
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/bar.cu12
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/foo.cu4
-rw-r--r--Tests/Module/CheckIPOSupported-CUDA/main.cu62
-rw-r--r--Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt4
29 files changed, 412 insertions, 63 deletions
diff --git a/Help/release/dev/cuda-device-lto.rst b/Help/release/dev/cuda-device-lto.rst
new file mode 100644
index 0000000..113062b
--- /dev/null
+++ b/Help/release/dev/cuda-device-lto.rst
@@ -0,0 +1,7 @@
+cuda-device-lto
+---------------
+
+* ``CUDA`` language now supports device link time optimization when using
+ ``nvcc``. The :variable:`CMAKE_INTERPROCEDURAL_OPTIMIZATION` variable and
+ the associated :prop_tgt:`INTERPROCEDURAL_OPTIMIZATION` target property will
+ activate device LTO.
diff --git a/Modules/CheckIPOSupported.cmake b/Modules/CheckIPOSupported.cmake
index cca1be9..14262a1 100644
--- a/Modules/CheckIPOSupported.cmake
+++ b/Modules/CheckIPOSupported.cmake
@@ -76,6 +76,23 @@ endmacro()
# Run IPO/LTO test
macro(_ipo_run_language_check language)
+ set(_C_ext "c")
+ set(_CXX_ext "cpp")
+ set(_Fortran_ext "f")
+ string(COMPARE EQUAL "${language}" "CUDA" is_cuda)
+
+ set(ext ${_${language}_ext})
+ if(NOT "${ext}" STREQUAL "")
+ set(copy_sources foo.${ext} main.${ext})
+ elseif(is_cuda)
+ if(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE)
+ set("${X_RESULT}" YES PARENT_SCOPE)
+ endif()
+ return()
+ else()
+ message(FATAL_ERROR "Language not supported")
+ endif()
+
set(testdir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/_CMakeLTOTest-${language}")
file(REMOVE_RECURSE "${testdir}")
@@ -100,20 +117,6 @@ macro(_ipo_run_language_check language)
@ONLY
)
- string(COMPARE EQUAL "${language}" "C" is_c)
- string(COMPARE EQUAL "${language}" "CXX" is_cxx)
- string(COMPARE EQUAL "${language}" "Fortran" is_fortran)
-
- if(is_c)
- set(copy_sources foo.c main.c)
- elseif(is_cxx)
- set(copy_sources foo.cpp main.cpp)
- elseif(is_fortran)
- set(copy_sources foo.f main.f)
- else()
- message(FATAL_ERROR "Language not supported")
- endif()
-
foreach(x ${copy_sources})
configure_file(
"${try_compile_src}/${x}"
@@ -214,6 +217,11 @@ function(check_ipo_supported)
list(APPEND languages "C")
endif()
+ list(FIND enabled_languages "CUDA" result)
+ if(NOT result EQUAL -1)
+ list(APPEND languages "CUDA")
+ endif()
+
list(FIND enabled_languages "Fortran" result)
if(NOT result EQUAL -1)
list(APPEND languages "Fortran")
@@ -222,7 +230,7 @@ function(check_ipo_supported)
string(COMPARE EQUAL "${languages}" "" no_languages)
if(no_languages)
_ipo_not_supported(
- "no C/CXX/Fortran languages found in ENABLED_LANGUAGES global property"
+ "no C/CXX/CUDA/Fortran languages found in ENABLED_LANGUAGES global property"
)
return()
endif()
@@ -230,7 +238,7 @@ function(check_ipo_supported)
set(languages "${X_LANGUAGES}")
set(unsupported_languages "${languages}")
- list(REMOVE_ITEM unsupported_languages "C" "CXX" "Fortran")
+ list(REMOVE_ITEM unsupported_languages "C" "CXX" "CUDA" "Fortran")
string(COMPARE NOTEQUAL "${unsupported_languages}" "" has_unsupported)
if(has_unsupported)
_ipo_not_supported(
diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake
index 219897e..d9929f1 100644
--- a/Modules/Compiler/Clang-CUDA.cmake
+++ b/Modules/Compiler/Clang-CUDA.cmake
@@ -35,6 +35,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "")
+# Clang doesn't support CUDA device LTO
+set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE NO)
+set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER NO)
+
if(UNIX)
list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
endif()
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 33509ac..2b8a1ea 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -48,6 +48,13 @@ if((NOT DEFINED CMAKE_DEPENDS_USE_COMPILER OR CMAKE_DEPENDS_USE_COMPILER)
set(CMAKE_CUDA_DEPENDS_USE_COMPILER TRUE)
endif()
+if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
+ set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE YES)
+ set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER YES)
+
+ set(CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO " -dlto")
+endif()
+
if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE)
set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC)
@@ -61,6 +68,7 @@ if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -O1 -DNDEBUG")
string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG")
endif()
+
set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared)
set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=)
diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx
index d9c0d3f..d59ac2b 100644
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -917,11 +917,19 @@ bool cmGeneratorTarget::IsIPOEnabled(std::string const& lang,
return false;
}
- if (lang != "C" && lang != "CXX" && lang != "Fortran") {
+ if (lang != "C" && lang != "CXX" && lang != "CUDA" && lang != "Fortran") {
// We do not define IPO behavior for other languages.
return false;
}
+ if (lang == "CUDA") {
+ // CUDA IPO requires both CUDA_ARCHITECTURES and CUDA_SEPARABLE_COMPILATION
+ if (cmIsOff(this->GetSafeProperty("CUDA_ARCHITECTURES")) ||
+ cmIsOff(this->GetSafeProperty("CUDA_SEPARABLE_COMPILATION"))) {
+ return false;
+ }
+ }
+
cmPolicies::PolicyStatus cmp0069 = this->GetPolicyStatusCMP0069();
if (cmp0069 == cmPolicies::OLD || cmp0069 == cmPolicies::WARN) {
@@ -3429,7 +3437,9 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags,
"EXPLICIT_LANGUAGE");
}
-void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
+void cmGeneratorTarget::AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
+ const std::string& config,
+ std::string& flags) const
{
std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES");
@@ -3461,6 +3471,7 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
std::string const& compiler =
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
+ const bool ipoEnabled = this->IsIPOEnabled("CUDA", config);
// Check for special modes: `all`, `all-major`.
if (property == "all" || property == "all-major") {
@@ -3540,6 +3551,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
}
if (compiler == "NVIDIA") {
+ if (ipoEnabled && compileOrLink == cmBuildStep::Link) {
+ if (cmValue cudaIPOFlags =
+ this->Makefile->GetDefinition("CMAKE_CUDA_LINK_OPTIONS_IPO")) {
+ flags += cudaIPOFlags;
+ }
+ }
+
for (CudaArchitecture& architecture : architectures) {
flags +=
" --generate-code=arch=compute_" + architecture.name + ",code=[";
@@ -3552,7 +3570,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
}
}
- if (architecture.real) {
+ if (ipoEnabled) {
+ if (compileOrLink == cmBuildStep::Compile) {
+ flags += "lto_" + architecture.name;
+ } else if (compileOrLink == cmBuildStep::Link) {
+ flags += "sm_" + architecture.name;
+ }
+ } else if (architecture.real) {
flags += "sm_" + architecture.name;
}
diff --git a/Source/cmGeneratorTarget.h b/Source/cmGeneratorTarget.h
index 349afa7..25e6a81 100644
--- a/Source/cmGeneratorTarget.h
+++ b/Source/cmGeneratorTarget.h
@@ -23,6 +23,7 @@
#include "cmStateTypes.h"
#include "cmValue.h"
+enum class cmBuildStep;
class cmComputeLinkInformation;
class cmCustomCommand;
class cmGlobalGenerator;
@@ -471,7 +472,9 @@ public:
void AddExplicitLanguageFlags(std::string& flags,
cmSourceFile const& sf) const;
- void AddCUDAArchitectureFlags(std::string& flags) const;
+ void AddCUDAArchitectureFlags(cmBuildStep compileOrLink,
+ const std::string& config,
+ std::string& flags) const;
void AddCUDAToolkitFlags(std::string& flags) const;
void AddHIPArchitectureFlags(std::string& flags) const;
diff --git a/Source/cmGhsMultiTargetGenerator.cxx b/Source/cmGhsMultiTargetGenerator.cxx
index bf019c3..138d3f1 100644
--- a/Source/cmGhsMultiTargetGenerator.cxx
+++ b/Source/cmGhsMultiTargetGenerator.cxx
@@ -183,8 +183,8 @@ void cmGhsMultiTargetGenerator::SetCompilerFlags(std::string const& config,
auto i = this->FlagsByLanguage.find(language);
if (i == this->FlagsByLanguage.end()) {
std::string flags;
- this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
- language, config);
+ this->LocalGenerator->AddLanguageFlags(
+ flags, this->GeneratorTarget, cmBuildStep::Compile, language, config);
this->LocalGenerator->AddCMP0018Flags(flags, this->GeneratorTarget,
language, config);
this->LocalGenerator->AddVisibilityPresetFlags(
diff --git a/Source/cmGlobalXCodeGenerator.cxx b/Source/cmGlobalXCodeGenerator.cxx
index 456f5bc..70a379e 100644
--- a/Source/cmGlobalXCodeGenerator.cxx
+++ b/Source/cmGlobalXCodeGenerator.cxx
@@ -2368,8 +2368,8 @@ void cmGlobalXCodeGenerator::CreateBuildSettings(cmGeneratorTarget* gtgt,
std::string& flags = cflags[lang];
// Add language-specific flags.
- this->CurrentLocalGenerator->AddLanguageFlags(flags, gtgt, lang,
- configName);
+ this->CurrentLocalGenerator->AddLanguageFlags(
+ flags, gtgt, cmBuildStep::Compile, lang, configName);
if (gtgt->IsIPOEnabled(lang, configName)) {
this->CurrentLocalGenerator->AppendFeatureOptions(flags, lang, "IPO");
diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx
index 43f1b8e..b06dc3d 100644
--- a/Source/cmLinkLineDeviceComputer.cxx
+++ b/Source/cmLinkLineDeviceComputer.cxx
@@ -57,7 +57,6 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
// For this we only consider targets
using ItemVector = cmComputeLinkInformation::ItemVector;
ItemVector const& items = cli.GetItems();
- std::string config = cli.GetConfig();
return std::any_of(
items.begin(), items.end(),
[](cmComputeLinkInformation::Item const& item) -> bool {
@@ -69,6 +68,26 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking(
});
}
+bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinkingIPOFlag(
+ cmComputeLinkInformation& cli)
+{
+ // Determine if this item might requires device linking.
+ // For this we only consider targets
+ using ItemVector = cmComputeLinkInformation::ItemVector;
+ ItemVector const& items = cli.GetItems();
+ std::string config = cli.GetConfig();
+ return std::any_of(
+ items.begin(), items.end(),
+ [config](cmComputeLinkInformation::Item const& item) -> bool {
+ return item.Target &&
+ item.Target->GetType() == cmStateEnums::STATIC_LIBRARY &&
+ // this dependency requires us to device link it
+ !item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS") &&
+ item.Target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION") &&
+ item.Target->IsIPOEnabled("CUDA", config);
+ });
+}
+
void cmLinkLineDeviceComputer::ComputeLinkLibraries(
cmComputeLinkInformation& cli, std::string const& stdLibString,
std::vector<BT<std::string>>& linkLibraries)
diff --git a/Source/cmLinkLineDeviceComputer.h b/Source/cmLinkLineDeviceComputer.h
index dee625b..0916307 100644
--- a/Source/cmLinkLineDeviceComputer.h
+++ b/Source/cmLinkLineDeviceComputer.h
@@ -30,6 +30,7 @@ public:
delete;
bool ComputeRequiresDeviceLinking(cmComputeLinkInformation& cli);
+ bool ComputeRequiresDeviceLinkingIPOFlag(cmComputeLinkInformation& cli);
void ComputeLinkLibraries(
cmComputeLinkInformation& cli, std::string const& stdLibString,
diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx
index 67c8bf2..7b823da 100644
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -36,6 +36,7 @@
#include "cmInstallScriptGenerator.h"
#include "cmInstallTargetGenerator.h"
#include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
#include "cmMakefile.h"
#include "cmRange.h"
#include "cmRulePlaceholderExpander.h"
@@ -1381,7 +1382,7 @@ std::vector<BT<std::string>> cmLocalGenerator::GetStaticLibraryFlags(
}
void cmLocalGenerator::GetDeviceLinkFlags(
- cmLinkLineComputer& linkLineComputer, const std::string& config,
+ cmLinkLineDeviceComputer& linkLineComputer, const std::string& config,
std::string& linkLibs, std::string& linkFlags, std::string& frameworkPath,
std::string& linkPath, cmGeneratorTarget* target)
{
@@ -1389,6 +1390,18 @@ void cmLocalGenerator::GetDeviceLinkFlags(
cmComputeLinkInformation* pcli = target->GetLinkInformation(config);
+ auto linklang = linkLineComputer.GetLinkerLanguage(target, config);
+ auto ipoEnabled = target->IsIPOEnabled(linklang, config);
+ if (!ipoEnabled) {
+ ipoEnabled = linkLineComputer.ComputeRequiresDeviceLinkingIPOFlag(*pcli);
+ }
+ if (ipoEnabled) {
+ if (cmValue cudaIPOFlags = this->Makefile->GetDefinition(
+ "CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO")) {
+ linkFlags += cudaIPOFlags;
+ }
+ }
+
if (pcli) {
// Compute the required device link libraries when
// resolving gpu lang device symbols
@@ -1396,6 +1409,8 @@ void cmLocalGenerator::GetDeviceLinkFlags(
linkPath);
}
+ // iterate link deps and see if any of them need IPO
+
std::vector<std::string> linkOpts;
target->GetLinkOptions(linkOpts, config, "CUDA");
// LINK_OPTIONS are escaped.
@@ -1590,7 +1605,8 @@ std::vector<BT<std::string>> cmLocalGenerator::GetTargetCompileFlags(
cmMakefile* mf = this->GetMakefile();
// Add language-specific flags.
- this->AddLanguageFlags(compileFlags, target, lang, config);
+ this->AddLanguageFlags(compileFlags, target, cmBuildStep::Compile, lang,
+ config);
if (target->IsIPOEnabled(lang, config)) {
this->AppendFeatureOptions(compileFlags, lang, "IPO");
@@ -1903,6 +1919,7 @@ void cmLocalGenerator::AddArchitectureFlags(std::string& flags,
void cmLocalGenerator::AddLanguageFlags(std::string& flags,
cmGeneratorTarget const* target,
+ cmBuildStep compileOrLink,
const std::string& lang,
const std::string& config)
{
@@ -1926,7 +1943,7 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags,
}
}
} else if (lang == "CUDA") {
- target->AddCUDAArchitectureFlags(flags);
+ target->AddCUDAArchitectureFlags(compileOrLink, config, flags);
target->AddCUDAToolkitFlags(flags);
} else if (lang == "ISPC") {
target->AddISPCTargetFlags(flags);
@@ -2038,7 +2055,7 @@ void cmLocalGenerator::AddLanguageFlagsForLinking(
this->AddCompilerRequirementFlag(flags, target, lang, config);
}
- this->AddLanguageFlags(flags, target, lang, config);
+ this->AddLanguageFlags(flags, target, cmBuildStep::Link, lang, config);
if (target->IsIPOEnabled(lang, config)) {
this->AppendFeatureOptions(flags, lang, "IPO");
diff --git a/Source/cmLocalGenerator.h b/Source/cmLocalGenerator.h
index 7cae1fc..0529431 100644
--- a/Source/cmLocalGenerator.h
+++ b/Source/cmLocalGenerator.h
@@ -35,6 +35,7 @@ class cmGeneratorTarget;
class cmGlobalGenerator;
class cmImplicitDependsList;
class cmLinkLineComputer;
+class cmLinkLineDeviceComputer;
class cmMakefile;
class cmRulePlaceholderExpander;
class cmSourceFile;
@@ -59,6 +60,13 @@ enum class cmDependencyScannerKind
Compiler
};
+/** What to compute language flags for */
+enum class cmBuildStep
+{
+ Compile,
+ Link
+};
+
/** Target and source file which have a specific output. */
struct cmSourcesWithOutput
{
@@ -143,7 +151,8 @@ public:
const std::string& filterArch = std::string());
void AddLanguageFlags(std::string& flags, cmGeneratorTarget const* target,
- const std::string& lang, const std::string& config);
+ cmBuildStep compileOrLink, const std::string& lang,
+ const std::string& config);
void AddLanguageFlagsForLinking(std::string& flags,
cmGeneratorTarget const* target,
const std::string& lang,
@@ -476,7 +485,7 @@ public:
/** Fill out these strings for the given target. Libraries to link,
* flags, and linkflags. */
- void GetDeviceLinkFlags(cmLinkLineComputer& linkLineComputer,
+ void GetDeviceLinkFlags(cmLinkLineDeviceComputer& linkLineComputer,
const std::string& config, std::string& linkLibs,
std::string& linkFlags, std::string& frameworkPath,
std::string& linkPath, cmGeneratorTarget* target);
diff --git a/Source/cmLocalVisualStudio7Generator.cxx b/Source/cmLocalVisualStudio7Generator.cxx
index f65add1..0451d96 100644
--- a/Source/cmLocalVisualStudio7Generator.cxx
+++ b/Source/cmLocalVisualStudio7Generator.cxx
@@ -680,7 +680,8 @@ void cmLocalVisualStudio7Generator::WriteConfiguration(
langForClCompile = linkLanguage;
if (langForClCompile == "C" || langForClCompile == "CXX" ||
langForClCompile == "Fortran") {
- this->AddLanguageFlags(flags, target, langForClCompile, configName);
+ this->AddLanguageFlags(flags, target, cmBuildStep::Compile,
+ langForClCompile, configName);
}
// set the correct language
if (linkLanguage == "C") {
diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx
index 74574f7..54f03b9 100644
--- a/Source/cmMakefileExecutableTargetGenerator.cxx
+++ b/Source/cmMakefileExecutableTargetGenerator.cxx
@@ -136,17 +136,11 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
std::vector<std::string> depends;
this->AppendLinkDepends(depends, linkLanguage);
- // Build a list of compiler flags and linker flags.
- std::string langFlags;
- std::string linkFlags;
-
// Add language feature flags.
+ std::string langFlags;
this->LocalGenerator->AddLanguageFlagsForLinking(
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
- // Add device-specific linker flags.
- this->GetDeviceLinkFlags(linkFlags, linkLanguage);
-
// Construct a list of files associated with this executable that
// may need to be cleaned.
std::vector<std::string> exeCleanFiles;
@@ -173,13 +167,20 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule(
// Set path conversion for link script shells.
this->LocalGenerator->SetLinkScriptShell(useLinkScript);
- std::unique_ptr<cmLinkLineComputer> linkLineComputer(
+ std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
new cmLinkLineDeviceComputer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
linkLineComputer->SetForResponse(useResponseFileForLibs);
linkLineComputer->SetRelink(relink);
+ // Create set of linking flags.
+ std::string linkFlags;
+ std::string ignored_;
+ this->LocalGenerator->GetDeviceLinkFlags(
+ *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
+ ignored_, this->GeneratorTarget);
+
// Collect up flags to link in needed libraries.
std::string linkLibs;
this->CreateLinkLibs(
diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx
index 3f7d87d..45ef8c8 100644
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -287,10 +287,6 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
this->LocalGenerator->AddLanguageFlagsForLinking(
langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName());
- // Create set of linking flags.
- std::string linkFlags;
- this->GetDeviceLinkFlags(linkFlags, linkLanguage);
-
// Clean files associated with this library.
std::set<std::string> libCleanFiles;
libCleanFiles.insert(
@@ -315,13 +311,20 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules(
// Collect up flags to link in needed libraries.
std::string linkLibs;
- std::unique_ptr<cmLinkLineComputer> linkLineComputer(
+ std::unique_ptr<cmLinkLineDeviceComputer> linkLineComputer(
new cmLinkLineDeviceComputer(
this->LocalGenerator,
this->LocalGenerator->GetStateSnapshot().GetDirectory()));
linkLineComputer->SetForResponse(useResponseFileForLibs);
linkLineComputer->SetRelink(relink);
+ // Create set of linking flags.
+ std::string linkFlags;
+ std::string ignored_;
+ this->LocalGenerator->GetDeviceLinkFlags(
+ *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_,
+ ignored_, this->GeneratorTarget);
+
this->CreateLinkLibs(
linkLineComputer.get(), linkLibs, useResponseFileForLibs, depends,
cmMakefileTargetGenerator::ResponseFlagFor::DeviceLink);
diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx
index d4f1608..42f0329 100644
--- a/Source/cmNinjaNormalTargetGenerator.cxx
+++ b/Source/cmNinjaNormalTargetGenerator.cxx
@@ -537,7 +537,6 @@ std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd()
// this target requires separable cuda compilation
// now build the correct command depending on if the target is
// an executable or a dynamic library.
- std::string linkCmd;
switch (this->GetGeneratorTarget()->GetType()) {
case cmStateEnums::STATIC_LIBRARY:
case cmStateEnums::SHARED_LIBRARY:
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index 66b91cb..4c1fa01 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -3137,6 +3137,7 @@ bool cmVisualStudio10TargetGenerator::ComputeClOptions(
this->LangForClCompile = langForClCompile;
if (!langForClCompile.empty()) {
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
+ cmBuildStep::Compile,
langForClCompile, configName);
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget,
langForClCompile, configName);
@@ -3521,8 +3522,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
// Get compile flags for CUDA in this directory.
std::string flags;
- this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
- configName);
+ this->LocalGenerator->AddLanguageFlags(
+ flags, this->GeneratorTarget, cmBuildStep::Compile, "CUDA", configName);
this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget, "CUDA",
configName);
@@ -3793,7 +3794,8 @@ bool cmVisualStudio10TargetGenerator::ComputeMasmOptions(
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
- "ASM_MASM", configName);
+ cmBuildStep::Compile, "ASM_MASM",
+ configName);
masmOptions.Parse(flags);
@@ -3845,7 +3847,8 @@ bool cmVisualStudio10TargetGenerator::ComputeNasmOptions(
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget,
- "ASM_NASM", configName);
+ cmBuildStep::Compile, "ASM_NASM",
+ configName);
flags += " -f";
flags += this->Makefile->GetSafeDefinition("CMAKE_ASM_NASM_OBJECT_FORMAT");
nasmOptions.Parse(flags);
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index a54b6b5..ab1fba0 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -618,6 +618,11 @@ if(BUILD_TESTING)
set(Module.CheckIPOSupported-CXX_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_CXX=${CMake_TEST_IPO_WORKS_CXX})
ADD_TEST_MACRO(Module.CheckIPOSupported-CXX CheckIPOSupported-CXX)
+ if(CMake_TEST_CUDA)
+ ADD_TEST_MACRO(Module.CheckIPOSupported-CUDA CheckIPOSupported-CUDA)
+ set_property(TEST Module.CheckIPOSupported-CUDA APPEND PROPERTY LABELS "CUDA")
+ endif()
+
if(CMAKE_Fortran_COMPILER)
set(Module.CheckIPOSupported-Fortran_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_Fortran=${CMake_TEST_IPO_WORKS_Fortran})
ADD_TEST_MACRO(Module.CheckIPOSupported-Fortran CheckIPOSupported-Fortran)
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index aa4755d..091872d 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -7,7 +7,6 @@ endmacro ()
add_cuda_test_macro(CudaOnly.Architecture Architecture)
add_cuda_test_macro(CudaOnly.ArchSpecial CudaOnlyArchSpecial)
add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags)
-
add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard)
add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX)
add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
@@ -28,6 +27,19 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
endif()
+# The CUDA only ships the shared version of the toolkit libraries
+# on windows
+if(NOT WIN32)
+ add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
+endif()
+
+add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO)
+
+if(MSVC)
+ # Tests for features that only work with MSVC
+ add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
+endif()
+
add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
@@ -41,16 +53,6 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND
PROPERTY LABELS "CUDA")
-# The CUDA only ships the shared version of the toolkit libraries
-# on windows
-if(NOT WIN32)
- add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit)
-endif()
-
-if(MSVC)
- add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB)
-endif()
-
add_test(NAME CudaOnly.RuntimeControls COMMAND
${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
--build-and-test
diff --git a/Tests/CudaOnly/DeviceLTO/CMakeLists.txt b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt
new file mode 100644
index 0000000..653b35d
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt
@@ -0,0 +1,37 @@
+cmake_minimum_required(VERSION 3.18)
+project(DeviceLTO CUDA)
+
+# Goal:
+# Verify that we correctly compile with device LTO
+# Verify that device LTO requirements are propagated to
+# the final device link line
+
+add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu)
+add_executable(CudaOnlyDeviceLTO main.cu)
+
+set_target_properties(CUDA_dlto
+ PROPERTIES
+ CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
+ CUDA_SEPARABLE_COMPILATION ON
+ POSITION_INDEPENDENT_CODE ON)
+
+set_target_properties(CudaOnlyDeviceLTO
+ PROPERTIES
+ CUDA_SEPARABLE_COMPILATION ON
+ CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}"
+ )
+
+target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto)
+
+include(CheckIPOSupported)
+check_ipo_supported(LANGUAGES CUDA RESULT ipo_supported)
+if(ipo_supported)
+ set_target_properties(CUDA_dlto
+ PROPERTIES
+ INTERPROCEDURAL_OPTIMIZATION ON)
+
+ # When non-LTO variants (i.e. virtual) are built together with LTO ones the
+ # linker warns about missing device LTO for the virtual architectures.
+ # Ignore these warnings.
+ target_link_options(CudaOnlyDeviceLTO PRIVATE "$<DEVICE_LINK:-w>")
+endif()
diff --git a/Tests/CudaOnly/DeviceLTO/file1.cu b/Tests/CudaOnly/DeviceLTO/file1.cu
new file mode 100644
index 0000000..703927c
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/file1.cu
@@ -0,0 +1,17 @@
+#ifdef _WIN32
+# define EXPORT __declspec(dllexport)
+#else
+# define EXPORT
+#endif
+
+extern __device__ int file2_func(int);
+void __global__ kernel(int x)
+{
+ file2_func(x);
+}
+
+EXPORT int launch_kernel(int x)
+{
+ kernel<<<1, 1>>>(x);
+ return x;
+}
diff --git a/Tests/CudaOnly/DeviceLTO/file2.cu b/Tests/CudaOnly/DeviceLTO/file2.cu
new file mode 100644
index 0000000..73d6468
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/file2.cu
@@ -0,0 +1,5 @@
+extern __device__ int file3_func(int);
+int __device__ file2_func(int x)
+{
+ return x + file3_func(x);
+}
diff --git a/Tests/CudaOnly/DeviceLTO/file3.cu b/Tests/CudaOnly/DeviceLTO/file3.cu
new file mode 100644
index 0000000..235ac06
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/file3.cu
@@ -0,0 +1,4 @@
+int __device__ file3_func(int x)
+{
+ return x * x * x;
+}
diff --git a/Tests/CudaOnly/DeviceLTO/main.cu b/Tests/CudaOnly/DeviceLTO/main.cu
new file mode 100644
index 0000000..8ef4873
--- /dev/null
+++ b/Tests/CudaOnly/DeviceLTO/main.cu
@@ -0,0 +1,62 @@
+#include <iostream>
+
+#include "cuda.h"
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+#else
+# define IMPORT
+#endif
+
+IMPORT int launch_kernel(int x);
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ std::cout << "prop.major: " << prop.major << std::endl;
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card" << std::endl;
+
+ return 1;
+}
+
+int main()
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
+ launch_kernel(1);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "launch_kernel: kernel launch should have passed.\n "
+ "Error message: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
new file mode 100644
index 0000000..9dd670e
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt
@@ -0,0 +1,32 @@
+cmake_minimum_required(VERSION 3.8)
+project(CheckIPOSupported-CUDA LANGUAGES CUDA)
+
+cmake_policy(SET CMP0069 NEW)
+
+include(CheckIPOSupported)
+check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output)
+if(ipo_supported)
+ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
+endif()
+
+if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA"
+ AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2)
+ message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support")
+endif()
+
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+
+add_library(foo STATIC foo.cu)
+set_target_properties(foo PROPERTIES
+ WINDOWS_EXPORT_ALL_SYMBOLS ON
+ POSITION_INDEPENDENT_CODE ON)
+
+add_library(bar SHARED bar.cu)
+set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON)
+target_link_libraries(bar PRIVATE foo)
+
+add_executable(CheckIPOSupported-CUDA main.cu)
+target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar)
+
+enable_testing()
+add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA)
diff --git a/Tests/Module/CheckIPOSupported-CUDA/bar.cu b/Tests/Module/CheckIPOSupported-CUDA/bar.cu
new file mode 100644
index 0000000..79b276d
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/bar.cu
@@ -0,0 +1,12 @@
+__device__ int foo_func(int);
+
+void __global__ bar_kernel(int x)
+{
+ foo_func(x);
+}
+
+int launch_kernel(int x)
+{
+ bar_kernel<<<1, 1>>>(x);
+ return x;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/foo.cu b/Tests/Module/CheckIPOSupported-CUDA/foo.cu
new file mode 100644
index 0000000..416607b
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/foo.cu
@@ -0,0 +1,4 @@
+extern __device__ int foo_func(int a)
+{
+ return a * 42 + 9;
+}
diff --git a/Tests/Module/CheckIPOSupported-CUDA/main.cu b/Tests/Module/CheckIPOSupported-CUDA/main.cu
new file mode 100644
index 0000000..8ef4873
--- /dev/null
+++ b/Tests/Module/CheckIPOSupported-CUDA/main.cu
@@ -0,0 +1,62 @@
+#include <iostream>
+
+#include "cuda.h"
+
+#ifdef _WIN32
+# define IMPORT __declspec(dllimport)
+#else
+# define IMPORT
+#endif
+
+IMPORT int launch_kernel(int x);
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ std::cout << "prop.major: " << prop.major << std::endl;
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card" << std::endl;
+
+ return 1;
+}
+
+int main()
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
+ launch_kernel(1);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "launch_kernel: kernel launch should have passed.\n "
+ "Error message: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
index dc2c3ad..9a1ba04 100644
--- a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
+++ b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt
@@ -1,6 +1,6 @@
^CMake Error at .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(message\):
- IPO is not supported \(no C/CXX/Fortran languages found in ENABLED_LANGUAGES
- global property\)\.
+ IPO is not supported \(no C/CXX/CUDA/Fortran languages found in
+ ENABLED_LANGUAGES global property\)\.
Call Stack \(most recent call first\):
.*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(_ipo_not_supported\)
default-lang-none\.cmake:[0-9]+ \(check_ipo_supported\)