From 96bc59b1ca01be231347404d178445263687dd22 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 22 Apr 2022 12:51:26 -0400 Subject: CUDA: Add Device LTO support for nvcc Fixes #22200 --- Help/release/dev/cuda-device-lto.rst | 7 +++ Modules/CheckIPOSupported.cmake | 37 ++++++++----- Modules/Compiler/Clang-CUDA.cmake | 4 ++ Modules/Compiler/NVIDIA-CUDA.cmake | 8 +++ Source/cmGeneratorTarget.cxx | 30 +++++++++-- Source/cmGeneratorTarget.h | 5 +- Source/cmGhsMultiTargetGenerator.cxx | 4 +- Source/cmGlobalXCodeGenerator.cxx | 4 +- Source/cmLinkLineDeviceComputer.cxx | 20 +++++++ Source/cmLinkLineDeviceComputer.h | 1 + Source/cmLocalGenerator.cxx | 25 +++++++-- Source/cmLocalGenerator.h | 13 ++++- Source/cmLocalVisualStudio7Generator.cxx | 3 +- Source/cmMakefileExecutableTargetGenerator.cxx | 17 +++--- Source/cmMakefileLibraryTargetGenerator.cxx | 13 +++-- Source/cmVisualStudio10TargetGenerator.cxx | 11 ++-- Tests/CMakeLists.txt | 5 ++ Tests/CudaOnly/CMakeLists.txt | 24 +++++---- Tests/CudaOnly/DeviceLTO/CMakeLists.txt | 37 +++++++++++++ Tests/CudaOnly/DeviceLTO/file1.cu | 17 ++++++ Tests/CudaOnly/DeviceLTO/file2.cu | 5 ++ Tests/CudaOnly/DeviceLTO/file3.cu | 4 ++ Tests/CudaOnly/DeviceLTO/main.cu | 62 ++++++++++++++++++++++ Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt | 32 +++++++++++ Tests/Module/CheckIPOSupported-CUDA/bar.cu | 12 +++++ Tests/Module/CheckIPOSupported-CUDA/foo.cu | 4 ++ Tests/Module/CheckIPOSupported-CUDA/main.cu | 62 ++++++++++++++++++++++ .../CheckIPOSupported/default-lang-none-stderr.txt | 4 +- 28 files changed, 412 insertions(+), 58 deletions(-) create mode 100644 Help/release/dev/cuda-device-lto.rst create mode 100644 Tests/CudaOnly/DeviceLTO/CMakeLists.txt create mode 100644 Tests/CudaOnly/DeviceLTO/file1.cu create mode 100644 Tests/CudaOnly/DeviceLTO/file2.cu create mode 100644 Tests/CudaOnly/DeviceLTO/file3.cu create mode 100644 Tests/CudaOnly/DeviceLTO/main.cu create mode 100644 Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt create mode 100644 Tests/Module/CheckIPOSupported-CUDA/bar.cu create mode 100644 Tests/Module/CheckIPOSupported-CUDA/foo.cu create mode 100644 Tests/Module/CheckIPOSupported-CUDA/main.cu diff --git a/Help/release/dev/cuda-device-lto.rst b/Help/release/dev/cuda-device-lto.rst new file mode 100644 index 0000000..113062b --- /dev/null +++ b/Help/release/dev/cuda-device-lto.rst @@ -0,0 +1,7 @@ +cuda-device-lto +--------------- + +* ``CUDA`` language now supports device link time optimization when using + ``nvcc``. The :variable:`CMAKE_INTERPROCEDURAL_OPTIMIZATION` variable and + the associated :prop_tgt:`INTERPROCEDURAL_OPTIMIZATION` target property will + activate device LTO. diff --git a/Modules/CheckIPOSupported.cmake b/Modules/CheckIPOSupported.cmake index f0b4fd6..14262a1 100644 --- a/Modules/CheckIPOSupported.cmake +++ b/Modules/CheckIPOSupported.cmake @@ -76,6 +76,23 @@ endmacro() # Run IPO/LTO test macro(_ipo_run_language_check language) + set(_C_ext "c") + set(_CXX_ext "cpp") + set(_Fortran_ext "f") + string(COMPARE EQUAL "${language}" "CUDA" is_cuda) + + set(ext ${_${language}_ext}) + if(NOT "${ext}" STREQUAL "") + set(copy_sources foo.${ext} main.${ext}) + elseif(is_cuda) + if(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE) + set("${X_RESULT}" YES PARENT_SCOPE) + endif() + return() + else() + message(FATAL_ERROR "Language not supported") + endif() + set(testdir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/_CMakeLTOTest-${language}") file(REMOVE_RECURSE "${testdir}") @@ -91,17 +108,6 @@ macro(_ipo_run_language_check language) set(try_compile_src "${CMAKE_ROOT}/Modules/CheckIPOSupported") - set(_C_ext "c") - set(_CXX_ext "cpp") - set(_Fortran_ext "f") - - set(ext ${_${language}_ext}) - if(NOT "${ext}" STREQUAL "") - set(copy_sources foo.${ext} main.${ext}) - else() - message(FATAL_ERROR "Language not supported") - endif() - # Use: # * TRY_COMPILE_PROJECT_NAME # * CMAKE_VERSION @@ -211,6 +217,11 @@ function(check_ipo_supported) list(APPEND languages "C") endif() + list(FIND enabled_languages "CUDA" result) + if(NOT result EQUAL -1) + list(APPEND languages "CUDA") + endif() + list(FIND enabled_languages "Fortran" result) if(NOT result EQUAL -1) list(APPEND languages "Fortran") @@ -219,7 +230,7 @@ function(check_ipo_supported) string(COMPARE EQUAL "${languages}" "" no_languages) if(no_languages) _ipo_not_supported( - "no C/CXX/Fortran languages found in ENABLED_LANGUAGES global property" + "no C/CXX/CUDA/Fortran languages found in ENABLED_LANGUAGES global property" ) return() endif() @@ -227,7 +238,7 @@ function(check_ipo_supported) set(languages "${X_LANGUAGES}") set(unsupported_languages "${languages}") - list(REMOVE_ITEM unsupported_languages "C" "CXX" "Fortran") + list(REMOVE_ITEM unsupported_languages "C" "CXX" "CUDA" "Fortran") string(COMPARE NOTEQUAL "${unsupported_languages}" "" has_unsupported) if(has_unsupported) _ipo_not_supported( diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake index 219897e..d9929f1 100644 --- a/Modules/Compiler/Clang-CUDA.cmake +++ b/Modules/Compiler/Clang-CUDA.cmake @@ -35,6 +35,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static") set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart") set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "") +# Clang doesn't support CUDA device LTO +set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE NO) +set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER NO) + if(UNIX) list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl") endif() diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake index 33509ac..2b8a1ea 100644 --- a/Modules/Compiler/NVIDIA-CUDA.cmake +++ b/Modules/Compiler/NVIDIA-CUDA.cmake @@ -48,6 +48,13 @@ if((NOT DEFINED CMAKE_DEPENDS_USE_COMPILER OR CMAKE_DEPENDS_USE_COMPILER) set(CMAKE_CUDA_DEPENDS_USE_COMPILER TRUE) endif() +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2) + set(_CMAKE_CUDA_IPO_SUPPORTED_BY_CMAKE YES) + set(_CMAKE_CUDA_IPO_MAY_BE_SUPPORTED_BY_COMPILER YES) + + set(CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO " -dlto") +endif() + if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC") set(CMAKE_CUDA_COMPILE_OPTIONS_PIE -Xcompiler=-fPIE) set(CMAKE_CUDA_COMPILE_OPTIONS_PIC -Xcompiler=-fPIC) @@ -61,6 +68,7 @@ if(NOT "x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC") string(APPEND CMAKE_CUDA_FLAGS_MINSIZEREL_INIT " -O1 -DNDEBUG") string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO_INIT " -O2 -g -DNDEBUG") endif() + set(CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS -shared) set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA -isystem=) diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx index 433c1d5..dace055 100644 --- a/Source/cmGeneratorTarget.cxx +++ b/Source/cmGeneratorTarget.cxx @@ -916,11 +916,19 @@ bool cmGeneratorTarget::IsIPOEnabled(std::string const& lang, return false; } - if (lang != "C" && lang != "CXX" && lang != "Fortran") { + if (lang != "C" && lang != "CXX" && lang != "CUDA" && lang != "Fortran") { // We do not define IPO behavior for other languages. return false; } + if (lang == "CUDA") { + // CUDA IPO requires both CUDA_ARCHITECTURES and CUDA_SEPARABLE_COMPILATION + if (cmIsOff(this->GetSafeProperty("CUDA_ARCHITECTURES")) || + cmIsOff(this->GetSafeProperty("CUDA_SEPARABLE_COMPILATION"))) { + return false; + } + } + cmPolicies::PolicyStatus cmp0069 = this->GetPolicyStatusCMP0069(); if (cmp0069 == cmPolicies::OLD || cmp0069 == cmPolicies::WARN) { @@ -3428,7 +3436,9 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags, "EXPLICIT_LANGUAGE"); } -void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const +void cmGeneratorTarget::AddCUDAArchitectureFlags(cmBuildStep compileOrLink, + const std::string& config, + std::string& flags) const { std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES"); @@ -3460,6 +3470,7 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const std::string const& compiler = this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID"); + const bool ipoEnabled = this->IsIPOEnabled("CUDA", config); // Check for special modes: `all`, `all-major`. if (property == "all" || property == "all-major") { @@ -3539,6 +3550,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const } if (compiler == "NVIDIA") { + if (ipoEnabled && compileOrLink == cmBuildStep::Link) { + if (cmValue cudaIPOFlags = + this->Makefile->GetDefinition("CMAKE_CUDA_LINK_OPTIONS_IPO")) { + flags += cudaIPOFlags; + } + } + for (CudaArchitecture& architecture : architectures) { flags += " --generate-code=arch=compute_" + architecture.name + ",code=["; @@ -3551,7 +3569,13 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const } } - if (architecture.real) { + if (ipoEnabled) { + if (compileOrLink == cmBuildStep::Compile) { + flags += "lto_" + architecture.name; + } else if (compileOrLink == cmBuildStep::Link) { + flags += "sm_" + architecture.name; + } + } else if (architecture.real) { flags += "sm_" + architecture.name; } diff --git a/Source/cmGeneratorTarget.h b/Source/cmGeneratorTarget.h index 349afa7..25e6a81 100644 --- a/Source/cmGeneratorTarget.h +++ b/Source/cmGeneratorTarget.h @@ -23,6 +23,7 @@ #include "cmStateTypes.h" #include "cmValue.h" +enum class cmBuildStep; class cmComputeLinkInformation; class cmCustomCommand; class cmGlobalGenerator; @@ -471,7 +472,9 @@ public: void AddExplicitLanguageFlags(std::string& flags, cmSourceFile const& sf) const; - void AddCUDAArchitectureFlags(std::string& flags) const; + void AddCUDAArchitectureFlags(cmBuildStep compileOrLink, + const std::string& config, + std::string& flags) const; void AddCUDAToolkitFlags(std::string& flags) const; void AddHIPArchitectureFlags(std::string& flags) const; diff --git a/Source/cmGhsMultiTargetGenerator.cxx b/Source/cmGhsMultiTargetGenerator.cxx index bf019c3..138d3f1 100644 --- a/Source/cmGhsMultiTargetGenerator.cxx +++ b/Source/cmGhsMultiTargetGenerator.cxx @@ -183,8 +183,8 @@ void cmGhsMultiTargetGenerator::SetCompilerFlags(std::string const& config, auto i = this->FlagsByLanguage.find(language); if (i == this->FlagsByLanguage.end()) { std::string flags; - this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, - language, config); + this->LocalGenerator->AddLanguageFlags( + flags, this->GeneratorTarget, cmBuildStep::Compile, language, config); this->LocalGenerator->AddCMP0018Flags(flags, this->GeneratorTarget, language, config); this->LocalGenerator->AddVisibilityPresetFlags( diff --git a/Source/cmGlobalXCodeGenerator.cxx b/Source/cmGlobalXCodeGenerator.cxx index 456f5bc..70a379e 100644 --- a/Source/cmGlobalXCodeGenerator.cxx +++ b/Source/cmGlobalXCodeGenerator.cxx @@ -2368,8 +2368,8 @@ void cmGlobalXCodeGenerator::CreateBuildSettings(cmGeneratorTarget* gtgt, std::string& flags = cflags[lang]; // Add language-specific flags. - this->CurrentLocalGenerator->AddLanguageFlags(flags, gtgt, lang, - configName); + this->CurrentLocalGenerator->AddLanguageFlags( + flags, gtgt, cmBuildStep::Compile, lang, configName); if (gtgt->IsIPOEnabled(lang, configName)) { this->CurrentLocalGenerator->AppendFeatureOptions(flags, lang, "IPO"); diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx index 719b834..b06dc3d 100644 --- a/Source/cmLinkLineDeviceComputer.cxx +++ b/Source/cmLinkLineDeviceComputer.cxx @@ -68,6 +68,26 @@ bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinking( }); } +bool cmLinkLineDeviceComputer::ComputeRequiresDeviceLinkingIPOFlag( + cmComputeLinkInformation& cli) +{ + // Determine if this item might requires device linking. + // For this we only consider targets + using ItemVector = cmComputeLinkInformation::ItemVector; + ItemVector const& items = cli.GetItems(); + std::string config = cli.GetConfig(); + return std::any_of( + items.begin(), items.end(), + [config](cmComputeLinkInformation::Item const& item) -> bool { + return item.Target && + item.Target->GetType() == cmStateEnums::STATIC_LIBRARY && + // this dependency requires us to device link it + !item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS") && + item.Target->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION") && + item.Target->IsIPOEnabled("CUDA", config); + }); +} + void cmLinkLineDeviceComputer::ComputeLinkLibraries( cmComputeLinkInformation& cli, std::string const& stdLibString, std::vector>& linkLibraries) diff --git a/Source/cmLinkLineDeviceComputer.h b/Source/cmLinkLineDeviceComputer.h index dee625b..0916307 100644 --- a/Source/cmLinkLineDeviceComputer.h +++ b/Source/cmLinkLineDeviceComputer.h @@ -30,6 +30,7 @@ public: delete; bool ComputeRequiresDeviceLinking(cmComputeLinkInformation& cli); + bool ComputeRequiresDeviceLinkingIPOFlag(cmComputeLinkInformation& cli); void ComputeLinkLibraries( cmComputeLinkInformation& cli, std::string const& stdLibString, diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx index 67c8bf2..7b823da 100644 --- a/Source/cmLocalGenerator.cxx +++ b/Source/cmLocalGenerator.cxx @@ -36,6 +36,7 @@ #include "cmInstallScriptGenerator.h" #include "cmInstallTargetGenerator.h" #include "cmLinkLineComputer.h" +#include "cmLinkLineDeviceComputer.h" #include "cmMakefile.h" #include "cmRange.h" #include "cmRulePlaceholderExpander.h" @@ -1381,7 +1382,7 @@ std::vector> cmLocalGenerator::GetStaticLibraryFlags( } void cmLocalGenerator::GetDeviceLinkFlags( - cmLinkLineComputer& linkLineComputer, const std::string& config, + cmLinkLineDeviceComputer& linkLineComputer, const std::string& config, std::string& linkLibs, std::string& linkFlags, std::string& frameworkPath, std::string& linkPath, cmGeneratorTarget* target) { @@ -1389,6 +1390,18 @@ void cmLocalGenerator::GetDeviceLinkFlags( cmComputeLinkInformation* pcli = target->GetLinkInformation(config); + auto linklang = linkLineComputer.GetLinkerLanguage(target, config); + auto ipoEnabled = target->IsIPOEnabled(linklang, config); + if (!ipoEnabled) { + ipoEnabled = linkLineComputer.ComputeRequiresDeviceLinkingIPOFlag(*pcli); + } + if (ipoEnabled) { + if (cmValue cudaIPOFlags = this->Makefile->GetDefinition( + "CMAKE_CUDA_DEVICE_LINK_OPTIONS_IPO")) { + linkFlags += cudaIPOFlags; + } + } + if (pcli) { // Compute the required device link libraries when // resolving gpu lang device symbols @@ -1396,6 +1409,8 @@ void cmLocalGenerator::GetDeviceLinkFlags( linkPath); } + // iterate link deps and see if any of them need IPO + std::vector linkOpts; target->GetLinkOptions(linkOpts, config, "CUDA"); // LINK_OPTIONS are escaped. @@ -1590,7 +1605,8 @@ std::vector> cmLocalGenerator::GetTargetCompileFlags( cmMakefile* mf = this->GetMakefile(); // Add language-specific flags. - this->AddLanguageFlags(compileFlags, target, lang, config); + this->AddLanguageFlags(compileFlags, target, cmBuildStep::Compile, lang, + config); if (target->IsIPOEnabled(lang, config)) { this->AppendFeatureOptions(compileFlags, lang, "IPO"); @@ -1903,6 +1919,7 @@ void cmLocalGenerator::AddArchitectureFlags(std::string& flags, void cmLocalGenerator::AddLanguageFlags(std::string& flags, cmGeneratorTarget const* target, + cmBuildStep compileOrLink, const std::string& lang, const std::string& config) { @@ -1926,7 +1943,7 @@ void cmLocalGenerator::AddLanguageFlags(std::string& flags, } } } else if (lang == "CUDA") { - target->AddCUDAArchitectureFlags(flags); + target->AddCUDAArchitectureFlags(compileOrLink, config, flags); target->AddCUDAToolkitFlags(flags); } else if (lang == "ISPC") { target->AddISPCTargetFlags(flags); @@ -2038,7 +2055,7 @@ void cmLocalGenerator::AddLanguageFlagsForLinking( this->AddCompilerRequirementFlag(flags, target, lang, config); } - this->AddLanguageFlags(flags, target, lang, config); + this->AddLanguageFlags(flags, target, cmBuildStep::Link, lang, config); if (target->IsIPOEnabled(lang, config)) { this->AppendFeatureOptions(flags, lang, "IPO"); diff --git a/Source/cmLocalGenerator.h b/Source/cmLocalGenerator.h index 7cae1fc..0529431 100644 --- a/Source/cmLocalGenerator.h +++ b/Source/cmLocalGenerator.h @@ -35,6 +35,7 @@ class cmGeneratorTarget; class cmGlobalGenerator; class cmImplicitDependsList; class cmLinkLineComputer; +class cmLinkLineDeviceComputer; class cmMakefile; class cmRulePlaceholderExpander; class cmSourceFile; @@ -59,6 +60,13 @@ enum class cmDependencyScannerKind Compiler }; +/** What to compute language flags for */ +enum class cmBuildStep +{ + Compile, + Link +}; + /** Target and source file which have a specific output. */ struct cmSourcesWithOutput { @@ -143,7 +151,8 @@ public: const std::string& filterArch = std::string()); void AddLanguageFlags(std::string& flags, cmGeneratorTarget const* target, - const std::string& lang, const std::string& config); + cmBuildStep compileOrLink, const std::string& lang, + const std::string& config); void AddLanguageFlagsForLinking(std::string& flags, cmGeneratorTarget const* target, const std::string& lang, @@ -476,7 +485,7 @@ public: /** Fill out these strings for the given target. Libraries to link, * flags, and linkflags. */ - void GetDeviceLinkFlags(cmLinkLineComputer& linkLineComputer, + void GetDeviceLinkFlags(cmLinkLineDeviceComputer& linkLineComputer, const std::string& config, std::string& linkLibs, std::string& linkFlags, std::string& frameworkPath, std::string& linkPath, cmGeneratorTarget* target); diff --git a/Source/cmLocalVisualStudio7Generator.cxx b/Source/cmLocalVisualStudio7Generator.cxx index f65add1..0451d96 100644 --- a/Source/cmLocalVisualStudio7Generator.cxx +++ b/Source/cmLocalVisualStudio7Generator.cxx @@ -680,7 +680,8 @@ void cmLocalVisualStudio7Generator::WriteConfiguration( langForClCompile = linkLanguage; if (langForClCompile == "C" || langForClCompile == "CXX" || langForClCompile == "Fortran") { - this->AddLanguageFlags(flags, target, langForClCompile, configName); + this->AddLanguageFlags(flags, target, cmBuildStep::Compile, + langForClCompile, configName); } // set the correct language if (linkLanguage == "C") { diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx index 74574f7..54f03b9 100644 --- a/Source/cmMakefileExecutableTargetGenerator.cxx +++ b/Source/cmMakefileExecutableTargetGenerator.cxx @@ -136,17 +136,11 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule( std::vector depends; this->AppendLinkDepends(depends, linkLanguage); - // Build a list of compiler flags and linker flags. - std::string langFlags; - std::string linkFlags; - // Add language feature flags. + std::string langFlags; this->LocalGenerator->AddLanguageFlagsForLinking( langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName()); - // Add device-specific linker flags. - this->GetDeviceLinkFlags(linkFlags, linkLanguage); - // Construct a list of files associated with this executable that // may need to be cleaned. std::vector exeCleanFiles; @@ -173,13 +167,20 @@ void cmMakefileExecutableTargetGenerator::WriteNvidiaDeviceExecutableRule( // Set path conversion for link script shells. this->LocalGenerator->SetLinkScriptShell(useLinkScript); - std::unique_ptr linkLineComputer( + std::unique_ptr linkLineComputer( new cmLinkLineDeviceComputer( this->LocalGenerator, this->LocalGenerator->GetStateSnapshot().GetDirectory())); linkLineComputer->SetForResponse(useResponseFileForLibs); linkLineComputer->SetRelink(relink); + // Create set of linking flags. + std::string linkFlags; + std::string ignored_; + this->LocalGenerator->GetDeviceLinkFlags( + *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_, + ignored_, this->GeneratorTarget); + // Collect up flags to link in needed libraries. std::string linkLibs; this->CreateLinkLibs( diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx index 3f7d87d..45ef8c8 100644 --- a/Source/cmMakefileLibraryTargetGenerator.cxx +++ b/Source/cmMakefileLibraryTargetGenerator.cxx @@ -287,10 +287,6 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules( this->LocalGenerator->AddLanguageFlagsForLinking( langFlags, this->GeneratorTarget, linkLanguage, this->GetConfigName()); - // Create set of linking flags. - std::string linkFlags; - this->GetDeviceLinkFlags(linkFlags, linkLanguage); - // Clean files associated with this library. std::set libCleanFiles; libCleanFiles.insert( @@ -315,13 +311,20 @@ void cmMakefileLibraryTargetGenerator::WriteNvidiaDeviceLibraryRules( // Collect up flags to link in needed libraries. std::string linkLibs; - std::unique_ptr linkLineComputer( + std::unique_ptr linkLineComputer( new cmLinkLineDeviceComputer( this->LocalGenerator, this->LocalGenerator->GetStateSnapshot().GetDirectory())); linkLineComputer->SetForResponse(useResponseFileForLibs); linkLineComputer->SetRelink(relink); + // Create set of linking flags. + std::string linkFlags; + std::string ignored_; + this->LocalGenerator->GetDeviceLinkFlags( + *linkLineComputer, this->GetConfigName(), ignored_, linkFlags, ignored_, + ignored_, this->GeneratorTarget); + this->CreateLinkLibs( linkLineComputer.get(), linkLibs, useResponseFileForLibs, depends, cmMakefileTargetGenerator::ResponseFlagFor::DeviceLink); diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index a7460e8..020691d 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -3300,6 +3300,7 @@ bool cmVisualStudio10TargetGenerator::ComputeClOptions( this->LangForClCompile = langForClCompile; if (!langForClCompile.empty()) { this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, + cmBuildStep::Compile, langForClCompile, configName); this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget, langForClCompile, configName); @@ -3675,8 +3676,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( // Get compile flags for CUDA in this directory. std::string flags; - this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA", - configName); + this->LocalGenerator->AddLanguageFlags( + flags, this->GeneratorTarget, cmBuildStep::Compile, "CUDA", configName); this->LocalGenerator->AddCompileOptions(flags, this->GeneratorTarget, "CUDA", configName); @@ -3947,7 +3948,8 @@ bool cmVisualStudio10TargetGenerator::ComputeMasmOptions( std::string flags; this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, - "ASM_MASM", configName); + cmBuildStep::Compile, "ASM_MASM", + configName); masmOptions.Parse(flags); @@ -3999,7 +4001,8 @@ bool cmVisualStudio10TargetGenerator::ComputeNasmOptions( std::string flags; this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, - "ASM_NASM", configName); + cmBuildStep::Compile, "ASM_NASM", + configName); flags += " -f"; flags += this->Makefile->GetSafeDefinition("CMAKE_ASM_NASM_OBJECT_FORMAT"); nasmOptions.Parse(flags); diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index d2ded37..f98e7e9 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -618,6 +618,11 @@ if(BUILD_TESTING) set(Module.CheckIPOSupported-CXX_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_CXX=${CMake_TEST_IPO_WORKS_CXX}) ADD_TEST_MACRO(Module.CheckIPOSupported-CXX CheckIPOSupported-CXX) + if(CMake_TEST_CUDA) + ADD_TEST_MACRO(Module.CheckIPOSupported-CUDA CheckIPOSupported-CUDA) + set_property(TEST Module.CheckIPOSupported-CUDA APPEND PROPERTY LABELS "CUDA") + endif() + if(CMAKE_Fortran_COMPILER) set(Module.CheckIPOSupported-Fortran_BUILD_OPTIONS -DCMake_TEST_IPO_WORKS_Fortran=${CMake_TEST_IPO_WORKS_Fortran}) ADD_TEST_MACRO(Module.CheckIPOSupported-Fortran CheckIPOSupported-Fortran) diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index aa4755d..091872d 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -7,7 +7,6 @@ endmacro () add_cuda_test_macro(CudaOnly.Architecture Architecture) add_cuda_test_macro(CudaOnly.ArchSpecial CudaOnlyArchSpecial) add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags) - add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard) add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX) add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit) @@ -28,6 +27,19 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) endif() +# The CUDA only ships the shared version of the toolkit libraries +# on windows +if(NOT WIN32) + add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit) +endif() + +add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO) + +if(MSVC) + # Tests for features that only work with MSVC + add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB) +endif() + add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND ${CMAKE_CTEST_COMMAND} -C $ --build-and-test @@ -41,16 +53,6 @@ add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND set_property(TEST "CudaOnly.DontResolveDeviceSymbols" APPEND PROPERTY LABELS "CUDA") -# The CUDA only ships the shared version of the toolkit libraries -# on windows -if(NOT WIN32) - add_cuda_test_macro(CudaOnly.StaticRuntimePlusToolkit CudaOnlyStaticRuntimePlusToolkit) -endif() - -if(MSVC) - add_cuda_test_macro(CudaOnly.PDB CudaOnlyPDB) -endif() - add_test(NAME CudaOnly.RuntimeControls COMMAND ${CMAKE_CTEST_COMMAND} -C $ --build-and-test diff --git a/Tests/CudaOnly/DeviceLTO/CMakeLists.txt b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt new file mode 100644 index 0000000..653b35d --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt @@ -0,0 +1,37 @@ +cmake_minimum_required(VERSION 3.18) +project(DeviceLTO CUDA) + +# Goal: +# Verify that we correctly compile with device LTO +# Verify that device LTO requirements are propagated to +# the final device link line + +add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu) +add_executable(CudaOnlyDeviceLTO main.cu) + +set_target_properties(CUDA_dlto + PROPERTIES + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}" + CUDA_SEPARABLE_COMPILATION ON + POSITION_INDEPENDENT_CODE ON) + +set_target_properties(CudaOnlyDeviceLTO + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}" + ) + +target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto) + +include(CheckIPOSupported) +check_ipo_supported(LANGUAGES CUDA RESULT ipo_supported) +if(ipo_supported) + set_target_properties(CUDA_dlto + PROPERTIES + INTERPROCEDURAL_OPTIMIZATION ON) + + # When non-LTO variants (i.e. virtual) are built together with LTO ones the + # linker warns about missing device LTO for the virtual architectures. + # Ignore these warnings. + target_link_options(CudaOnlyDeviceLTO PRIVATE "$") +endif() diff --git a/Tests/CudaOnly/DeviceLTO/file1.cu b/Tests/CudaOnly/DeviceLTO/file1.cu new file mode 100644 index 0000000..703927c --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/file1.cu @@ -0,0 +1,17 @@ +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +#else +# define EXPORT +#endif + +extern __device__ int file2_func(int); +void __global__ kernel(int x) +{ + file2_func(x); +} + +EXPORT int launch_kernel(int x) +{ + kernel<<<1, 1>>>(x); + return x; +} diff --git a/Tests/CudaOnly/DeviceLTO/file2.cu b/Tests/CudaOnly/DeviceLTO/file2.cu new file mode 100644 index 0000000..73d6468 --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/file2.cu @@ -0,0 +1,5 @@ +extern __device__ int file3_func(int); +int __device__ file2_func(int x) +{ + return x + file3_func(x); +} diff --git a/Tests/CudaOnly/DeviceLTO/file3.cu b/Tests/CudaOnly/DeviceLTO/file3.cu new file mode 100644 index 0000000..235ac06 --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/file3.cu @@ -0,0 +1,4 @@ +int __device__ file3_func(int x) +{ + return x * x * x; +} diff --git a/Tests/CudaOnly/DeviceLTO/main.cu b/Tests/CudaOnly/DeviceLTO/main.cu new file mode 100644 index 0000000..8ef4873 --- /dev/null +++ b/Tests/CudaOnly/DeviceLTO/main.cu @@ -0,0 +1,62 @@ +#include + +#include "cuda.h" + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +#else +# define IMPORT +#endif + +IMPORT int launch_kernel(int x); + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + + std::cout << "Could not find a CUDA enabled card" << std::endl; + + return 1; +} + +int main() +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + launch_kernel(1); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "launch_kernel: kernel launch should have passed.\n " + "Error message: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt new file mode 100644 index 0000000..9dd670e --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.8) +project(CheckIPOSupported-CUDA LANGUAGES CUDA) + +cmake_policy(SET CMP0069 NEW) + +include(CheckIPOSupported) +check_ipo_supported(RESULT ipo_supported OUTPUT ipo_output) +if(ipo_supported) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON) +endif() + +if(NOT ipo_supported AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" + AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2) + message(FATAL_ERROR "CheckIPOSupported failed to correctly identify NVIDIA CUDA IPO support") +endif() + +set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + +add_library(foo STATIC foo.cu) +set_target_properties(foo PROPERTIES + WINDOWS_EXPORT_ALL_SYMBOLS ON + POSITION_INDEPENDENT_CODE ON) + +add_library(bar SHARED bar.cu) +set_target_properties(bar PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS ON) +target_link_libraries(bar PRIVATE foo) + +add_executable(CheckIPOSupported-CUDA main.cu) +target_link_libraries(CheckIPOSupported-CUDA PUBLIC bar) + +enable_testing() +add_test(NAME CheckIPOSupported-CUDA COMMAND CheckIPOSupported-CUDA) diff --git a/Tests/Module/CheckIPOSupported-CUDA/bar.cu b/Tests/Module/CheckIPOSupported-CUDA/bar.cu new file mode 100644 index 0000000..79b276d --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/bar.cu @@ -0,0 +1,12 @@ +__device__ int foo_func(int); + +void __global__ bar_kernel(int x) +{ + foo_func(x); +} + +int launch_kernel(int x) +{ + bar_kernel<<<1, 1>>>(x); + return x; +} diff --git a/Tests/Module/CheckIPOSupported-CUDA/foo.cu b/Tests/Module/CheckIPOSupported-CUDA/foo.cu new file mode 100644 index 0000000..416607b --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/foo.cu @@ -0,0 +1,4 @@ +extern __device__ int foo_func(int a) +{ + return a * 42 + 9; +} diff --git a/Tests/Module/CheckIPOSupported-CUDA/main.cu b/Tests/Module/CheckIPOSupported-CUDA/main.cu new file mode 100644 index 0000000..8ef4873 --- /dev/null +++ b/Tests/Module/CheckIPOSupported-CUDA/main.cu @@ -0,0 +1,62 @@ +#include + +#include "cuda.h" + +#ifdef _WIN32 +# define IMPORT __declspec(dllimport) +#else +# define IMPORT +#endif + +IMPORT int launch_kernel(int x); + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + + std::cout << "Could not find a CUDA enabled card" << std::endl; + + return 1; +} + +int main() +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + launch_kernel(1); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "launch_kernel: kernel launch should have passed.\n " + "Error message: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt index dc2c3ad..9a1ba04 100644 --- a/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt +++ b/Tests/RunCMake/CheckIPOSupported/default-lang-none-stderr.txt @@ -1,6 +1,6 @@ ^CMake Error at .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(message\): - IPO is not supported \(no C/CXX/Fortran languages found in ENABLED_LANGUAGES - global property\)\. + IPO is not supported \(no C/CXX/CUDA/Fortran languages found in + ENABLED_LANGUAGES global property\)\. Call Stack \(most recent call first\): .*/Modules/CheckIPOSupported\.cmake:[0-9]+ \(_ipo_not_supported\) default-lang-none\.cmake:[0-9]+ \(check_ipo_supported\) -- cgit v0.12