diff options
author | Brad King <brad.king@kitware.com> | 2021-07-29 12:45:27 (GMT) |
---|---|---|
committer | Kitware Robot <kwrobot@kitware.com> | 2021-07-29 12:45:40 (GMT) |
commit | 0a959bb271207f6f74024b4d8a06192db209f8ab (patch) | |
tree | 2eace9449b9396ac1ed62deb520099fa688d52b9 | |
parent | 5f1afff9f79784d59618abc16d8e56a3c5f3f1e3 (diff) | |
parent | 3975678fcc3928f2a7dcd79fe9b9e9ebf3abe2b2 (diff) | |
download | CMake-0a959bb271207f6f74024b4d8a06192db209f8ab.zip CMake-0a959bb271207f6f74024b4d8a06192db209f8ab.tar.gz CMake-0a959bb271207f6f74024b4d8a06192db209f8ab.tar.bz2 |
Merge topic 'cuda_separable_clang_make' into release-3.21
3975678fcc CUDA/Clang: Simplify --register-link-binaries logic
0b1cea66cd CUDA/Clang: Fix separable compilation in non-root directories with Makefiles
Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !6400
-rw-r--r-- | Help/release/3.21.rst | 6 | ||||
-rw-r--r-- | Source/cmMakefileTargetGenerator.cxx | 42 | ||||
-rw-r--r-- | Source/cmNinjaNormalTargetGenerator.cxx | 10 | ||||
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 2 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilation/CMakeLists.txt | 19 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt | 18 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilation/main/main.cu (renamed from Tests/CudaOnly/SeparateCompilation/main.cu) | 4 |
7 files changed, 56 insertions, 45 deletions
diff --git a/Help/release/3.21.rst b/Help/release/3.21.rst index 3e70552..fc5d6ac 100644 --- a/Help/release/3.21.rst +++ b/Help/release/3.21.rst @@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following. * The :generator:`Visual Studio 17 2022` generator is now based on "Visual Studio 2022 Preview 2". Previously it was based on "Preview 1.1". + +3.21.2 +------ + +* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now + correctly generated in non-root directories. diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index 6d8376c..98c61fe 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( } std::vector<std::string> architectures = cmExpandedList(architecturesStr); + std::string const& relPath = + this->LocalGenerator->GetHomeRelativeOutputPath(); // Ensure there are no duplicates. const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> { std::vector<std::string> deps; this->AppendTargetDepends(deps, true); this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA"); - std::copy(this->Objects.begin(), this->Objects.end(), - std::back_inserter(deps)); + + for (std::string const& obj : this->Objects) { + deps.emplace_back(cmStrCat(relPath, obj)); + } std::unordered_set<std::string> depsSet(deps.begin(), deps.end()); deps.clear(); @@ -1510,33 +1514,34 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( std::string profiles; std::vector<std::string> fatbinaryDepends; - std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h"); + std::string const registerFile = + cmStrCat(objectDir, "cmake_cuda_register.h"); // Link device code for each architecture. for (const std::string& architectureKind : architectures) { - // Clang always generates real code, so strip the specifier. - const std::string architecture = - architectureKind.substr(0, architectureKind.find('-')); - const std::string cubin = - cmStrCat(relObjectDir, "sm_", architecture, ".cubin"); - - profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinaryDepends.emplace_back(cubin); - std::string registerFileCmd; // The generated register file contains macros that when expanded // register the device routines. Because the routines are the same for // all architectures the register file will be the same too. Thus // generate it only on the first invocation to reduce overhead. - if (fatbinaryDepends.size() == 1) { - std::string registerFileRel = - this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile); + if (fatbinaryDepends.empty()) { + std::string const registerFileRel = + cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); registerFileCmd = cmStrCat(" --register-link-binaries=", registerFileRel); cleanFiles.push_back(registerFileRel); } + // Clang always generates real code, so strip the specifier. + const std::string architecture = + architectureKind.substr(0, architectureKind.find('-')); + const std::string cubin = + cmStrCat(objectDir, "sm_", architecture, ".cubin"); + + profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinaryDepends.emplace_back(cubin); + std::string command = cmStrCat( this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"), " -arch=sm_", architecture, registerFileCmd, " -o=$@ ", @@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( const std::string fatbinaryOutput = cmStrCat(objectDir, "cmake_cuda_fatbin.h"); const std::string fatbinaryOutputRel = - this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput); + cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h"); this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, fatbinaryOutputRel, fatbinaryDepends, @@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( compileCmd, vars); commands.emplace_back(compileCmd); - this->LocalGenerator->WriteMakeRule( - *this->BuildFileStream, nullptr, output, - { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false); + this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output, + { fatbinaryOutputRel }, commands, false); // Clean all the possible executable names and symlinks. this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end()); diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index 5a4c652..493bd4a 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( const std::string cubin = cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin"); - fatbinary.Variables["PROFILES"] += - cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinary.ExplicitDeps.emplace_back(cubin); - cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config)); dlink.ExplicitDeps = explicitDeps; dlink.Outputs = { cubin }; @@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( // the device routines. Because the routines are the same for all // architectures the register file will be the same too. Thus generate it // only on the first invocation to reduce overhead. - if (fatbinary.ExplicitDeps.size() == 1) { + if (fatbinary.ExplicitDeps.empty()) { dlink.Variables["REGISTER"] = cmStrCat( "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h"); } + fatbinary.Variables["PROFILES"] += + cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinary.ExplicitDeps.emplace_back(cubin); + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink); } diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index fdb7a6e..a3fb409 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang) add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs) add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) -add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) +add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation) if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") # Clang doesn't have flags for selecting the runtime. diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt index 864ecbf..17069e3 100644 --- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt +++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt @@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu) target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11) target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA) -add_executable(CudaOnlySeparateCompilation main.cu) -target_link_libraries(CudaOnlySeparateCompilation - PRIVATE CUDASeparateLibB) -set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11) -set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE) - set_target_properties(CUDASeparateLibA CUDASeparateLibB PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON) -if (CMAKE_GENERATOR MATCHES "^Visual Studio") - #Visual Studio CUDA integration will not perform device linking - #on a target that itself does not have GenerateRelocatableDeviceCode - #enabled. - set_target_properties(CudaOnlySeparateCompilation - PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -endif() - -if(APPLE) - # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. - set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) -endif() +add_subdirectory(main) diff --git a/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt new file mode 100644 index 0000000..c181078 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt @@ -0,0 +1,18 @@ +add_executable(CudaOnlySeparateCompilation main.cu) +target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB) +set_target_properties(CudaOnlySeparateCompilation PROPERTIES + CUDA_STANDARD 11 + CUDA_STANDARD_REQUIRED TRUE +) + +if(CMAKE_GENERATOR MATCHES "^Visual Studio") + # Visual Studio CUDA integration will not perform device linking + # on a target that itself does not have GenerateRelocatableDeviceCode + # enabled. + set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON) +endif() + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main/main.cu index 40dbe5d..2b6e8f4 100644 --- a/Tests/CudaOnly/SeparateCompilation/main.cu +++ b/Tests/CudaOnly/SeparateCompilation/main/main.cu @@ -1,8 +1,8 @@ #include <iostream> -#include "file1.h" -#include "file2.h" +#include "../file1.h" +#include "../file2.h" int file4_launch_kernel(int x); int file5_launch_kernel(int x); |