summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Brad King <brad.king@kitware.com> 2021-07-29 12:45:27 (GMT)
committer: Kitware Robot <kwrobot@kitware.com> 2021-07-29 12:45:40 (GMT)
commit: 0a959bb271207f6f74024b4d8a06192db209f8ab (patch)
tree: 2eace9449b9396ac1ed62deb520099fa688d52b9
parent: 5f1afff9f79784d59618abc16d8e56a3c5f3f1e3 (diff)
parent: 3975678fcc3928f2a7dcd79fe9b9e9ebf3abe2b2 (diff)
download: CMake-0a959bb271207f6f74024b4d8a06192db209f8ab.zip
CMake-0a959bb271207f6f74024b4d8a06192db209f8ab.tar.gz
CMake-0a959bb271207f6f74024b4d8a06192db209f8ab.tar.bz2
Merge topic 'cuda_separable_clang_make' into release-3.21
3975678fcc CUDA/Clang: Simplify --register-link-binaries logic
0b1cea66cd CUDA/Clang: Fix separable compilation in non-root directories with Makefiles

Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !6400
-rw-r--r-- Help/release/3.21.rst | 6
-rw-r--r-- Source/cmMakefileTargetGenerator.cxx | 42
-rw-r--r-- Source/cmNinjaNormalTargetGenerator.cxx | 10
-rw-r--r-- Tests/CudaOnly/CMakeLists.txt | 2
-rw-r--r-- Tests/CudaOnly/SeparateCompilation/CMakeLists.txt | 19
-rw-r--r-- Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt | 18
-rw-r--r-- Tests/CudaOnly/SeparateCompilation/main/main.cu (renamed from Tests/CudaOnly/SeparateCompilation/main.cu) | 4
7 files changed, 56 insertions, 45 deletions
diff --git a/Help/release/3.21.rst b/Help/release/3.21.rst
index 3e70552..fc5d6ac 100644
--- a/Help/release/3.21.rst
+++ b/Help/release/3.21.rst
@@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following.
* The :generator:`Visual Studio 17 2022` generator is now based on
"Visual Studio 2022 Preview 2". Previously it was based on "Preview 1.1".
+
+3.21.2
+------
+
+* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now
+ correctly generated in non-root directories.
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx
index 6d8376c..98c61fe 100644
--- a/Source/cmMakefileTargetGenerator.cxx
+++ b/Source/cmMakefileTargetGenerator.cxx
@@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
}
std::vector<std::string> architectures = cmExpandedList(architecturesStr);
+ std::string const& relPath =
+ this->LocalGenerator->GetHomeRelativeOutputPath();
// Ensure there are no duplicates.
const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
std::vector<std::string> deps;
this->AppendTargetDepends(deps, true);
this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
- std::copy(this->Objects.begin(), this->Objects.end(),
- std::back_inserter(deps));
+
+ for (std::string const& obj : this->Objects) {
+ deps.emplace_back(cmStrCat(relPath, obj));
+ }
std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
deps.clear();
@@ -1510,33 +1514,34 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
std::string profiles;
std::vector<std::string> fatbinaryDepends;
- std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
+ std::string const registerFile =
+ cmStrCat(objectDir, "cmake_cuda_register.h");
// Link device code for each architecture.
for (const std::string& architectureKind : architectures) {
- // Clang always generates real code, so strip the specifier.
- const std::string architecture =
- architectureKind.substr(0, architectureKind.find('-'));
- const std::string cubin =
- cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
-
- profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
- fatbinaryDepends.emplace_back(cubin);
-
std::string registerFileCmd;
// The generated register file contains macros that when expanded
// register the device routines. Because the routines are the same for
// all architectures the register file will be the same too. Thus
// generate it only on the first invocation to reduce overhead.
- if (fatbinaryDepends.size() == 1) {
- std::string registerFileRel =
- this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile);
+ if (fatbinaryDepends.empty()) {
+ std::string const registerFileRel =
+ cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
registerFileCmd =
cmStrCat(" --register-link-binaries=", registerFileRel);
cleanFiles.push_back(registerFileRel);
}
+ // Clang always generates real code, so strip the specifier.
+ const std::string architecture =
+ architectureKind.substr(0, architectureKind.find('-'));
+ const std::string cubin =
+ cmStrCat(objectDir, "sm_", architecture, ".cubin");
+
+ profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+ fatbinaryDepends.emplace_back(cubin);
+
std::string command = cmStrCat(
this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"),
" -arch=sm_", architecture, registerFileCmd, " -o=$@ ",
@@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
const std::string fatbinaryOutput =
cmStrCat(objectDir, "cmake_cuda_fatbin.h");
const std::string fatbinaryOutputRel =
- this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput);
+ cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h");
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
fatbinaryOutputRel, fatbinaryDepends,
@@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
compileCmd, vars);
commands.emplace_back(compileCmd);
- this->LocalGenerator->WriteMakeRule(
- *this->BuildFileStream, nullptr, output,
- { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
+ this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output,
+ { fatbinaryOutputRel }, commands, false);
// Clean all the possible executable names and symlinks.
this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx
index 5a4c652..493bd4a 100644
--- a/Source/cmNinjaNormalTargetGenerator.cxx
+++ b/Source/cmNinjaNormalTargetGenerator.cxx
@@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
const std::string cubin =
cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin");
- fatbinary.Variables["PROFILES"] +=
- cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
- fatbinary.ExplicitDeps.emplace_back(cubin);
-
cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config));
dlink.ExplicitDeps = explicitDeps;
dlink.Outputs = { cubin };
@@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements(
// the device routines. Because the routines are the same for all
// architectures the register file will be the same too. Thus generate it
// only on the first invocation to reduce overhead.
- if (fatbinary.ExplicitDeps.size() == 1) {
+ if (fatbinary.ExplicitDeps.empty()) {
dlink.Variables["REGISTER"] = cmStrCat(
"--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h");
}
+ fatbinary.Variables["PROFILES"] +=
+ cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
+ fatbinary.ExplicitDeps.emplace_back(cubin);
+
this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink);
}
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index fdb7a6e..a3fb409 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang)
add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
-add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
# Clang doesn't have flags for selecting the runtime.
diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
index 864ecbf..17069e3 100644
--- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
+++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
@@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11)
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
-add_executable(CudaOnlySeparateCompilation main.cu)
-target_link_libraries(CudaOnlySeparateCompilation
- PRIVATE CUDASeparateLibB)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
-
set_target_properties(CUDASeparateLibA
CUDASeparateLibB
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON)
-if (CMAKE_GENERATOR MATCHES "^Visual Studio")
- #Visual Studio CUDA integration will not perform device linking
- #on a target that itself does not have GenerateRelocatableDeviceCode
- #enabled.
- set_target_properties(CudaOnlySeparateCompilation
- PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-endif()
-
-if(APPLE)
- # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
- set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
-endif()
+add_subdirectory(main)
diff --git a/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt
new file mode 100644
index 0000000..c181078
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt
@@ -0,0 +1,18 @@
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
+set_target_properties(CudaOnlySeparateCompilation PROPERTIES
+ CUDA_STANDARD 11
+ CUDA_STANDARD_REQUIRED TRUE
+)
+
+if(CMAKE_GENERATOR MATCHES "^Visual Studio")
+ # Visual Studio CUDA integration will not perform device linking
+ # on a target that itself does not have GenerateRelocatableDeviceCode
+ # enabled.
+ set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
+ set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main/main.cu
index 40dbe5d..2b6e8f4 100644
--- a/Tests/CudaOnly/SeparateCompilation/main.cu
+++ b/Tests/CudaOnly/SeparateCompilation/main/main.cu
@@ -1,8 +1,8 @@
#include <iostream>
-#include "file1.h"
-#include "file2.h"
+#include "../file1.h"
+#include "../file2.h"
int file4_launch_kernel(int x);
int file5_launch_kernel(int x);