summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author root <raul@tambre.ee> 2021-07-27 19:58:03 (GMT)
committer root <raul@tambre.ee> 2021-07-29 10:19:37 (GMT)
commit 0b1cea66cd1f80458f0da579d0182d908874939d (patch)
tree 182874aefd315752ab7a5e3c583a4a13d21e5ae4
parent f7cf69e34a1607e8ea2b6d10fef6a6058377c24e (diff)
download CMake-0b1cea66cd1f80458f0da579d0182d908874939d.zip
CMake-0b1cea66cd1f80458f0da579d0182d908874939d.tar.gz
CMake-0b1cea66cd1f80458f0da579d0182d908874939d.tar.bz2
CUDA/Clang: Fix separable compilation in non-root directories with Makefiles
The relative paths used when generating the device link rule were wrong nearly everywhere, so separable compilation only worked for targets whose files were compiled in the top-level directory. The CudaOnly.SeparateCompilation test has been modified to cover the non-root case. Fixes #22482.
-rw-r--r-- Help/release/3.21.rst 6
-rw-r--r-- Source/cmMakefileTargetGenerator.cxx 24
-rw-r--r-- Tests/CudaOnly/CMakeLists.txt 2
-rw-r--r-- Tests/CudaOnly/SeparateCompilation/CMakeLists.txt 19
-rw-r--r-- Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt 18
-rw-r--r-- Tests/CudaOnly/SeparateCompilation/main/main.cu (renamed from Tests/CudaOnly/SeparateCompilation/main.cu) 4
6 files changed, 42 insertions, 31 deletions
diff --git a/Help/release/3.21.rst b/Help/release/3.21.rst
index 3e70552..fc5d6ac 100644
--- a/Help/release/3.21.rst
+++ b/Help/release/3.21.rst
@@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following.
* The :generator:`Visual Studio 17 2022` generator is now based on
"Visual Studio 2022 Preview 2". Previously it was based on "Preview 1.1".
+
+3.21.2
+------
+
+* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now
+ correctly generated in non-root directories.
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx
index 6d8376c..6324b2e 100644
--- a/Source/cmMakefileTargetGenerator.cxx
+++ b/Source/cmMakefileTargetGenerator.cxx
@@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
}
std::vector<std::string> architectures = cmExpandedList(architecturesStr);
+ std::string const& relPath =
+ this->LocalGenerator->GetHomeRelativeOutputPath();
// Ensure there are no duplicates.
const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> {
std::vector<std::string> deps;
this->AppendTargetDepends(deps, true);
this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA");
- std::copy(this->Objects.begin(), this->Objects.end(),
- std::back_inserter(deps));
+
+ for (std::string const& obj : this->Objects) {
+ deps.emplace_back(cmStrCat(relPath, obj));
+ }
std::unordered_set<std::string> depsSet(deps.begin(), deps.end());
deps.clear();
@@ -1510,7 +1514,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
std::string profiles;
std::vector<std::string> fatbinaryDepends;
- std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h");
+ std::string const registerFile =
+ cmStrCat(objectDir, "cmake_cuda_register.h");
// Link device code for each architecture.
for (const std::string& architectureKind : architectures) {
@@ -1518,7 +1523,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
const std::string architecture =
architectureKind.substr(0, architectureKind.find('-'));
const std::string cubin =
- cmStrCat(relObjectDir, "sm_", architecture, ".cubin");
+ cmStrCat(objectDir, "sm_", architecture, ".cubin");
profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin);
fatbinaryDepends.emplace_back(cubin);
@@ -1530,8 +1535,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
// all architectures the register file will be the same too. Thus
// generate it only on the first invocation to reduce overhead.
if (fatbinaryDepends.size() == 1) {
- std::string registerFileRel =
- this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile);
+ std::string const registerFileRel =
+ cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h");
registerFileCmd =
cmStrCat(" --register-link-binaries=", registerFileRel);
cleanFiles.push_back(registerFileRel);
@@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
const std::string fatbinaryOutput =
cmStrCat(objectDir, "cmake_cuda_fatbin.h");
const std::string fatbinaryOutputRel =
- this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput);
+ cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h");
this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr,
fatbinaryOutputRel, fatbinaryDepends,
@@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule(
compileCmd, vars);
commands.emplace_back(compileCmd);
- this->LocalGenerator->WriteMakeRule(
- *this->BuildFileStream, nullptr, output,
- { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false);
+ this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output,
+ { fatbinaryOutputRel }, commands, false);
// Clean all the possible executable names and symlinks.
this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end());
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index fdb7a6e..a3fb409 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang)
add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
-add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
# Clang doesn't have flags for selecting the runtime.
diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
index 864ecbf..17069e3 100644
--- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
+++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
@@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11)
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
-add_executable(CudaOnlySeparateCompilation main.cu)
-target_link_libraries(CudaOnlySeparateCompilation
- PRIVATE CUDASeparateLibB)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11)
-set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE)
-
set_target_properties(CUDASeparateLibA
CUDASeparateLibB
PROPERTIES CUDA_SEPARABLE_COMPILATION ON
POSITION_INDEPENDENT_CODE ON)
-if (CMAKE_GENERATOR MATCHES "^Visual Studio")
- #Visual Studio CUDA integration will not perform device linking
- #on a target that itself does not have GenerateRelocatableDeviceCode
- #enabled.
- set_target_properties(CudaOnlySeparateCompilation
- PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-endif()
-
-if(APPLE)
- # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
- set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
-endif()
+add_subdirectory(main)
diff --git a/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt
new file mode 100644
index 0000000..c181078
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt
@@ -0,0 +1,18 @@
+add_executable(CudaOnlySeparateCompilation main.cu)
+target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
+set_target_properties(CudaOnlySeparateCompilation PROPERTIES
+ CUDA_STANDARD 11
+ CUDA_STANDARD_REQUIRED TRUE
+)
+
+if(CMAKE_GENERATOR MATCHES "^Visual Studio")
+ # Visual Studio CUDA integration will not perform device linking
+ # on a target that itself does not have GenerateRelocatableDeviceCode
+ # enabled.
+ set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+endif()
+
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
+ set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main/main.cu
index 40dbe5d..2b6e8f4 100644
--- a/Tests/CudaOnly/SeparateCompilation/main.cu
+++ b/Tests/CudaOnly/SeparateCompilation/main/main.cu
@@ -1,8 +1,8 @@
#include <iostream>
-#include "file1.h"
-#include "file2.h"
+#include "../file1.h"
+#include "../file2.h"
int file4_launch_kernel(int x);
int file5_launch_kernel(int x);