From 0b1cea66cd1f80458f0da579d0182d908874939d Mon Sep 17 00:00:00 2001 From: root Date: Tue, 27 Jul 2021 22:58:03 +0300 Subject: CUDA/Clang: Fix separable compilation in non-root directories with Makefiles Seems the relative paths were wrong basically all around such that only compiling files in the top-level directory would work. I've modified CudaOnly.SeparateCompilation to cover this. Fixes #22482. --- Help/release/3.21.rst | 6 ++ Source/cmMakefileTargetGenerator.cxx | 24 ++++---- Tests/CudaOnly/CMakeLists.txt | 2 +- Tests/CudaOnly/SeparateCompilation/CMakeLists.txt | 19 +----- Tests/CudaOnly/SeparateCompilation/main.cu | 68 ---------------------- .../SeparateCompilation/main/CMakeLists.txt | 18 ++++++ Tests/CudaOnly/SeparateCompilation/main/main.cu | 68 ++++++++++++++++++++++ 7 files changed, 108 insertions(+), 97 deletions(-) delete mode 100644 Tests/CudaOnly/SeparateCompilation/main.cu create mode 100644 Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt create mode 100644 Tests/CudaOnly/SeparateCompilation/main/main.cu diff --git a/Help/release/3.21.rst b/Help/release/3.21.rst index 3e70552..fc5d6ac 100644 --- a/Help/release/3.21.rst +++ b/Help/release/3.21.rst @@ -304,3 +304,9 @@ Changes made since CMake 3.21.0 include the following. * The :generator:`Visual Studio 17 2022` generator is now based on "Visual Studio 2022 Preview 2". Previously it was based on "Preview 1.1". + +3.21.2 +------ + +* ``CUDA`` targets with :prop_tgt:`CUDA_SEPARABLE_COMPILATION` enabled are now + correctly generated in non-root directories. 
diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index 6d8376c..6324b2e 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -1484,14 +1484,18 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( } std::vector<std::string> architectures = cmExpandedList(architecturesStr); + std::string const& relPath = + this->LocalGenerator->GetHomeRelativeOutputPath(); // Ensure there are no duplicates. const std::vector<std::string> linkDeps = [&]() -> std::vector<std::string> { std::vector<std::string> deps; this->AppendTargetDepends(deps, true); this->GeneratorTarget->GetLinkDepends(deps, this->GetConfigName(), "CUDA"); - std::copy(this->Objects.begin(), this->Objects.end(), - std::back_inserter(deps)); + + for (std::string const& obj : this->Objects) { + deps.emplace_back(cmStrCat(relPath, obj)); + } std::unordered_set<std::string> depsSet(deps.begin(), deps.end()); deps.clear(); @@ -1510,7 +1514,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( std::string profiles; std::vector<std::string> fatbinaryDepends; - std::string registerFile = cmStrCat(objectDir, "cmake_cuda_register.h"); + std::string const registerFile = + cmStrCat(objectDir, "cmake_cuda_register.h"); // Link device code for each architecture. for (const std::string& architectureKind : architectures) { @@ -1518,7 +1523,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( const std::string architecture = architectureKind.substr(0, architectureKind.find('-')); const std::string cubin = - cmStrCat(relObjectDir, "sm_", architecture, ".cubin"); + cmStrCat(objectDir, "sm_", architecture, ".cubin"); profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); fatbinaryDepends.emplace_back(cubin); @@ -1530,8 +1535,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( // all architectures the register file will be the same too. Thus // generate it only on the first invocation to reduce overhead. 
if (fatbinaryDepends.size() == 1) { - std::string registerFileRel = - this->LocalGenerator->MaybeRelativeToCurBinDir(registerFile); + std::string const registerFileRel = + cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); registerFileCmd = cmStrCat(" --register-link-binaries=", registerFileRel); cleanFiles.push_back(registerFileRel); @@ -1555,7 +1560,7 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( const std::string fatbinaryOutput = cmStrCat(objectDir, "cmake_cuda_fatbin.h"); const std::string fatbinaryOutputRel = - this->LocalGenerator->MaybeRelativeToCurBinDir(fatbinaryOutput); + cmStrCat(relPath, relObjectDir, "cmake_cuda_fatbin.h"); this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, fatbinaryOutputRel, fatbinaryDepends, @@ -1583,9 +1588,8 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( compileCmd, vars); commands.emplace_back(compileCmd); - this->LocalGenerator->WriteMakeRule( - *this->BuildFileStream, nullptr, output, - { cmStrCat(relObjectDir, "cmake_cuda_fatbin.h") }, commands, false); + this->LocalGenerator->WriteMakeRule(*this->BuildFileStream, nullptr, output, + { fatbinaryOutputRel }, commands, false); // Clean all the possible executable names and symlinks. 
this->CleanFiles.insert(cleanFiles.begin(), cleanFiles.end()); diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index fdb7a6e..a3fb409 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -15,7 +15,7 @@ add_cuda_test_macro(CudaOnly.ToolkitBeforeLang CudaOnlyToolkitBeforeLang) add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs) add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) -add_cuda_test_macro(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) +add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation) if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") # Clang doesn't have flags for selecting the runtime. diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt index 864ecbf..17069e3 100644 --- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt +++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt @@ -34,26 +34,9 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu) target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11) target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA) -add_executable(CudaOnlySeparateCompilation main.cu) -target_link_libraries(CudaOnlySeparateCompilation - PRIVATE CUDASeparateLibB) -set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11) -set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD_REQUIRED TRUE) - set_target_properties(CUDASeparateLibA CUDASeparateLibB PROPERTIES CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON) -if (CMAKE_GENERATOR MATCHES "^Visual Studio") - #Visual Studio CUDA integration will not perform device linking - #on a target that itself does not have GenerateRelocatableDeviceCode - #enabled. 
- set_target_properties(CudaOnlySeparateCompilation - PROPERTIES CUDA_SEPARABLE_COMPILATION ON) -endif() - -if(APPLE) - # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. - set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) -endif() +add_subdirectory(main) diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu deleted file mode 100644 index 40dbe5d..0000000 --- a/Tests/CudaOnly/SeparateCompilation/main.cu +++ /dev/null @@ -1,68 +0,0 @@ - -#include <iostream> - -#include "file1.h" -#include "file2.h" - -int file4_launch_kernel(int x); -int file5_launch_kernel(int x); - -int choose_cuda_device() -{ - int nDevices = 0; - cudaError_t err = cudaGetDeviceCount(&nDevices); - if (err != cudaSuccess) { - std::cerr << "Failed to retrieve the number of CUDA enabled devices" - << std::endl; - return 1; - } - for (int i = 0; i < nDevices; ++i) { - cudaDeviceProp prop; - cudaError_t err = cudaGetDeviceProperties(&prop, i); - if (err != cudaSuccess) { - std::cerr << "Could not retrieve properties from CUDA device " << i - << std::endl; - return 1; - } - if (prop.major >= 3) { - err = cudaSetDevice(i); - if (err != cudaSuccess) { - std::cout << "Could not select CUDA device " << i << std::endl; - } else { - return 0; - } - } - } - - std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" - << std::endl; - - return 1; -} - -int main(int argc, char** argv) -{ - int ret = choose_cuda_device(); - if (ret) { - return 0; - } - - cudaError_t err; - file4_launch_kernel(42); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::cerr << "file4_launch_kernel: kernel launch failed: " - << cudaGetErrorString(err) << std::endl; - return 1; - } - - file5_launch_kernel(42); - err = cudaGetLastError(); - if (err != cudaSuccess) { - std::cerr << "file5_launch_kernel: kernel launch failed: " - << cudaGetErrorString(err) << std::endl; - return 1; - } - - 
return 0; -} diff --git a/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt new file mode 100644 index 0000000..c181078 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt @@ -0,0 +1,18 @@ +add_executable(CudaOnlySeparateCompilation main.cu) +target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB) +set_target_properties(CudaOnlySeparateCompilation PROPERTIES + CUDA_STANDARD 11 + CUDA_STANDARD_REQUIRED TRUE +) + +if(CMAKE_GENERATOR MATCHES "^Visual Studio") + # Visual Studio CUDA integration will not perform device linking + # on a target that itself does not have GenerateRelocatableDeviceCode + # enabled. + set_property(TARGET CudaOnlySeparateCompilation PROPERTY CUDA_SEPARABLE_COMPILATION ON) +endif() + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dylib) at runtime. + set_property(TARGET CudaOnlySeparateCompilation PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/SeparateCompilation/main/main.cu b/Tests/CudaOnly/SeparateCompilation/main/main.cu new file mode 100644 index 0000000..2b6e8f4 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilation/main/main.cu @@ -0,0 +1,68 @@ + +#include <iostream> + +#include "../file1.h" +#include "../file2.h" + +int file4_launch_kernel(int x); +int file5_launch_kernel(int x); + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA 
device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + file4_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file4_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + file5_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file5_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + return 0; +} -- cgit v0.12