From 2d7bb13da7ec13ce73facaff07847d75d8a20e91 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 27 Aug 2019 13:52:55 -0400 Subject: CUDA: static lib device linking computes required static libs Previously the CMake didn't compute the required set of libraries needed to properly device link a static library when CUDA_RESOLVE_DEVICE_SYMBOLS was enabled. --- Source/cmLinkLineDeviceComputer.cxx | 4 ++ Source/cmLocalGenerator.cxx | 7 ++ Source/cmMakefileLibraryTargetGenerator.cxx | 23 +++---- Source/cmVisualStudio10TargetGenerator.cxx | 76 ++++++++++++++++++++++ Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt | 23 +++++-- Tests/CudaOnly/ResolveDeviceSymbols/file1.h | 3 + Tests/CudaOnly/ResolveDeviceSymbols/file2.cu | 16 ----- Tests/CudaOnly/ResolveDeviceSymbols/file2.h | 2 + .../CudaOnly/ResolveDeviceSymbols/file2_launch.cu | 18 +++++ Tests/CudaOnly/ResolveDeviceSymbols/main.cu | 24 +------ 10 files changed, 139 insertions(+), 57 deletions(-) create mode 100644 Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx index 9c0e20f..1a602ca 100644 --- a/Source/cmLinkLineDeviceComputer.cxx +++ b/Source/cmLinkLineDeviceComputer.cxx @@ -82,6 +82,9 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries( ItemVector const& items = cli.GetItems(); std::string config = cli.GetConfig(); bool skipItemAfterFramework = false; + // Note: + // Any modification of this algorithm should be reflected also in + // cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions for (auto const& item : items) { if (skipItemAfterFramework) { skipItemAfterFramework = false; @@ -91,6 +94,7 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries( if (item.Target) { bool skip = false; switch (item.Target->GetType()) { + case cmStateEnums::SHARED_LIBRARY: case cmStateEnums::MODULE_LIBRARY: case cmStateEnums::INTERFACE_LIBRARY: skip = true; diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx index f0145c5..ee0d4da 100644 --- a/Source/cmLocalGenerator.cxx +++ b/Source/cmLocalGenerator.cxx @@ -14,6 +14,7 @@ #include "cmInstallScriptGenerator.h" #include "cmInstallTargetGenerator.h" #include "cmLinkLineComputer.h" +#include "cmLinkLineDeviceComputer.h" #include "cmMakefile.h" #include "cmRulePlaceholderExpander.h" #include "cmSourceFile.h" @@ -1152,6 +1153,12 @@ void cmLocalGenerator::GetTargetFlags( switch (target->GetType()) { case cmStateEnums::STATIC_LIBRARY: this->GetStaticLibraryFlags(linkFlags, buildType, linkLanguage, target); + if (pcli && dynamic_cast(linkLineComputer)) { + // Compute the required cuda device link libraries when + // resolving cuda device symbols + this->OutputLinkLibraries(pcli, linkLineComputer, linkLibs, + frameworkPath, linkPath); + } break; case cmStateEnums::MODULE_LIBRARY: libraryLinkVariable = "CMAKE_MODULE_LINKER_FLAGS"; diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx index 4244402..d51bba4 100644 --- a/Source/cmMakefileLibraryTargetGenerator.cxx +++ b/Source/cmMakefileLibraryTargetGenerator.cxx @@ -300,19 +300,16 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( // Collect up flags to link in needed libraries. std::string linkLibs; - if (this->GeneratorTarget->GetType() != cmStateEnums::STATIC_LIBRARY) { - - std::unique_ptr linkLineComputer( - new cmLinkLineDeviceComputer( - this->LocalGenerator, - this->LocalGenerator->GetStateSnapshot().GetDirectory())); - linkLineComputer->SetForResponse(useResponseFileForLibs); - linkLineComputer->SetUseWatcomQuote(useWatcomQuote); - linkLineComputer->SetRelink(relink); - - this->CreateLinkLibs(linkLineComputer.get(), linkLibs, - useResponseFileForLibs, depends); - } + std::unique_ptr linkLineComputer( + new cmLinkLineDeviceComputer( + this->LocalGenerator, + this->LocalGenerator->GetStateSnapshot().GetDirectory())); + linkLineComputer->SetForResponse(useResponseFileForLibs); + linkLineComputer->SetUseWatcomQuote(useWatcomQuote); + linkLineComputer->SetRelink(relink); + + this->CreateLinkLibs(linkLineComputer.get(), linkLibs, + useResponseFileForLibs, depends); // Construct object file lists that may be needed to expand the // rule. diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index 06e1798..209ebb1 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -3101,6 +3101,82 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions( "-Wno-deprecated-gpu-targets"); } + // For static libraries that have device linking enabled compute + // the libraries + if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY && + doDeviceLinking) { + cmComputeLinkInformation* pcli = + this->GeneratorTarget->GetLinkInformation(configName); + if (!pcli) { + cmSystemTools::Error( + "CMake can not compute cmComputeLinkInformation for target: " + + this->Name); + return false; + } + + // Would like to use: + // cmLinkLineDeviceComputer computer(this->LocalGenerator, + // this->LocalGenerator->GetStateSnapshot().GetDirectory()); + // std::string computed_libs = computer.ComputeLinkLibraries(cli, + // std::string{}); but it outputs in " " format instead of + // ";" + // Note: + // Any modification of this algorithm should be reflected also in + // cmLinkLineDeviceComputer + cmComputeLinkInformation& cli = *pcli; + std::vector libVec; + const std::string currentBinDir = + this->LocalGenerator->GetCurrentBinaryDirectory(); + const auto& libs = cli.GetItems(); + for (cmComputeLinkInformation::Item const& l : libs) { + + if (l.Target) { + auto managedType = l.Target->GetManagedType(configName); + // Do not allow C# targets to be added to the LIB listing. LIB files + // are used for linking C++ dependencies. C# libraries do not have lib + // files. Instead, they compile down to C# reference libraries (DLL + // files). The + // `` elements added to the vcxproj are enough for + // the IDE to deduce the DLL file required by other C# projects that + // need its reference library. + if (managedType == cmGeneratorTarget::ManagedType::Managed) { + continue; + } + const auto type = l.Target->GetType(); + + bool skip = false; + switch (type) { + case cmStateEnums::SHARED_LIBRARY: + case cmStateEnums::MODULE_LIBRARY: + case cmStateEnums::INTERFACE_LIBRARY: + skip = true; + break; + case cmStateEnums::STATIC_LIBRARY: + skip = l.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); + break; + default: + break; + } + if (skip) { + continue; + } + } + + if (l.IsPath) { + std::string path = this->LocalGenerator->MaybeConvertToRelativePath( + currentBinDir, l.Value); + ConvertToWindowsSlash(path); + if (!cmVS10IsTargetsFile(l.Value)) { + libVec.push_back(path); + } + } else { + libVec.push_back(l.Value); + } + } + + cudaLinkOptions.AddFlag("AdditionalDependencies", libVec); + } + this->CudaLinkOptions[configName] = std::move(pOptions); return true; } diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt index 796e133..64845c5 100644 --- a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt +++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt @@ -16,21 +16,29 @@ else() endif() #Goal for this example: -# Build a static library that defines multiple methods and kernels that -# use each other. -# Resolve the device symbols into that static library -# Verify that we can't use those device symbols from anything that links +# 1. Build two static libraries that defines multiple methods and kernels +# 2. Resolve the device symbols into the second static library, therefore +# confirming that the first static library is on the device link line +# 3. Verify that we can't use those device symbols from anything that links # to the static library -string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"") +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[sm_30] -gencode arch=compute_50,code=\\\"compute_50\\\"") set(CMAKE_CXX_STANDARD 11) set(CMAKE_CUDA_STANDARD 11) -add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu) +add_library(CUDAResolveDeviceDepsA STATIC file1.cu) +add_library(CUDAResolveDeviceDepsB STATIC file2.cu) +set_target_properties(CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + POSITION_INDEPENDENT_CODE ON) + +add_library(CUDAResolveDeviceLib STATIC file2_launch.cu) set_target_properties(CUDAResolveDeviceLib PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON POSITION_INDEPENDENT_CODE ON) +target_link_libraries(CUDAResolveDeviceLib PRIVATE CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB) if(dump_command) add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD @@ -45,7 +53,8 @@ endif() add_executable(CudaOnlyResolveDeviceSymbols main.cu) set_target_properties(CudaOnlyResolveDeviceSymbols PROPERTIES - CUDA_SEPARABLE_COMPILATION ON) + CUDA_SEPARABLE_COMPILATION OFF + CUDA_RESOLVE_DEVICE_SYMBOLS OFF) target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib) diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h index ff1945c..b33bcae 100644 --- a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h @@ -1,7 +1,10 @@ #pragma once + struct result_type { int input; int sum; }; + +result_type __device__ file1_func(int x); diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu index 278fd6c..0e5e7aa 100644 --- a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu @@ -1,25 +1,9 @@ #include "file2.h" -result_type __device__ file1_func(int x); - result_type_dynamic __device__ file2_func(int x) { const result_type r = file1_func(x); const result_type_dynamic rd{ r.input, r.sum, true }; return rd; } - -static __global__ void file2_kernel(result_type_dynamic& r, int x) -{ - // call static_func which is a method that is defined in the - // static library that is always out of date - r = file2_func(x); -} - -int file2_launch_kernel(int x) -{ - result_type_dynamic r; - file2_kernel<<<1, 1>>>(r, x); - return r.sum; -} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h index d2dbaa4..c6e2875 100644 --- a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h @@ -8,3 +8,5 @@ struct result_type_dynamic int sum; bool from_static; }; + +result_type_dynamic __device__ file2_func(int x); diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu new file mode 100644 index 0000000..4e8da13 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu @@ -0,0 +1,18 @@ + +#include "file2.h" + +static __global__ void file2_kernel(result_type_dynamic& r, int x) +{ + // call static_func which is a method that is defined in the + // static library that is always out of date + r = file2_func(x); +} + +static __global__ void file2_kernel(result_type_dynamic& r, int x); + +int file2_launch_kernel(int x) +{ + result_type_dynamic r; + file2_kernel<<<1, 1>>>(r, x); + return r.sum; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu index d464f96..ea842cc 100644 --- a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu +++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu @@ -1,26 +1,10 @@ #include -#include "file1.h" #include "file2.h" int file2_launch_kernel(int x); -result_type_dynamic __device__ file2_func(int x); -static __global__ void main_kernel(result_type_dynamic& r, int x) -{ - // call function that was not device linked to us, this will cause - // a runtime failure of "invalid device function" - r = file2_func(x); -} - -int main_launch_kernel(int x) -{ - result_type_dynamic r; - main_kernel<<<1, 1>>>(r, x); - return r.sum; -} - int choose_cuda_device() { int nDevices = 0; @@ -62,12 +46,10 @@ int main(int argc, char** argv) return 0; } - main_launch_kernel(1); + file2_launch_kernel(1); cudaError_t err = cudaGetLastError(); - if (err == cudaSuccess) { - // This kernel launch should fail as the file2_func was device linked - // into the static library and is not usable by the executable - std::cerr << "main_launch_kernel: kernel launch should have failed" + if (err != cudaSuccess) { + std::cerr << "file2_launch_kernel: kernel launch should have passed" << std::endl; return 1; } -- cgit v0.12