From 2d7bb13da7ec13ce73facaff07847d75d8a20e91 Mon Sep 17 00:00:00 2001
From: Robert Maynard <robert.maynard@kitware.com>
Date: Tue, 27 Aug 2019 13:52:55 -0400
Subject: CUDA: static lib device linking computes required static libs

Previously the CMake didn't compute the required set of libraries
needed to properly device link a static library when
CUDA_RESOLVE_DEVICE_SYMBOLS was enabled.
---
 Source/cmLinkLineDeviceComputer.cxx                |  4 ++
 Source/cmLocalGenerator.cxx                        |  7 ++
 Source/cmMakefileLibraryTargetGenerator.cxx        | 23 +++----
 Source/cmVisualStudio10TargetGenerator.cxx         | 76 ++++++++++++++++++++++
 Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt | 23 +++++--
 Tests/CudaOnly/ResolveDeviceSymbols/file1.h        |  3 +
 Tests/CudaOnly/ResolveDeviceSymbols/file2.cu       | 16 -----
 Tests/CudaOnly/ResolveDeviceSymbols/file2.h        |  2 +
 .../CudaOnly/ResolveDeviceSymbols/file2_launch.cu  | 18 +++++
 Tests/CudaOnly/ResolveDeviceSymbols/main.cu        | 24 +------
 10 files changed, 139 insertions(+), 57 deletions(-)
 create mode 100644 Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu

diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx
index 9c0e20f..1a602ca 100644
--- a/Source/cmLinkLineDeviceComputer.cxx
+++ b/Source/cmLinkLineDeviceComputer.cxx
@@ -82,6 +82,9 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
   ItemVector const& items = cli.GetItems();
   std::string config = cli.GetConfig();
   bool skipItemAfterFramework = false;
+  // Note:
+  // Any modification of this algorithm should be reflected also in
+  // cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions
   for (auto const& item : items) {
     if (skipItemAfterFramework) {
       skipItemAfterFramework = false;
@@ -91,6 +94,7 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
     if (item.Target) {
       bool skip = false;
       switch (item.Target->GetType()) {
+        case cmStateEnums::SHARED_LIBRARY:
         case cmStateEnums::MODULE_LIBRARY:
         case cmStateEnums::INTERFACE_LIBRARY:
           skip = true;
diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx
index f0145c5..ee0d4da 100644
--- a/Source/cmLocalGenerator.cxx
+++ b/Source/cmLocalGenerator.cxx
@@ -14,6 +14,7 @@
 #include "cmInstallScriptGenerator.h"
 #include "cmInstallTargetGenerator.h"
 #include "cmLinkLineComputer.h"
+#include "cmLinkLineDeviceComputer.h"
 #include "cmMakefile.h"
 #include "cmRulePlaceholderExpander.h"
 #include "cmSourceFile.h"
@@ -1152,6 +1153,12 @@ void cmLocalGenerator::GetTargetFlags(
   switch (target->GetType()) {
     case cmStateEnums::STATIC_LIBRARY:
       this->GetStaticLibraryFlags(linkFlags, buildType, linkLanguage, target);
+      if (pcli && dynamic_cast<cmLinkLineDeviceComputer*>(linkLineComputer)) {
+        // Compute the required cuda device link libraries when
+        // resolving cuda device symbols
+        this->OutputLinkLibraries(pcli, linkLineComputer, linkLibs,
+                                  frameworkPath, linkPath);
+      }
       break;
     case cmStateEnums::MODULE_LIBRARY:
       libraryLinkVariable = "CMAKE_MODULE_LINKER_FLAGS";
diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx
index 4244402..d51bba4 100644
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -300,19 +300,16 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
 
     // Collect up flags to link in needed libraries.
     std::string linkLibs;
-    if (this->GeneratorTarget->GetType() != cmStateEnums::STATIC_LIBRARY) {
-
-      std::unique_ptr<cmLinkLineComputer> linkLineComputer(
-        new cmLinkLineDeviceComputer(
-          this->LocalGenerator,
-          this->LocalGenerator->GetStateSnapshot().GetDirectory()));
-      linkLineComputer->SetForResponse(useResponseFileForLibs);
-      linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
-      linkLineComputer->SetRelink(relink);
-
-      this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
-                           useResponseFileForLibs, depends);
-    }
+    std::unique_ptr<cmLinkLineComputer> linkLineComputer(
+      new cmLinkLineDeviceComputer(
+        this->LocalGenerator,
+        this->LocalGenerator->GetStateSnapshot().GetDirectory()));
+    linkLineComputer->SetForResponse(useResponseFileForLibs);
+    linkLineComputer->SetUseWatcomQuote(useWatcomQuote);
+    linkLineComputer->SetRelink(relink);
+
+    this->CreateLinkLibs(linkLineComputer.get(), linkLibs,
+                         useResponseFileForLibs, depends);
 
     // Construct object file lists that may be needed to expand the
     // rule.
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index 06e1798..209ebb1 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -3101,6 +3101,82 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
                                      "-Wno-deprecated-gpu-targets");
   }
 
+  // For static libraries that have device linking enabled compute
+  // the  libraries
+  if (this->GeneratorTarget->GetType() == cmStateEnums::STATIC_LIBRARY &&
+      doDeviceLinking) {
+    cmComputeLinkInformation* pcli =
+      this->GeneratorTarget->GetLinkInformation(configName);
+    if (!pcli) {
+      cmSystemTools::Error(
+        "CMake can not compute cmComputeLinkInformation for target: " +
+        this->Name);
+      return false;
+    }
+
+    // Would like to use:
+    // cmLinkLineDeviceComputer computer(this->LocalGenerator,
+    //                                   this->LocalGenerator->GetStateSnapshot().GetDirectory());
+    // std::string computed_libs = computer.ComputeLinkLibraries(cli,
+    // std::string{}); but it outputs in "<libA> <libB>" format instead of
+    // "<libA>;<libB>"
+    // Note:
+    // Any modification of this algorithm should be reflected also in
+    // cmLinkLineDeviceComputer
+    cmComputeLinkInformation& cli = *pcli;
+    std::vector<std::string> libVec;
+    const std::string currentBinDir =
+      this->LocalGenerator->GetCurrentBinaryDirectory();
+    const auto& libs = cli.GetItems();
+    for (cmComputeLinkInformation::Item const& l : libs) {
+
+      if (l.Target) {
+        auto managedType = l.Target->GetManagedType(configName);
+        // Do not allow C# targets to be added to the LIB listing. LIB files
+        // are used for linking C++ dependencies. C# libraries do not have lib
+        // files. Instead, they compile down to C# reference libraries (DLL
+        // files). The
+        // `<ProjectReference>` elements added to the vcxproj are enough for
+        // the IDE to deduce the DLL file required by other C# projects that
+        // need its reference library.
+        if (managedType == cmGeneratorTarget::ManagedType::Managed) {
+          continue;
+        }
+        const auto type = l.Target->GetType();
+
+        bool skip = false;
+        switch (type) {
+          case cmStateEnums::SHARED_LIBRARY:
+          case cmStateEnums::MODULE_LIBRARY:
+          case cmStateEnums::INTERFACE_LIBRARY:
+            skip = true;
+            break;
+          case cmStateEnums::STATIC_LIBRARY:
+            skip = l.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
+            break;
+          default:
+            break;
+        }
+        if (skip) {
+          continue;
+        }
+      }
+
+      if (l.IsPath) {
+        std::string path = this->LocalGenerator->MaybeConvertToRelativePath(
+          currentBinDir, l.Value);
+        ConvertToWindowsSlash(path);
+        if (!cmVS10IsTargetsFile(l.Value)) {
+          libVec.push_back(path);
+        }
+      } else {
+        libVec.push_back(l.Value);
+      }
+    }
+
+    cudaLinkOptions.AddFlag("AdditionalDependencies", libVec);
+  }
+
   this->CudaLinkOptions[configName] = std::move(pOptions);
   return true;
 }
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
index 796e133..64845c5 100644
--- a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
@@ -16,21 +16,29 @@ else()
 endif()
 
 #Goal for this example:
-# Build a static library that defines multiple methods and kernels that
-# use each other.
-# Resolve the device symbols into that static library
-# Verify that we can't use those device symbols from anything that links
+# 1. Build two static libraries that defines multiple methods and kernels
+# 2. Resolve the device symbols into the second static library, therefore
+# confirming that the first static library is on the device link line
+# 3. Verify that we can't use those device symbols from anything that links
 # to the static library
-string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[sm_30] -gencode arch=compute_50,code=\\\"compute_50\\\"")
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CUDA_STANDARD 11)
 
-add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
+add_library(CUDAResolveDeviceDepsA STATIC file1.cu)
+add_library(CUDAResolveDeviceDepsB STATIC file2.cu)
+set_target_properties(CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB
+                      PROPERTIES
+                      CUDA_SEPARABLE_COMPILATION ON
+                      POSITION_INDEPENDENT_CODE ON)
+
+add_library(CUDAResolveDeviceLib STATIC file2_launch.cu)
 set_target_properties(CUDAResolveDeviceLib
                       PROPERTIES
                       CUDA_SEPARABLE_COMPILATION ON
                       CUDA_RESOLVE_DEVICE_SYMBOLS ON
                       POSITION_INDEPENDENT_CODE ON)
+target_link_libraries(CUDAResolveDeviceLib PRIVATE CUDAResolveDeviceDepsA CUDAResolveDeviceDepsB)
 
 if(dump_command)
 add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
@@ -45,7 +53,8 @@ endif()
 add_executable(CudaOnlyResolveDeviceSymbols main.cu)
 set_target_properties(CudaOnlyResolveDeviceSymbols
                       PROPERTIES
-                      CUDA_SEPARABLE_COMPILATION ON)
+                      CUDA_SEPARABLE_COMPILATION OFF
+                      CUDA_RESOLVE_DEVICE_SYMBOLS OFF)
 
 target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
 
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
index ff1945c..b33bcae 100644
--- a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
@@ -1,7 +1,10 @@
 
 #pragma once
+
 struct result_type
 {
   int input;
   int sum;
 };
+
+result_type __device__ file1_func(int x);
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
index 278fd6c..0e5e7aa 100644
--- a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
@@ -1,25 +1,9 @@
 
 #include "file2.h"
 
-result_type __device__ file1_func(int x);
-
 result_type_dynamic __device__ file2_func(int x)
 {
   const result_type r = file1_func(x);
   const result_type_dynamic rd{ r.input, r.sum, true };
   return rd;
 }
-
-static __global__ void file2_kernel(result_type_dynamic& r, int x)
-{
-  // call static_func which is a method that is defined in the
-  // static library that is always out of date
-  r = file2_func(x);
-}
-
-int file2_launch_kernel(int x)
-{
-  result_type_dynamic r;
-  file2_kernel<<<1, 1>>>(r, x);
-  return r.sum;
-}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
index d2dbaa4..c6e2875 100644
--- a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
@@ -8,3 +8,5 @@ struct result_type_dynamic
   int sum;
   bool from_static;
 };
+
+result_type_dynamic __device__ file2_func(int x);
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu
new file mode 100644
index 0000000..4e8da13
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2_launch.cu
@@ -0,0 +1,18 @@
+
+#include "file2.h"
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x)
+{
+  // call static_func which is a method that is defined in the
+  // static library that is always out of date
+  r = file2_func(x);
+}
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x);
+
+int file2_launch_kernel(int x)
+{
+  result_type_dynamic r;
+  file2_kernel<<<1, 1>>>(r, x);
+  return r.sum;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
index d464f96..ea842cc 100644
--- a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
@@ -1,26 +1,10 @@
 
 #include <iostream>
 
-#include "file1.h"
 #include "file2.h"
 
 int file2_launch_kernel(int x);
 
-result_type_dynamic __device__ file2_func(int x);
-static __global__ void main_kernel(result_type_dynamic& r, int x)
-{
-  // call function that was not device linked to us, this will cause
-  // a runtime failure of "invalid device function"
-  r = file2_func(x);
-}
-
-int main_launch_kernel(int x)
-{
-  result_type_dynamic r;
-  main_kernel<<<1, 1>>>(r, x);
-  return r.sum;
-}
-
 int choose_cuda_device()
 {
   int nDevices = 0;
@@ -62,12 +46,10 @@ int main(int argc, char** argv)
     return 0;
   }
 
-  main_launch_kernel(1);
+  file2_launch_kernel(1);
   cudaError_t err = cudaGetLastError();
-  if (err == cudaSuccess) {
-    // This kernel launch should fail as the file2_func was device linked
-    // into the static library and is not usable by the executable
-    std::cerr << "main_launch_kernel: kernel launch should have failed"
+  if (err != cudaSuccess) {
+    std::cerr << "file2_launch_kernel: kernel launch should have passed"
               << std::endl;
     return 1;
   }
-- 
cgit v0.12