From b07c71831c2ea42023dc27eb81e3099cefe35dd2 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 13 Jul 2018 14:43:49 -0400 Subject: CUDA: Add a test to verify device linking can handle circular deps --- Tests/CudaOnly/CMakeLists.txt | 1 + Tests/CudaOnly/CircularLinkLine/CMakeLists.txt | 35 ++++++++++++++++++++++++++ Tests/CudaOnly/CircularLinkLine/file1.cu | 6 +++++ Tests/CudaOnly/CircularLinkLine/file2.cu | 6 +++++ Tests/CudaOnly/CircularLinkLine/file3.cu | 8 ++++++ Tests/CudaOnly/CircularLinkLine/main.cu | 5 ++++ 6 files changed, 61 insertions(+) create mode 100644 Tests/CudaOnly/CircularLinkLine/CMakeLists.txt create mode 100644 Tests/CudaOnly/CircularLinkLine/file1.cu create mode 100644 Tests/CudaOnly/CircularLinkLine/file2.cu create mode 100644 Tests/CudaOnly/CircularLinkLine/file3.cu create mode 100644 Tests/CudaOnly/CircularLinkLine/main.cu diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index 59f3e84..5b7c0e6 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -1,4 +1,5 @@ +ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard) ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX) ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) diff --git a/Tests/CudaOnly/CircularLinkLine/CMakeLists.txt b/Tests/CudaOnly/CircularLinkLine/CMakeLists.txt new file mode 100644 index 0000000..8efbb0f --- /dev/null +++ b/Tests/CudaOnly/CircularLinkLine/CMakeLists.txt @@ -0,0 +1,35 @@ +cmake_minimum_required(VERSION 3.7) +project (CudaOnlyCircularLinkLine CUDA) + +#Goal for this example: +# Verify that we de-duplicate the device link line +# Verify that a de-duplicated link line still works with circular static libraries + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30]") +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD 11) + +add_library(CUDACircularDeviceLinking1 STATIC file1.cu) +add_library(CUDACircularDeviceLinking2 STATIC file2.cu) +add_library(CUDACircularDeviceLinking3 STATIC file3.cu) +add_executable(CudaOnlyCircularLinkLine main.cu) + +target_link_libraries(CUDACircularDeviceLinking1 PUBLIC CUDACircularDeviceLinking2) +target_link_libraries(CUDACircularDeviceLinking2 PUBLIC CUDACircularDeviceLinking3) +#FIXME: complete the loop once supported +#target_link_libraries(CUDACircularDeviceLinking3 PUBLIC CUDACircularDeviceLinking1) + +target_link_libraries(CudaOnlyCircularLinkLine PRIVATE CUDACircularDeviceLinking3) + + +set_target_properties(CUDACircularDeviceLinking1 + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON) + +set_target_properties(CUDACircularDeviceLinking2 + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON) + +set_target_properties(CUDACircularDeviceLinking3 + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON) diff --git a/Tests/CudaOnly/CircularLinkLine/file1.cu b/Tests/CudaOnly/CircularLinkLine/file1.cu new file mode 100644 index 0000000..88ac4e3 --- /dev/null +++ b/Tests/CudaOnly/CircularLinkLine/file1.cu @@ -0,0 +1,6 @@ + +extern __device__ int file2_func(int); +int __device__ file1_func(int x) +{ + return file2_func(x); +} diff --git a/Tests/CudaOnly/CircularLinkLine/file2.cu b/Tests/CudaOnly/CircularLinkLine/file2.cu new file mode 100644 index 0000000..b32dbff --- /dev/null +++ b/Tests/CudaOnly/CircularLinkLine/file2.cu @@ -0,0 +1,6 @@ + +extern __device__ int file3_func(int); +int __device__ file2_func(int x) +{ + return x + file3_func(x); +} diff --git a/Tests/CudaOnly/CircularLinkLine/file3.cu b/Tests/CudaOnly/CircularLinkLine/file3.cu new file mode 100644 index 0000000..7f67187 --- /dev/null +++ b/Tests/CudaOnly/CircularLinkLine/file3.cu @@ -0,0 +1,8 @@ + +extern __device__ int file1_func(int); +int __device__ file3_func(int x) +{ + if (x > 0) + return file1_func(-x); + return x; +} diff --git a/Tests/CudaOnly/CircularLinkLine/main.cu b/Tests/CudaOnly/CircularLinkLine/main.cu new file mode 100644 index 0000000..1c19e8d --- /dev/null +++ b/Tests/CudaOnly/CircularLinkLine/main.cu @@ -0,0 +1,5 @@ + +int main(int argc, char** argv) +{ + return 0; +} -- cgit v0.12 From fd0523a215c70b75d7830a18e050b79fcdf333aa Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 13 Jul 2018 14:44:16 -0400 Subject: CUDA: Properly de-duplicate libs when doing device linking The nvcc device linker is designed so that each static library with device symbols only needs to be listed once as it doesn't care about link order. If you provide the same static library multiple times it will error out. To make sure this occurs we find the unique set of link items. --- Source/cmLinkLineDeviceComputer.cxx | 22 +++++++++++++++++----- Tests/CudaOnly/CircularLinkLine/CMakeLists.txt | 3 +-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx index 557fa41..c9bbde1 100644 --- a/Source/cmLinkLineDeviceComputer.cxx +++ b/Source/cmLinkLineDeviceComputer.cxx @@ -3,7 +3,9 @@ #include "cmLinkLineDeviceComputer.h" +#include #include +#include #include "cmAlgorithms.h" #include "cmComputeLinkInformation.h" @@ -28,6 +30,12 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries( { // Write the library flags to the build rule. std::ostringstream fout; + + // Generate the unique set of link items when device linking. + // The nvcc device linker is designed so that each static library + // with device symbols only needs to be listed once as it doesn't + // care about link order. + std::set emitted; typedef cmComputeLinkInformation::ItemVector ItemVector; ItemVector const& items = cli.GetItems(); std::string config = cli.GetConfig(); @@ -50,20 +58,24 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries( } } + std::string out; if (item.IsPath) { // nvcc understands absolute paths to libraries ending in '.a' should // be passed to nvlink. Other extensions like '.so' or '.dylib' are // rejected by the nvcc front-end even though nvlink knows to ignore // them. Bypass the front-end via '-Xnvlink'. if (!cmHasLiteralSuffix(item.Value, ".a")) { - fout << "-Xnvlink "; + out += "-Xnvlink "; } - fout << this->ConvertToOutputFormat( - this->ConvertToLinkReference(item.Value)); + out += + this->ConvertToOutputFormat(this->ConvertToLinkReference(item.Value)); } else { - fout << item.Value; + out += item.Value; + } + + if (emitted.insert(out).second) { + fout << out << " "; } - fout << " "; } if (!stdLibString.empty()) { diff --git a/Tests/CudaOnly/CircularLinkLine/CMakeLists.txt b/Tests/CudaOnly/CircularLinkLine/CMakeLists.txt index 8efbb0f..c978e51 100644 --- a/Tests/CudaOnly/CircularLinkLine/CMakeLists.txt +++ b/Tests/CudaOnly/CircularLinkLine/CMakeLists.txt @@ -16,8 +16,7 @@ add_executable(CudaOnlyCircularLinkLine main.cu) target_link_libraries(CUDACircularDeviceLinking1 PUBLIC CUDACircularDeviceLinking2) target_link_libraries(CUDACircularDeviceLinking2 PUBLIC CUDACircularDeviceLinking3) -#FIXME: complete the loop once supported -#target_link_libraries(CUDACircularDeviceLinking3 PUBLIC CUDACircularDeviceLinking1) +target_link_libraries(CUDACircularDeviceLinking3 PUBLIC CUDACircularDeviceLinking1) target_link_libraries(CudaOnlyCircularLinkLine PRIVATE CUDACircularDeviceLinking3) -- cgit v0.12