From 0c56bdf91ed9c2b2fcf2ee7efb2f08b575ba4aae Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 12 Jan 2023 10:36:25 -0500 Subject: CUDA: device linking obeys CMAKE_CUDA_VISIBILITY_PRESET setting Fixes #24272 --- Source/cmLocalGenerator.cxx | 2 +- Tests/CudaOnly/SeparateCompilation/CMakeLists.txt | 26 +++++++++++++++------- Tests/CudaOnly/SeparateCompilation/file1.h | 9 ++++++++ Tests/CudaOnly/SeparateCompilation/file4.cu | 2 +- Tests/CudaOnly/SeparateCompilation/file5.cu | 2 +- .../SeparateCompilation/main/CMakeLists.txt | 2 +- Tests/CudaOnly/SeparateCompilation/main/main.cu | 4 ++-- 7 files changed, 33 insertions(+), 14 deletions(-) diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx index cedb367..13f2745 100644 --- a/Source/cmLocalGenerator.cxx +++ b/Source/cmLocalGenerator.cxx @@ -1408,7 +1408,7 @@ void cmLocalGenerator::GetDeviceLinkFlags( linkPath); } - // iterate link deps and see if any of them need IPO + this->AddVisibilityPresetFlags(linkFlags, target, "CUDA"); std::vector<std::string> linkOpts; target->GetLinkOptions(linkOpts, config, "CUDA"); diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt index 17069e3..ca73b1a 100644 --- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt +++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt @@ -15,6 +15,9 @@ get_property(sep_comp TARGET CUDASeparateLibA PROPERTY CUDA_SEPARABLE_COMPILATIO if(NOT sep_comp) message(FATAL_ERROR "CUDA_SEPARABLE_COMPILATION not initialized") endif() +set_target_properties(CUDASeparateLibA + PROPERTIES + POSITION_INDEPENDENT_CODE ON) unset(CMAKE_CUDA_SEPARABLE_COMPILATION) if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC") @@ -26,17 +29,24 @@ if(CMAKE_CUDA_SIMULATE_ID STREQUAL "MSVC") target_compile_options(CUDASeparateLibA PRIVATE -Xcompiler=-bigobj) endif() -#Having file4/file5 in a shared library causes serious problems -#with the nvcc linker and it will generate bad entries that will -#cause a segv when trying to 
run the executable +#Have file4 and file5 in different shared libraries so that we +#verify that hidden visibility is passed to the device linker. +#Otherwise we will get a segv when trying to run the executable # -add_library(CUDASeparateLibB STATIC file4.cu file5.cu) +add_library(CUDASeparateLibB SHARED file4.cu) target_compile_features(CUDASeparateLibB PRIVATE cuda_std_11) target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA) -set_target_properties(CUDASeparateLibA - CUDASeparateLibB - PROPERTIES CUDA_SEPARABLE_COMPILATION ON - POSITION_INDEPENDENT_CODE ON) +add_library(CUDASeparateLibC SHARED file5.cu) +target_compile_features(CUDASeparateLibC PRIVATE cuda_std_11) +target_link_libraries(CUDASeparateLibC PRIVATE CUDASeparateLibA) + +set_target_properties(CUDASeparateLibB + CUDASeparateLibC + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + POSITION_INDEPENDENT_CODE ON + CUDA_VISIBILITY_PRESET hidden + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/main") add_subdirectory(main) diff --git a/Tests/CudaOnly/SeparateCompilation/file1.h b/Tests/CudaOnly/SeparateCompilation/file1.h index ff1945c..1cedc20 100644 --- a/Tests/CudaOnly/SeparateCompilation/file1.h +++ b/Tests/CudaOnly/SeparateCompilation/file1.h @@ -1,5 +1,14 @@ #pragma once + +#ifdef _WIN32 +# define EXPORT __declspec(dllexport) +# define IMPORT __declspec(dllimport) +#else +# define EXPORT __attribute__((__visibility__("default"))) +# define IMPORT +#endif + struct result_type { int input; diff --git a/Tests/CudaOnly/SeparateCompilation/file4.cu b/Tests/CudaOnly/SeparateCompilation/file4.cu index 2e3e01e..cc24a46 100644 --- a/Tests/CudaOnly/SeparateCompilation/file4.cu +++ b/Tests/CudaOnly/SeparateCompilation/file4.cu @@ -15,7 +15,7 @@ static __global__ void file4_kernel(result_type& r, int x) result_type_dynamic rd = file2_func(x); } -int file4_launch_kernel(int x) +EXPORT int file4_launch_kernel(int x) { result_type r; file4_kernel<<<1, 1>>>(r, x); diff --git 
a/Tests/CudaOnly/SeparateCompilation/file5.cu b/Tests/CudaOnly/SeparateCompilation/file5.cu index fee8e9e..38cbeb2 100644 --- a/Tests/CudaOnly/SeparateCompilation/file5.cu +++ b/Tests/CudaOnly/SeparateCompilation/file5.cu @@ -15,7 +15,7 @@ static __global__ void file5_kernel(result_type& r, int x) result_type_dynamic rd = file2_func(x); } -int file5_launch_kernel(int x) +EXPORT int file5_launch_kernel(int x) { result_type r; file5_kernel<<<1, 1>>>(r, x); diff --git a/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt index c181078..ce066c6 100644 --- a/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt +++ b/Tests/CudaOnly/SeparateCompilation/main/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(CudaOnlySeparateCompilation main.cu) -target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB) +target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB CUDASeparateLibC) set_target_properties(CudaOnlySeparateCompilation PROPERTIES CUDA_STANDARD 11 CUDA_STANDARD_REQUIRED TRUE diff --git a/Tests/CudaOnly/SeparateCompilation/main/main.cu b/Tests/CudaOnly/SeparateCompilation/main/main.cu index 2b6e8f4..c3f7ce7 100644 --- a/Tests/CudaOnly/SeparateCompilation/main/main.cu +++ b/Tests/CudaOnly/SeparateCompilation/main/main.cu @@ -4,8 +4,8 @@ #include "../file1.h" #include "../file2.h" -int file4_launch_kernel(int x); -int file5_launch_kernel(int x); +IMPORT int file4_launch_kernel(int x); +IMPORT int file5_launch_kernel(int x); int choose_cuda_device() { -- cgit v0.12