From 850ef90a66a8f81369b3d11c74398ccaefbe5324 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 31 Jan 2019 17:34:41 -0500 Subject: CUDA: Honor CUDA_RESOLVE_DEVICE_SYMBOLS for more target types `CUDA_RESOLVE_DEVICE_SYMBOLS` can be used with shared, module, and executable target types. This relaxation is to allow for better interoperability with linkers that automatically do CUDA device symbol resolution and have no way to disable it. --- Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst | 12 +++- Help/release/dev/CUDA_RESOLVE_DEVICE_SYMBOLS.rst | 6 ++ Source/cmMakefileExecutableTargetGenerator.cxx | 8 ++- Source/cmMakefileLibraryTargetGenerator.cxx | 23 ++++++-- Source/cmNinjaNormalTargetGenerator.cxx | 29 ++++----- Source/cmVisualStudio10TargetGenerator.cxx | 25 ++++---- Tests/CudaOnly/CMakeLists.txt | 11 ++++ .../DontResolveDeviceSymbols/CMakeLists.txt | 50 ++++++++++++++++ Tests/CudaOnly/DontResolveDeviceSymbols/file1.cu | 69 ++++++++++++++++++++++ Tests/CudaOnly/DontResolveDeviceSymbols/main.cu | 7 +++ .../CudaOnly/DontResolveDeviceSymbols/verify.cmake | 14 +++++ 11 files changed, 219 insertions(+), 35 deletions(-) create mode 100644 Help/release/dev/CUDA_RESOLVE_DEVICE_SYMBOLS.rst create mode 100644 Tests/CudaOnly/DontResolveDeviceSymbols/CMakeLists.txt create mode 100644 Tests/CudaOnly/DontResolveDeviceSymbols/file1.cu create mode 100644 Tests/CudaOnly/DontResolveDeviceSymbols/main.cu create mode 100644 Tests/CudaOnly/DontResolveDeviceSymbols/verify.cmake diff --git a/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst b/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst index 127d79f..ef74ae2 100644 --- a/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst +++ b/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst @@ -1,12 +1,18 @@ CUDA_RESOLVE_DEVICE_SYMBOLS --------------------------- -CUDA only: Enables device linking for the specific static library target +CUDA only: Enables device linking for the specific library target -If set this will enable device linking on this static library target. Normally +If set this will enable device linking on the library target. Normally device linking is deferred until a shared library or executable is generated, allowing for multiple static libraries to resolve device symbols at the same -time. +time when they are used by a shared library or executable. + +By default static library targets have this property is disabled, +while shared, module, and executable targets have this property enabled. + +Note that device linking is not supported for :ref:`Object Libraries`. + For instance: diff --git a/Help/release/dev/CUDA_RESOLVE_DEVICE_SYMBOLS.rst b/Help/release/dev/CUDA_RESOLVE_DEVICE_SYMBOLS.rst new file mode 100644 index 0000000..32db233 --- /dev/null +++ b/Help/release/dev/CUDA_RESOLVE_DEVICE_SYMBOLS.rst @@ -0,0 +1,6 @@ +CUDA_RESOLVE_DEVICE_SYMBOLS +--------------------------- + +* The :prop_tgt:`CUDA_RESOLVE_DEVICE_SYMBOLS` target property is now supported + on shared library, module library, and executable targets. Previously it was + only honored on static libraries. diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx index e576b5f..e8ae5ae 100644 --- a/Source/cmMakefileExecutableTargetGenerator.cxx +++ b/Source/cmMakefileExecutableTargetGenerator.cxx @@ -95,7 +95,13 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( const bool hasCUDA = (std::find(closure->Languages.begin(), closure->Languages.end(), cuda_lang) != closure->Languages.end()); - if (!hasCUDA) { + + bool doDeviceLinking = true; + if (const char* resolveDeviceSymbols = + this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) { + doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols); + } + if (!hasCUDA || !doDeviceLinking) { return; } diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx index 72181ab..5a1ef4e 100644 --- a/Source/cmMakefileLibraryTargetGenerator.cxx +++ b/Source/cmMakefileLibraryTargetGenerator.cxx @@ -133,9 +133,12 @@ void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules() (std::find(closure->Languages.begin(), closure->Languages.end(), cuda_lang) != closure->Languages.end()); - const bool resolveDeviceSymbols = - this->GeneratorTarget->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); - if (hasCUDA && resolveDeviceSymbols) { + bool doDeviceLinking = false; + if (const char* resolveDeviceSymbols = + this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) { + doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols); + } + if (hasCUDA && doDeviceLinking) { std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; this->WriteDeviceLibraryRules(linkRuleVar, false); } @@ -168,7 +171,12 @@ void cmMakefileLibraryTargetGenerator::WriteSharedLibraryRules(bool relink) const bool hasCUDA = (std::find(closure->Languages.begin(), closure->Languages.end(), cuda_lang) != closure->Languages.end()); - if (hasCUDA) { + bool doDeviceLinking = true; + if (const char* resolveDeviceSymbols = + this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) { + doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols); + } + if (hasCUDA && doDeviceLinking) { std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; this->WriteDeviceLibraryRules(linkRuleVar, relink); } @@ -209,7 +217,12 @@ void cmMakefileLibraryTargetGenerator::WriteModuleLibraryRules(bool relink) const bool hasCUDA = (std::find(closure->Languages.begin(), closure->Languages.end(), cuda_lang) != closure->Languages.end()); - if (hasCUDA) { + bool doDeviceLinking = true; + if (const char* resolveDeviceSymbols = + this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) { + doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols); + } + if (hasCUDA && doDeviceLinking) { std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; this->WriteDeviceLibraryRules(linkRuleVar, relink); } diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index 0d05782..cbc0103 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -566,22 +566,23 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement() (std::find(closure->Languages.begin(), closure->Languages.end(), cuda_lang) != closure->Languages.end()); - bool shouldHaveDeviceLinking = false; - switch (genTarget.GetType()) { - case cmStateEnums::SHARED_LIBRARY: - case cmStateEnums::MODULE_LIBRARY: - case cmStateEnums::EXECUTABLE: - shouldHaveDeviceLinking = true; - break; - case cmStateEnums::STATIC_LIBRARY: - shouldHaveDeviceLinking = - genTarget.GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); - break; - default: - break; + bool doDeviceLinking = false; + if (const char* resolveDeviceSymbols = + genTarget.GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) { + doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols); + } else { + switch (genTarget.GetType()) { + case cmStateEnums::SHARED_LIBRARY: + case cmStateEnums::MODULE_LIBRARY: + case cmStateEnums::EXECUTABLE: + doDeviceLinking = true; + break; + default: + break; + } } - if (!(shouldHaveDeviceLinking && hasCUDA)) { + if (!(doDeviceLinking && hasCUDA)) { return; } diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index 178e717..8e08417 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -2998,18 +2998,19 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions( // Determine if we need to do a device link bool doDeviceLinking = false; - switch (this->GeneratorTarget->GetType()) { - case cmStateEnums::SHARED_LIBRARY: - case cmStateEnums::MODULE_LIBRARY: - case cmStateEnums::EXECUTABLE: - doDeviceLinking = true; - break; - case cmStateEnums::STATIC_LIBRARY: - doDeviceLinking = this->GeneratorTarget->GetPropertyAsBool( - "CUDA_RESOLVE_DEVICE_SYMBOLS"); - break; - default: - break; + if (const char* resolveDeviceSymbols = + this->GeneratorTarget->GetProperty("CUDA_RESOLVE_DEVICE_SYMBOLS")) { + doDeviceLinking = cmSystemTools::IsOn(resolveDeviceSymbols); + } else { + switch (this->GeneratorTarget->GetType()) { + case cmStateEnums::SHARED_LIBRARY: + case cmStateEnums::MODULE_LIBRARY: + case cmStateEnums::EXECUTABLE: + doDeviceLinking = true; + break; + default: + break; + } } cudaLinkOptions.AddFlag("PerformDeviceLink", diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index 9c4f86a..f1fd344 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -7,6 +7,17 @@ ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs) +add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND + ${CMAKE_CTEST_COMMAND} -C $ + --build-and-test + "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/" + "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/" + ${build_generator_args} + --build-project DontResolveDeviceSymbols + --build-options ${build_options} + --test-command ${CMAKE_CTEST_COMMAND} -V -C $ + ) + if(MSVC) ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB) endif() diff --git a/Tests/CudaOnly/DontResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/DontResolveDeviceSymbols/CMakeLists.txt new file mode 100644 index 0000000..6190089 --- /dev/null +++ b/Tests/CudaOnly/DontResolveDeviceSymbols/CMakeLists.txt @@ -0,0 +1,50 @@ +cmake_minimum_required(VERSION 3.13) +project (DontResolveDeviceSymbols CUDA) + +# Find nm and dumpbin +if(CMAKE_NM) + set(dump_command ${CMAKE_NM}) + set(dump_args --defined-only) + set(symbol_name cudaRegisterLinkedBinary) +else() + include(GetPrerequisites) + message(STATUS "calling list_prerequisites to find dumpbin") + list_prerequisites("${CMAKE_COMMAND}" 0 0 0) + if(gp_dumpbin) + set(dump_command ${gp_dumpbin}) + set(dump_args /SYMBOLS) + set(symbol_name nv_fatb) + endif() +endif() + + +#Goal for this example: +# Build a static library that defines multiple methods and kernels that +# use each other. +# Don't resolve the device symbols in the static library +# Don't resolve the device symbols in the executable library +# Verify that we can't use those device symbols from anything +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=[compute_30] -gencode arch=compute_50,code=\\\"compute_50\\\"") +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD 11) + +add_library(CUDANoDeviceResolve SHARED file1.cu) +set_target_properties(CUDANoDeviceResolve + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS OFF + POSITION_INDEPENDENT_CODE ON) +if(MSVC) + target_link_options(CUDANoDeviceResolve PRIVATE "/FORCE:UNRESOLVED") +endif() + +if(dump_command) +add_custom_command(TARGET CUDANoDeviceResolve POST_BUILD + COMMAND ${CMAKE_COMMAND} + -DDUMP_COMMAND=${dump_command} + -DDUMP_ARGS=${dump_args} + -DSYMBOL_NAME=${symbol_name} + -DTEST_LIBRARY_PATH=$ + -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake + ) +endif() diff --git a/Tests/CudaOnly/DontResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/DontResolveDeviceSymbols/file1.cu new file mode 100644 index 0000000..3924f67 --- /dev/null +++ b/Tests/CudaOnly/DontResolveDeviceSymbols/file1.cu @@ -0,0 +1,69 @@ + +#include + +static __global__ void file1_kernel(int in, int* out) +{ + *out = in * in; +} + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +int file1_launch_kernel() +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + int input = 4; + + int* output; + cudaError_t err = cudaMallocManaged(&output, sizeof(int)); + cudaDeviceSynchronize(); + if (err != cudaSuccess) { + return 1; + } + + file1_kernel<<<1, 1>>>(input, output); + cudaDeviceSynchronize(); + err = cudaGetLastError(); + std::cout << err << " " << cudaGetErrorString(err) << std::endl; + if (err == cudaSuccess) { + // This kernel launch should failed as the device linking never occured + std::cerr << "file1_kernel: kernel launch should have failed" << std::endl; + return 1; + } + return 0; +} diff --git a/Tests/CudaOnly/DontResolveDeviceSymbols/main.cu b/Tests/CudaOnly/DontResolveDeviceSymbols/main.cu new file mode 100644 index 0000000..84a7a19 --- /dev/null +++ b/Tests/CudaOnly/DontResolveDeviceSymbols/main.cu @@ -0,0 +1,7 @@ + +#include + +int main(int argc, char** argv) +{ + return 0; +} diff --git a/Tests/CudaOnly/DontResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/DontResolveDeviceSymbols/verify.cmake new file mode 100644 index 0000000..9bb426d --- /dev/null +++ b/Tests/CudaOnly/DontResolveDeviceSymbols/verify.cmake @@ -0,0 +1,14 @@ +execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH} + RESULT_VARIABLE RESULT + OUTPUT_VARIABLE OUTPUT + ERROR_VARIABLE ERROR +) + +if(NOT "${RESULT}" STREQUAL "0") + message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]") +endif() + +if("${OUTPUT}" MATCHES "${SYMBOL_NAME}") + message(FATAL_ERROR + "The '${SYMBOL_NAME}' symbol is defined; device linking occurred!") +endif() -- cgit v0.12