diff options
author | Brad King <brad.king@kitware.com> | 2017-04-27 13:01:56 (GMT) |
---|---|---|
committer | Kitware Robot <kwrobot@kitware.com> | 2017-04-27 13:02:00 (GMT) |
commit | 7402bd38714e7930810f6bd61e20b836268be814 (patch) | |
tree | 49dbac3b900ec7ea171ad978e11fa921ff0be05b | |
parent | f38147aebc19cdca17df101a9ca6a10337508485 (diff) | |
parent | 493671a5212c6548b2d7376c7065f5f76692a792 (diff) | |
download | CMake-7402bd38714e7930810f6bd61e20b836268be814.zip CMake-7402bd38714e7930810f6bd61e20b836268be814.tar.gz CMake-7402bd38714e7930810f6bd61e20b836268be814.tar.bz2 |
Merge topic 'cuda-device-link-ar'
493671a5 CUDA: Static libraries can now explicitly resolve device symbols
8fb85c68 CUDA: Makefile uses relative path for device linking status messages
a36fb229 CUDA: Visual Studio now properly delays device linking
Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !759
19 files changed, 435 insertions, 15 deletions
diff --git a/Help/manual/cmake-properties.7.rst b/Help/manual/cmake-properties.7.rst index 344bc09..38cc0d8 100644 --- a/Help/manual/cmake-properties.7.rst +++ b/Help/manual/cmake-properties.7.rst @@ -154,6 +154,7 @@ Properties on Targets /prop_tgt/CROSSCOMPILING_EMULATOR /prop_tgt/CUDA_PTX_COMPILATION /prop_tgt/CUDA_SEPARABLE_COMPILATION + /prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS /prop_tgt/CUDA_EXTENSIONS /prop_tgt/CUDA_STANDARD /prop_tgt/CUDA_STANDARD_REQUIRED diff --git a/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst b/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst new file mode 100644 index 0000000..127d79f --- /dev/null +++ b/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst @@ -0,0 +1,15 @@ +CUDA_RESOLVE_DEVICE_SYMBOLS +--------------------------- + +CUDA only: Enables device linking for the specific static library target + +If set this will enable device linking on this static library target. Normally +device linking is deferred until a shared library or executable is generated, +allowing for multiple static libraries to resolve device symbols at the same +time. + +For instance: + +.. code-block:: cmake + + set_property(TARGET mystaticlib PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx index 6789555..bf30b39 100644 --- a/Source/cmLinkLineDeviceComputer.cxx +++ b/Source/cmLinkLineDeviceComputer.cxx @@ -39,9 +39,24 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries( continue; } - if (li->Target->GetType() == cmStateEnums::INTERFACE_LIBRARY || - li->Target->GetType() == cmStateEnums::SHARED_LIBRARY || - li->Target->GetType() == cmStateEnums::MODULE_LIBRARY) { + bool skippable = false; + switch (li->Target->GetType()) { + case cmStateEnums::SHARED_LIBRARY: + case cmStateEnums::MODULE_LIBRARY: + case cmStateEnums::INTERFACE_LIBRARY: + skippable = true; + break; + case cmStateEnums::STATIC_LIBRARY: + // If a static library is resolving its device linking, it should + // be removed for other device linking + skippable = + li->Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); + break; + default: + break; + } + + if (skippable) { continue; } diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx index a93b42d..a719887 100644 --- a/Source/cmMakefileExecutableTargetGenerator.cxx +++ b/Source/cmMakefileExecutableTargetGenerator.cxx @@ -122,7 +122,11 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule( std::string buildEcho = "Linking "; buildEcho += linkLanguage; buildEcho += " device code "; - buildEcho += targetOutputReal; + buildEcho += this->LocalGenerator->ConvertToOutputFormat( + this->LocalGenerator->MaybeConvertToRelativePath( + this->LocalGenerator->GetCurrentBinaryDirectory(), + this->DeviceLinkObject), + cmOutputConverter::SHELL); this->LocalGenerator->AppendEcho( commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress); } diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx index e017b29..2823977 100644 --- a/Source/cmMakefileLibraryTargetGenerator.cxx +++ b/Source/cmMakefileLibraryTargetGenerator.cxx @@ -127,6 +127,24 @@ void cmMakefileLibraryTargetGenerator::WriteObjectLibraryRules() void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules() { + const std::string cuda_lang("CUDA"); + cmGeneratorTarget::LinkClosure const* closure = + this->GeneratorTarget->GetLinkClosure(this->ConfigName); + + const bool hasCUDA = + (std::find(closure->Languages.begin(), closure->Languages.end(), + cuda_lang) != closure->Languages.end()); + + const bool resolveDeviceSymbols = + this->GeneratorTarget->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); + if (hasCUDA && resolveDeviceSymbols) { + std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY"; + std::string extraFlags; + this->LocalGenerator->AppendFlags( + extraFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS")); + this->WriteDeviceLibraryRules(linkRuleVar, extraFlags, false); + } + std::string linkLanguage = this->GeneratorTarget->GetLinkerLanguage(this->ConfigName); @@ -292,8 +310,12 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules( cmLocalUnixMakefileGenerator3::EchoProgress progress; this->MakeEchoProgress(progress); // Add the link message. - std::string buildEcho = "Linking " + linkLanguage + " device code"; - buildEcho += targetOutputReal; + std::string buildEcho = "Linking " + linkLanguage + " device code "; + buildEcho += this->LocalGenerator->ConvertToOutputFormat( + this->LocalGenerator->MaybeConvertToRelativePath( + this->LocalGenerator->GetCurrentBinaryDirectory(), + this->DeviceLinkObject), + cmOutputConverter::SHELL); this->LocalGenerator->AppendEcho( commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress); } @@ -857,6 +879,16 @@ void cmMakefileLibraryTargetGenerator::WriteLibraryRules( std::vector<std::string> object_strings; this->WriteObjectsStrings(object_strings, archiveCommandLimit); + // Add the cuda device object to the list of archive files. This will + // only occur on archives which have CUDA_RESOLVE_DEVICE_SYMBOLS enabled + if (!this->DeviceLinkObject.empty()) { + object_strings.push_back(this->LocalGenerator->ConvertToOutputFormat( + this->LocalGenerator->MaybeConvertToRelativePath( + this->LocalGenerator->GetCurrentBinaryDirectory(), + this->DeviceLinkObject), + cmOutputConverter::SHELL)); + } + // Create the archive with the first set of objects. std::vector<std::string>::iterator osi = object_strings.begin(); { diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index cfc91bd..8206083 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -447,6 +447,7 @@ std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd() // an executable or a dynamic library. std::string linkCmd; switch (this->GetGeneratorTarget()->GetType()) { + case cmStateEnums::STATIC_LIBRARY: case cmStateEnums::SHARED_LIBRARY: case cmStateEnums::MODULE_LIBRARY: { const std::string cudaLinkCmd( @@ -559,11 +560,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement() case cmStateEnums::EXECUTABLE: shouldHaveDeviceLinking = true; break; + case cmStateEnums::STATIC_LIBRARY: + shouldHaveDeviceLinking = + genTarget.GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); + break; default: break; } - if (!shouldHaveDeviceLinking || !hasCUDA) { + if (!(shouldHaveDeviceLinking && hasCUDA)) { return; } diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index d83662e..5a09718 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -116,6 +116,10 @@ cmVisualStudio10TargetGenerator::~cmVisualStudio10TargetGenerator() i != this->CudaOptions.end(); ++i) { delete i->second; } + for (OptionsMap::iterator i = this->CudaLinkOptions.begin(); + i != this->CudaLinkOptions.end(); ++i) { + delete i->second; + } if (!this->BuildFileStream) { return; } @@ -213,6 +217,9 @@ void cmVisualStudio10TargetGenerator::Generate() if (!this->ComputeCudaOptions()) { return; } + if (!this->ComputeCudaLinkOptions()) { + return; + } if (!this->ComputeMasmOptions()) { return; } @@ -2524,6 +2531,70 @@ void cmVisualStudio10TargetGenerator::WriteCudaOptions( this->WriteString("</CudaCompile>\n", 2); } +bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions() +{ + if (!this->GlobalGenerator->IsCudaEnabled()) { + return true; + } + for (std::vector<std::string>::const_iterator i = + this->Configurations.begin(); + i != this->Configurations.end(); ++i) { + if (!this->ComputeCudaLinkOptions(*i)) { + return false; + } + } + return true; +} + +bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions( + std::string const& configName) +{ + cmGlobalVisualStudio10Generator* gg = + static_cast<cmGlobalVisualStudio10Generator*>(this->GlobalGenerator); + CM_AUTO_PTR<Options> pOptions(new Options( + this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable())); + Options& cudaLinkOptions = *pOptions; + + // Determine if we need to do a device link + bool doDeviceLinking = false; + switch (this->GeneratorTarget->GetType()) { + case cmStateEnums::SHARED_LIBRARY: + case cmStateEnums::MODULE_LIBRARY: + case cmStateEnums::EXECUTABLE: + doDeviceLinking = true; + break; + case cmStateEnums::STATIC_LIBRARY: + doDeviceLinking = this->GeneratorTarget->GetPropertyAsBool( + "CUDA_RESOLVE_DEVICE_SYMBOLS"); + break; + default: + break; + } + + cudaLinkOptions.AddFlag("PerformDeviceLink", + doDeviceLinking ? "true" : "false"); + + this->CudaLinkOptions[configName] = pOptions.release(); + return true; +} + +void cmVisualStudio10TargetGenerator::WriteCudaLinkOptions( + std::string const& configName) +{ + if (this->GeneratorTarget->GetType() > cmStateEnums::MODULE_LIBRARY) { + return; + } + + if (!this->MSTools || !this->GlobalGenerator->IsCudaEnabled()) { + return; + } + + this->WriteString("<CudaLink>\n", 2); + Options& cudaLinkOptions = *(this->CudaLinkOptions[configName]); + cudaLinkOptions.OutputFlagMap(*this->BuildFileStream, " "); + this->WriteString("</CudaLink>\n", 2); +} + bool cmVisualStudio10TargetGenerator::ComputeMasmOptions() { if (!this->GlobalGenerator->IsMasmEnabled()) { @@ -3283,6 +3354,7 @@ void cmVisualStudio10TargetGenerator::WriteItemDefinitionGroups() } // output link flags <Link></Link> this->WriteLinkOptions(*i); + this->WriteCudaLinkOptions(*i); // output lib flags <Lib></Lib> this->WriteLibOptions(*i); // output manifest flags <Manifest></Manifest> diff --git a/Source/cmVisualStudio10TargetGenerator.h b/Source/cmVisualStudio10TargetGenerator.h index bd270bf..6106615 100644 --- a/Source/cmVisualStudio10TargetGenerator.h +++ b/Source/cmVisualStudio10TargetGenerator.h @@ -101,6 +101,11 @@ private: bool ComputeCudaOptions(std::string const& config); void WriteCudaOptions(std::string const& config, std::vector<std::string> const& includes); + + bool ComputeCudaLinkOptions(); + bool ComputeCudaLinkOptions(std::string const& config); + void WriteCudaLinkOptions(std::string const& config); + bool ComputeMasmOptions(); bool ComputeMasmOptions(std::string const& config); void WriteMasmOptions(std::string const& config, @@ -154,6 +159,7 @@ private: OptionsMap ClOptions; OptionsMap RcOptions; OptionsMap CudaOptions; + OptionsMap CudaLinkOptions; OptionsMap MasmOptions; OptionsMap NasmOptions; OptionsMap LinkOptions; diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu index f677868..a76973d 100644 --- a/Tests/Cuda/Complex/dynamic.cu +++ b/Tests/Cuda/Complex/dynamic.cu @@ -37,7 +37,7 @@ EXPORT int choose_cuda_device() << std::endl; return 1; } - if (prop.major >= 4) { + if (prop.major >= 3) { err = cudaSetDevice(i); if (err != cudaSuccess) { std::cout << "Could not select CUDA device " << i << std::endl; diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index a3bd707..5f456fc 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -3,3 +3,4 @@ ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard) ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX) ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs) +ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt new file mode 100644 index 0000000..b96bb98 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.7) +project (CudaOnlyResolveDeviceSymbols CUDA) + +# Find nm and dumpbin +if(CMAKE_NM) + set(dump_command ${CMAKE_NM}) + set(dump_args -g) +else() + include(GetPrerequisites) + message(STATUS "calling list_prerequisites to find dumpbin") + list_prerequisites("${CMAKE_COMMAND}" 0 0 0) + if(gp_dumpbin) + set(dump_command ${gp_dumpbin}) + set(dump_args /ARCHIVEMEMBERS) + endif() +endif() + +#Goal for this example: +#Build a static library that defines multiple methods and kernels that +#use each other. +#Use a custom command to build an executable that uses this static library +#We do these together to verify that we can get a static library to do +#device symbol linking, and not have it done when the executable is made +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD 11) + +add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu) +set_target_properties(CUDAResolveDeviceLib + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON + POSITION_INDEPENDENT_CODE ON) + +if(dump_command) +add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD + COMMAND ${CMAKE_COMMAND} + -DDUMP_COMMAND=${dump_command} + -DDUMP_ARGS=${dump_args} + -DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib> + -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake + ) +endif() + +add_executable(CudaOnlyResolveDeviceSymbols main.cu) +target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib) + +if(APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that + # the static cuda runtime can find it at runtime. + target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE -Wl,-rpath,/usr/local/cuda/lib) +endif() diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu new file mode 100644 index 0000000..1ce63bf --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu @@ -0,0 +1,10 @@ + +#include "file1.h" + +result_type __device__ file1_func(int x) +{ + result_type r; + r.input = x; + r.sum = x * x; + return r; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h new file mode 100644 index 0000000..ff1945c --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h @@ -0,0 +1,7 @@ + +#pragma once +struct result_type +{ + int input; + int sum; +}; diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu new file mode 100644 index 0000000..278fd6c --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu @@ -0,0 +1,25 @@ + +#include "file2.h" + +result_type __device__ file1_func(int x); + +result_type_dynamic __device__ file2_func(int x) +{ + const result_type r = file1_func(x); + const result_type_dynamic rd{ r.input, r.sum, true }; + return rd; +} + +static __global__ void file2_kernel(result_type_dynamic& r, int x) +{ + // call static_func which is a method that is defined in the + // static library that is always out of date + r = file2_func(x); +} + +int file2_launch_kernel(int x) +{ + result_type_dynamic r; + file2_kernel<<<1, 1>>>(r, x); + return r.sum; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h new file mode 100644 index 0000000..d2dbaa4 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h @@ -0,0 +1,10 @@ + +#pragma once +#include "file1.h" + +struct result_type_dynamic +{ + int input; + int sum; + bool from_static; +}; diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu new file mode 100644 index 0000000..b4b5b9e --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu @@ -0,0 +1,85 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +int file2_launch_kernel(int x); + +result_type_dynamic __device__ file2_func(int x); +static __global__ void main_kernel(result_type_dynamic& r, int x) +{ + // call function that was not device linked to us, this will cause + // a runtime failure of "invalid device function" + r = file2_func(x); +} + +int main_launch_kernel(int x) +{ + result_type_dynamic r; + main_kernel<<<1, 1>>>(r, x); + return r.sum; +} + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + file2_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file2_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + main_launch_kernel(1); + err = cudaGetLastError(); + if (err == cudaSuccess) { + // This kernel launch should fail as the file2_func was device linked + // into the static library and is not usable by the executable + std::cerr << "main_launch_kernel: kernel launch should have failed" + << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake new file mode 100644 index 0000000..94d388b --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake @@ -0,0 +1,14 @@ +execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH} + RESULT_VARIABLE RESULT + OUTPUT_VARIABLE OUTPUT + ERROR_VARIABLE ERROR +) + +if(NOT "${RESULT}" STREQUAL "0") + message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]") +endif() + +if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)") + message(FATAL_ERROR + "No cuda device objects found, device linking did not occur") +endif() diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt index 420d7a9..0a2542a 100644 --- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt +++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt @@ -12,6 +12,7 @@ project (CudaOnlySeparateCompilation CUDA) string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") set(CMAKE_CXX_STANDARD 11) set(CMAKE_CUDA_STANDARD 11) + add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu) #Having file4/file5 in a shared library causes serious problems @@ -22,12 +23,24 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu) target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA) add_executable(CudaOnlySeparateCompilation main.cu) -target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB) +target_link_libraries(CudaOnlySeparateCompilation + PRIVATE CUDASeparateLibB) + +set_target_properties(CUDASeparateLibA + CUDASeparateLibB + PROPERTIES CUDA_SEPARABLE_COMPILATION ON + POSITION_INDEPENDENT_CODE ON) -set_target_properties( CUDASeparateLibA - CUDASeparateLibB - PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +if (CMAKE_GENERATOR MATCHES "^Visual Studio") + #Visual Studio CUDA integration will not perform device linking + #on a target that itself does not have GenerateRelocatableDeviceCode + #enabled. + set_target_properties(CudaOnlySeparateCompilation + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +endif() -set_target_properties( CUDASeparateLibA - CUDASeparateLibB - PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that + # the static cuda runtime can find it at runtime. + target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib) +endif() diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu index 03e0921..40dbe5d 100644 --- a/Tests/CudaOnly/SeparateCompilation/main.cu +++ b/Tests/CudaOnly/SeparateCompilation/main.cu @@ -7,9 +7,62 @@ int file4_launch_kernel(int x); int file5_launch_kernel(int x); +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + int main(int argc, char** argv) { + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; file4_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file4_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + file5_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file5_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + return 0; } |