From a36fb229ba04321be3d0a2472a944c05fea987e9 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 25 Apr 2017 16:01:09 -0400 Subject: CUDA: Visual Studio now properly delays device linking --- Source/cmVisualStudio10TargetGenerator.cxx | 68 +++++++++++++++++++++++ Source/cmVisualStudio10TargetGenerator.h | 6 ++ Tests/Cuda/Complex/dynamic.cu | 2 +- Tests/CudaOnly/SeparateCompilation/CMakeLists.txt | 27 ++++++--- Tests/CudaOnly/SeparateCompilation/main.cu | 53 ++++++++++++++++++ 5 files changed, 148 insertions(+), 8 deletions(-) diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index d83662e..9f48825 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -116,6 +116,10 @@ cmVisualStudio10TargetGenerator::~cmVisualStudio10TargetGenerator() i != this->CudaOptions.end(); ++i) { delete i->second; } + for (OptionsMap::iterator i = this->CudaLinkOptions.begin(); + i != this->CudaLinkOptions.end(); ++i) { + delete i->second; + } if (!this->BuildFileStream) { return; } @@ -213,6 +217,9 @@ void cmVisualStudio10TargetGenerator::Generate() if (!this->ComputeCudaOptions()) { return; } + if (!this->ComputeCudaLinkOptions()) { + return; + } if (!this->ComputeMasmOptions()) { return; } @@ -2524,6 +2531,66 @@ void cmVisualStudio10TargetGenerator::WriteCudaOptions( this->WriteString("</CudaCompile>\n", 2); } +bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions() +{ + if (!this->GlobalGenerator->IsCudaEnabled()) { + return true; + } + for (std::vector<std::string>::const_iterator i = + this->Configurations.begin(); + i != this->Configurations.end(); ++i) { + if (!this->ComputeCudaLinkOptions(*i)) { + return false; + } + } + return true; +} + +bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions( + std::string const& configName) +{ + cmGlobalVisualStudio10Generator* gg = + static_cast<cmGlobalVisualStudio10Generator*>(this->GlobalGenerator); + CM_AUTO_PTR<Options> pOptions(new Options( + this->LocalGenerator, 
Options::CudaCompiler, gg->GetCudaFlagTable())); + Options& cudaLinkOptions = *pOptions; + + // Determine if we need to do a device link + bool doDeviceLinking = false; + switch (this->GeneratorTarget->GetType()) { + case cmStateEnums::SHARED_LIBRARY: + case cmStateEnums::MODULE_LIBRARY: + case cmStateEnums::EXECUTABLE: + doDeviceLinking = true; + break; + default: + break; + } + + cudaLinkOptions.AddFlag("PerformDeviceLink", + doDeviceLinking ? "true" : "false"); + + this->CudaLinkOptions[configName] = pOptions.release(); + return true; +} + +void cmVisualStudio10TargetGenerator::WriteCudaLinkOptions( + std::string const& configName) +{ + if (this->GeneratorTarget->GetType() > cmStateEnums::MODULE_LIBRARY) { + return; + } + + if (!this->MSTools || !this->GlobalGenerator->IsCudaEnabled()) { + return; + } + + this->WriteString("<CudaLink>\n", 2); + Options& cudaLinkOptions = *(this->CudaLinkOptions[configName]); + cudaLinkOptions.OutputFlagMap(*this->BuildFileStream, " "); + this->WriteString("</CudaLink>\n", 2); +} + bool cmVisualStudio10TargetGenerator::ComputeMasmOptions() { if (!this->GlobalGenerator->IsMasmEnabled()) { @@ -3283,6 +3350,7 @@ void cmVisualStudio10TargetGenerator::WriteItemDefinitionGroups() } // output link flags this->WriteLinkOptions(*i); + this->WriteCudaLinkOptions(*i); // output lib flags this->WriteLibOptions(*i); // output manifest flags diff --git a/Source/cmVisualStudio10TargetGenerator.h b/Source/cmVisualStudio10TargetGenerator.h index bd270bf..6106615 100644 --- a/Source/cmVisualStudio10TargetGenerator.h +++ b/Source/cmVisualStudio10TargetGenerator.h @@ -101,6 +101,11 @@ private: bool ComputeCudaOptions(std::string const& config); void WriteCudaOptions(std::string const& config, std::vector<std::string> const& includes); + + bool ComputeCudaLinkOptions(); + bool ComputeCudaLinkOptions(std::string const& config); + void WriteCudaLinkOptions(std::string const& config); + bool ComputeMasmOptions(); bool ComputeMasmOptions(std::string const& config); void 
WriteMasmOptions(std::string const& config, @@ -154,6 +159,7 @@ private: OptionsMap ClOptions; OptionsMap RcOptions; OptionsMap CudaOptions; + OptionsMap CudaLinkOptions; OptionsMap MasmOptions; OptionsMap NasmOptions; OptionsMap LinkOptions; diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu index f677868..a76973d 100644 --- a/Tests/Cuda/Complex/dynamic.cu +++ b/Tests/Cuda/Complex/dynamic.cu @@ -37,7 +37,7 @@ EXPORT int choose_cuda_device() << std::endl; return 1; } - if (prop.major >= 4) { + if (prop.major >= 3) { err = cudaSetDevice(i); if (err != cudaSuccess) { std::cout << "Could not select CUDA device " << i << std::endl; diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt index 420d7a9..0a2542a 100644 --- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt +++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt @@ -12,6 +12,7 @@ project (CudaOnlySeparateCompilation CUDA) string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") set(CMAKE_CXX_STANDARD 11) set(CMAKE_CUDA_STANDARD 11) + add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu) #Having file4/file5 in a shared library causes serious problems @@ -22,12 +23,24 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu) target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA) add_executable(CudaOnlySeparateCompilation main.cu) -target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB) +target_link_libraries(CudaOnlySeparateCompilation + PRIVATE CUDASeparateLibB) + +set_target_properties(CUDASeparateLibA + CUDASeparateLibB + PROPERTIES CUDA_SEPARABLE_COMPILATION ON + POSITION_INDEPENDENT_CODE ON) -set_target_properties( CUDASeparateLibA - CUDASeparateLibB - PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +if (CMAKE_GENERATOR MATCHES "^Visual Studio") + #Visual Studio CUDA integration will not perform device linking + #on a target that itself does not have 
GenerateRelocatableDeviceCode + #enabled. + set_target_properties(CudaOnlySeparateCompilation + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +endif() -set_target_properties( CUDASeparateLibA - CUDASeparateLibB - PROPERTIES POSITION_INDEPENDENT_CODE ON) +if (APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that + # the static cuda runtime can find it at runtime. + target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib) +endif() diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu index 03e0921..40dbe5d 100644 --- a/Tests/CudaOnly/SeparateCompilation/main.cu +++ b/Tests/CudaOnly/SeparateCompilation/main.cu @@ -7,9 +7,62 @@ int file4_launch_kernel(int x); int file5_launch_kernel(int x); +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + int main(int argc, char** argv) { + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; file4_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file4_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + file5_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr 
<< "file5_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + return 0; } -- cgit v0.12