summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobert Maynard <robert.maynard@kitware.com>2017-04-25 20:01:09 (GMT)
committerRobert Maynard <robert.maynard@kitware.com>2017-04-26 15:41:22 (GMT)
commita36fb229ba04321be3d0a2472a944c05fea987e9 (patch)
treecc502964c2d72820e19986aad4af3e53b0d89d56
parent3cb7048b521395fdc863dacacb85c6f7f28a1bc7 (diff)
downloadCMake-a36fb229ba04321be3d0a2472a944c05fea987e9.zip
CMake-a36fb229ba04321be3d0a2472a944c05fea987e9.tar.gz
CMake-a36fb229ba04321be3d0a2472a944c05fea987e9.tar.bz2
CUDA: Visual Studio now properly delays device linking
-rw-r--r--Source/cmVisualStudio10TargetGenerator.cxx68
-rw-r--r--Source/cmVisualStudio10TargetGenerator.h6
-rw-r--r--Tests/Cuda/Complex/dynamic.cu2
-rw-r--r--Tests/CudaOnly/SeparateCompilation/CMakeLists.txt27
-rw-r--r--Tests/CudaOnly/SeparateCompilation/main.cu53
5 files changed, 148 insertions, 8 deletions
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index d83662e..9f48825 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -116,6 +116,10 @@ cmVisualStudio10TargetGenerator::~cmVisualStudio10TargetGenerator()
i != this->CudaOptions.end(); ++i) {
delete i->second;
}
+ for (OptionsMap::iterator i = this->CudaLinkOptions.begin();
+ i != this->CudaLinkOptions.end(); ++i) {
+ delete i->second;
+ }
if (!this->BuildFileStream) {
return;
}
@@ -213,6 +217,9 @@ void cmVisualStudio10TargetGenerator::Generate()
if (!this->ComputeCudaOptions()) {
return;
}
+ if (!this->ComputeCudaLinkOptions()) {
+ return;
+ }
if (!this->ComputeMasmOptions()) {
return;
}
@@ -2524,6 +2531,66 @@ void cmVisualStudio10TargetGenerator::WriteCudaOptions(
this->WriteString("</CudaCompile>\n", 2);
}
+bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions()
+{
+ if (!this->GlobalGenerator->IsCudaEnabled()) {
+ return true;
+ }
+ for (std::vector<std::string>::const_iterator i =
+ this->Configurations.begin();
+ i != this->Configurations.end(); ++i) {
+ if (!this->ComputeCudaLinkOptions(*i)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
+ std::string const& configName)
+{
+ cmGlobalVisualStudio10Generator* gg =
+ static_cast<cmGlobalVisualStudio10Generator*>(this->GlobalGenerator);
+ CM_AUTO_PTR<Options> pOptions(new Options(
+ this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable()));
+ Options& cudaLinkOptions = *pOptions;
+
+ // Determine if we need to do a device link
+ bool doDeviceLinking = false;
+ switch (this->GeneratorTarget->GetType()) {
+ case cmStateEnums::SHARED_LIBRARY:
+ case cmStateEnums::MODULE_LIBRARY:
+ case cmStateEnums::EXECUTABLE:
+ doDeviceLinking = true;
+ break;
+ default:
+ break;
+ }
+
+ cudaLinkOptions.AddFlag("PerformDeviceLink",
+ doDeviceLinking ? "true" : "false");
+
+ this->CudaLinkOptions[configName] = pOptions.release();
+ return true;
+}
+
+void cmVisualStudio10TargetGenerator::WriteCudaLinkOptions(
+ std::string const& configName)
+{
+ if (this->GeneratorTarget->GetType() > cmStateEnums::MODULE_LIBRARY) {
+ return;
+ }
+
+ if (!this->MSTools || !this->GlobalGenerator->IsCudaEnabled()) {
+ return;
+ }
+
+ this->WriteString("<CudaLink>\n", 2);
+ Options& cudaLinkOptions = *(this->CudaLinkOptions[configName]);
+ cudaLinkOptions.OutputFlagMap(*this->BuildFileStream, " ");
+ this->WriteString("</CudaLink>\n", 2);
+}
+
bool cmVisualStudio10TargetGenerator::ComputeMasmOptions()
{
if (!this->GlobalGenerator->IsMasmEnabled()) {
@@ -3283,6 +3350,7 @@ void cmVisualStudio10TargetGenerator::WriteItemDefinitionGroups()
}
// output link flags <Link></Link>
this->WriteLinkOptions(*i);
+ this->WriteCudaLinkOptions(*i);
// output lib flags <Lib></Lib>
this->WriteLibOptions(*i);
// output manifest flags <Manifest></Manifest>
diff --git a/Source/cmVisualStudio10TargetGenerator.h b/Source/cmVisualStudio10TargetGenerator.h
index bd270bf..6106615 100644
--- a/Source/cmVisualStudio10TargetGenerator.h
+++ b/Source/cmVisualStudio10TargetGenerator.h
@@ -101,6 +101,11 @@ private:
bool ComputeCudaOptions(std::string const& config);
void WriteCudaOptions(std::string const& config,
std::vector<std::string> const& includes);
+
+ bool ComputeCudaLinkOptions();
+ bool ComputeCudaLinkOptions(std::string const& config);
+ void WriteCudaLinkOptions(std::string const& config);
+
bool ComputeMasmOptions();
bool ComputeMasmOptions(std::string const& config);
void WriteMasmOptions(std::string const& config,
@@ -154,6 +159,7 @@ private:
OptionsMap ClOptions;
OptionsMap RcOptions;
OptionsMap CudaOptions;
+ OptionsMap CudaLinkOptions;
OptionsMap MasmOptions;
OptionsMap NasmOptions;
OptionsMap LinkOptions;
diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu
index f677868..a76973d 100644
--- a/Tests/Cuda/Complex/dynamic.cu
+++ b/Tests/Cuda/Complex/dynamic.cu
@@ -37,7 +37,7 @@ EXPORT int choose_cuda_device()
<< std::endl;
return 1;
}
- if (prop.major >= 4) {
+ if (prop.major >= 3) {
err = cudaSetDevice(i);
if (err != cudaSuccess) {
std::cout << "Could not select CUDA device " << i << std::endl;
diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
index 420d7a9..0a2542a 100644
--- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
+++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
@@ -12,6 +12,7 @@ project (CudaOnlySeparateCompilation CUDA)
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
+
add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu)
#Having file4/file5 in a shared library causes serious problems
@@ -22,12 +23,24 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
add_executable(CudaOnlySeparateCompilation main.cu)
-target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
+target_link_libraries(CudaOnlySeparateCompilation
+ PRIVATE CUDASeparateLibB)
+
+set_target_properties(CUDASeparateLibA
+ CUDASeparateLibB
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON
+ POSITION_INDEPENDENT_CODE ON)
-set_target_properties( CUDASeparateLibA
- CUDASeparateLibB
- PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+if (CMAKE_GENERATOR MATCHES "^Visual Studio")
+ #Visual Studio CUDA integration will not perform device linking
+ #on a target that itself does not have GenerateRelocatableDeviceCode
+ #enabled.
+ set_target_properties(CudaOnlySeparateCompilation
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+endif()
-set_target_properties( CUDASeparateLibA
- CUDASeparateLibB
- PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (APPLE)
+ # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+ # the static cuda runtime can find it at runtime.
+ target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu
index 03e0921..40dbe5d 100644
--- a/Tests/CudaOnly/SeparateCompilation/main.cu
+++ b/Tests/CudaOnly/SeparateCompilation/main.cu
@@ -7,9 +7,62 @@
int file4_launch_kernel(int x);
int file5_launch_kernel(int x);
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ if (prop.major >= 3) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+ << std::endl;
+
+ return 1;
+}
+
int main(int argc, char** argv)
{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
file4_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file4_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
file5_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file5_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
return 0;
}