summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Help/manual/cmake-properties.7.rst1
-rw-r--r--Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst15
-rw-r--r--Source/cmLinkLineDeviceComputer.cxx21
-rw-r--r--Source/cmMakefileExecutableTargetGenerator.cxx6
-rw-r--r--Source/cmMakefileLibraryTargetGenerator.cxx36
-rw-r--r--Source/cmNinjaNormalTargetGenerator.cxx7
-rw-r--r--Source/cmVisualStudio10TargetGenerator.cxx72
-rw-r--r--Source/cmVisualStudio10TargetGenerator.h6
-rw-r--r--Tests/Cuda/Complex/dynamic.cu2
-rw-r--r--Tests/CudaOnly/CMakeLists.txt1
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt52
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file1.cu10
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file1.h7
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file2.cu25
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file2.h10
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/main.cu85
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake14
-rw-r--r--Tests/CudaOnly/SeparateCompilation/CMakeLists.txt27
-rw-r--r--Tests/CudaOnly/SeparateCompilation/main.cu53
19 files changed, 435 insertions, 15 deletions
diff --git a/Help/manual/cmake-properties.7.rst b/Help/manual/cmake-properties.7.rst
index 344bc09..38cc0d8 100644
--- a/Help/manual/cmake-properties.7.rst
+++ b/Help/manual/cmake-properties.7.rst
@@ -154,6 +154,7 @@ Properties on Targets
/prop_tgt/CROSSCOMPILING_EMULATOR
/prop_tgt/CUDA_PTX_COMPILATION
/prop_tgt/CUDA_SEPARABLE_COMPILATION
+ /prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS
/prop_tgt/CUDA_EXTENSIONS
/prop_tgt/CUDA_STANDARD
/prop_tgt/CUDA_STANDARD_REQUIRED
diff --git a/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst b/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst
new file mode 100644
index 0000000..127d79f
--- /dev/null
+++ b/Help/prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS.rst
@@ -0,0 +1,15 @@
+CUDA_RESOLVE_DEVICE_SYMBOLS
+---------------------------
+
+CUDA only: Enables device linking for the specific static library target
+
+If set this will enable device linking on this static library target. Normally
+device linking is deferred until a shared library or executable is generated,
+allowing for multiple static libraries to resolve device symbols at the same
+time.
+
+For instance:
+
+.. code-block:: cmake
+
+ set_property(TARGET mystaticlib PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx
index 6789555..bf30b39 100644
--- a/Source/cmLinkLineDeviceComputer.cxx
+++ b/Source/cmLinkLineDeviceComputer.cxx
@@ -39,9 +39,24 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries(
continue;
}
- if (li->Target->GetType() == cmStateEnums::INTERFACE_LIBRARY ||
- li->Target->GetType() == cmStateEnums::SHARED_LIBRARY ||
- li->Target->GetType() == cmStateEnums::MODULE_LIBRARY) {
+ bool skippable = false;
+ switch (li->Target->GetType()) {
+ case cmStateEnums::SHARED_LIBRARY:
+ case cmStateEnums::MODULE_LIBRARY:
+ case cmStateEnums::INTERFACE_LIBRARY:
+ skippable = true;
+ break;
+ case cmStateEnums::STATIC_LIBRARY:
+ // If a static library is resolving its device linking, it should
+ // be removed for other device linking
+ skippable =
+ li->Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
+ break;
+ default:
+ break;
+ }
+
+ if (skippable) {
continue;
}
diff --git a/Source/cmMakefileExecutableTargetGenerator.cxx b/Source/cmMakefileExecutableTargetGenerator.cxx
index a93b42d..a719887 100644
--- a/Source/cmMakefileExecutableTargetGenerator.cxx
+++ b/Source/cmMakefileExecutableTargetGenerator.cxx
@@ -122,7 +122,11 @@ void cmMakefileExecutableTargetGenerator::WriteDeviceExecutableRule(
std::string buildEcho = "Linking ";
buildEcho += linkLanguage;
buildEcho += " device code ";
- buildEcho += targetOutputReal;
+ buildEcho += this->LocalGenerator->ConvertToOutputFormat(
+ this->LocalGenerator->MaybeConvertToRelativePath(
+ this->LocalGenerator->GetCurrentBinaryDirectory(),
+ this->DeviceLinkObject),
+ cmOutputConverter::SHELL);
this->LocalGenerator->AppendEcho(
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
}
diff --git a/Source/cmMakefileLibraryTargetGenerator.cxx b/Source/cmMakefileLibraryTargetGenerator.cxx
index e017b29..2823977 100644
--- a/Source/cmMakefileLibraryTargetGenerator.cxx
+++ b/Source/cmMakefileLibraryTargetGenerator.cxx
@@ -127,6 +127,24 @@ void cmMakefileLibraryTargetGenerator::WriteObjectLibraryRules()
void cmMakefileLibraryTargetGenerator::WriteStaticLibraryRules()
{
+ const std::string cuda_lang("CUDA");
+ cmGeneratorTarget::LinkClosure const* closure =
+ this->GeneratorTarget->GetLinkClosure(this->ConfigName);
+
+ const bool hasCUDA =
+ (std::find(closure->Languages.begin(), closure->Languages.end(),
+ cuda_lang) != closure->Languages.end());
+
+ const bool resolveDeviceSymbols =
+ this->GeneratorTarget->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
+ if (hasCUDA && resolveDeviceSymbols) {
+ std::string linkRuleVar = "CMAKE_CUDA_DEVICE_LINK_LIBRARY";
+ std::string extraFlags;
+ this->LocalGenerator->AppendFlags(
+ extraFlags, this->GeneratorTarget->GetProperty("LINK_FLAGS"));
+ this->WriteDeviceLibraryRules(linkRuleVar, extraFlags, false);
+ }
+
std::string linkLanguage =
this->GeneratorTarget->GetLinkerLanguage(this->ConfigName);
@@ -292,8 +310,12 @@ void cmMakefileLibraryTargetGenerator::WriteDeviceLibraryRules(
cmLocalUnixMakefileGenerator3::EchoProgress progress;
this->MakeEchoProgress(progress);
// Add the link message.
- std::string buildEcho = "Linking " + linkLanguage + " device code";
- buildEcho += targetOutputReal;
+ std::string buildEcho = "Linking " + linkLanguage + " device code ";
+ buildEcho += this->LocalGenerator->ConvertToOutputFormat(
+ this->LocalGenerator->MaybeConvertToRelativePath(
+ this->LocalGenerator->GetCurrentBinaryDirectory(),
+ this->DeviceLinkObject),
+ cmOutputConverter::SHELL);
this->LocalGenerator->AppendEcho(
commands, buildEcho, cmLocalUnixMakefileGenerator3::EchoLink, &progress);
}
@@ -857,6 +879,16 @@ void cmMakefileLibraryTargetGenerator::WriteLibraryRules(
std::vector<std::string> object_strings;
this->WriteObjectsStrings(object_strings, archiveCommandLimit);
+ // Add the cuda device object to the list of archive files. This will
+ // only occur on archives which have CUDA_RESOLVE_DEVICE_SYMBOLS enabled
+ if (!this->DeviceLinkObject.empty()) {
+ object_strings.push_back(this->LocalGenerator->ConvertToOutputFormat(
+ this->LocalGenerator->MaybeConvertToRelativePath(
+ this->LocalGenerator->GetCurrentBinaryDirectory(),
+ this->DeviceLinkObject),
+ cmOutputConverter::SHELL));
+ }
+
// Create the archive with the first set of objects.
std::vector<std::string>::iterator osi = object_strings.begin();
{
diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx
index cfc91bd..8206083 100644
--- a/Source/cmNinjaNormalTargetGenerator.cxx
+++ b/Source/cmNinjaNormalTargetGenerator.cxx
@@ -447,6 +447,7 @@ std::vector<std::string> cmNinjaNormalTargetGenerator::ComputeDeviceLinkCmd()
// an executable or a dynamic library.
std::string linkCmd;
switch (this->GetGeneratorTarget()->GetType()) {
+ case cmStateEnums::STATIC_LIBRARY:
case cmStateEnums::SHARED_LIBRARY:
case cmStateEnums::MODULE_LIBRARY: {
const std::string cudaLinkCmd(
@@ -559,11 +560,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatement()
case cmStateEnums::EXECUTABLE:
shouldHaveDeviceLinking = true;
break;
+ case cmStateEnums::STATIC_LIBRARY:
+ shouldHaveDeviceLinking =
+ genTarget.GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS");
+ break;
default:
break;
}
- if (!shouldHaveDeviceLinking || !hasCUDA) {
+ if (!(shouldHaveDeviceLinking && hasCUDA)) {
return;
}
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index d83662e..5a09718 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -116,6 +116,10 @@ cmVisualStudio10TargetGenerator::~cmVisualStudio10TargetGenerator()
i != this->CudaOptions.end(); ++i) {
delete i->second;
}
+ for (OptionsMap::iterator i = this->CudaLinkOptions.begin();
+ i != this->CudaLinkOptions.end(); ++i) {
+ delete i->second;
+ }
if (!this->BuildFileStream) {
return;
}
@@ -213,6 +217,9 @@ void cmVisualStudio10TargetGenerator::Generate()
if (!this->ComputeCudaOptions()) {
return;
}
+ if (!this->ComputeCudaLinkOptions()) {
+ return;
+ }
if (!this->ComputeMasmOptions()) {
return;
}
@@ -2524,6 +2531,70 @@ void cmVisualStudio10TargetGenerator::WriteCudaOptions(
this->WriteString("</CudaCompile>\n", 2);
}
+bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions()
+{
+ if (!this->GlobalGenerator->IsCudaEnabled()) {
+ return true;
+ }
+ for (std::vector<std::string>::const_iterator i =
+ this->Configurations.begin();
+ i != this->Configurations.end(); ++i) {
+ if (!this->ComputeCudaLinkOptions(*i)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool cmVisualStudio10TargetGenerator::ComputeCudaLinkOptions(
+ std::string const& configName)
+{
+ cmGlobalVisualStudio10Generator* gg =
+ static_cast<cmGlobalVisualStudio10Generator*>(this->GlobalGenerator);
+ CM_AUTO_PTR<Options> pOptions(new Options(
+ this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable()));
+ Options& cudaLinkOptions = *pOptions;
+
+ // Determine if we need to do a device link
+ bool doDeviceLinking = false;
+ switch (this->GeneratorTarget->GetType()) {
+ case cmStateEnums::SHARED_LIBRARY:
+ case cmStateEnums::MODULE_LIBRARY:
+ case cmStateEnums::EXECUTABLE:
+ doDeviceLinking = true;
+ break;
+ case cmStateEnums::STATIC_LIBRARY:
+ doDeviceLinking = this->GeneratorTarget->GetPropertyAsBool(
+ "CUDA_RESOLVE_DEVICE_SYMBOLS");
+ break;
+ default:
+ break;
+ }
+
+ cudaLinkOptions.AddFlag("PerformDeviceLink",
+ doDeviceLinking ? "true" : "false");
+
+ this->CudaLinkOptions[configName] = pOptions.release();
+ return true;
+}
+
+void cmVisualStudio10TargetGenerator::WriteCudaLinkOptions(
+ std::string const& configName)
+{
+ if (this->GeneratorTarget->GetType() > cmStateEnums::MODULE_LIBRARY) {
+ return;
+ }
+
+ if (!this->MSTools || !this->GlobalGenerator->IsCudaEnabled()) {
+ return;
+ }
+
+ this->WriteString("<CudaLink>\n", 2);
+ Options& cudaLinkOptions = *(this->CudaLinkOptions[configName]);
+ cudaLinkOptions.OutputFlagMap(*this->BuildFileStream, " ");
+ this->WriteString("</CudaLink>\n", 2);
+}
+
bool cmVisualStudio10TargetGenerator::ComputeMasmOptions()
{
if (!this->GlobalGenerator->IsMasmEnabled()) {
@@ -3283,6 +3354,7 @@ void cmVisualStudio10TargetGenerator::WriteItemDefinitionGroups()
}
// output link flags <Link></Link>
this->WriteLinkOptions(*i);
+ this->WriteCudaLinkOptions(*i);
// output lib flags <Lib></Lib>
this->WriteLibOptions(*i);
// output manifest flags <Manifest></Manifest>
diff --git a/Source/cmVisualStudio10TargetGenerator.h b/Source/cmVisualStudio10TargetGenerator.h
index bd270bf..6106615 100644
--- a/Source/cmVisualStudio10TargetGenerator.h
+++ b/Source/cmVisualStudio10TargetGenerator.h
@@ -101,6 +101,11 @@ private:
bool ComputeCudaOptions(std::string const& config);
void WriteCudaOptions(std::string const& config,
std::vector<std::string> const& includes);
+
+ bool ComputeCudaLinkOptions();
+ bool ComputeCudaLinkOptions(std::string const& config);
+ void WriteCudaLinkOptions(std::string const& config);
+
bool ComputeMasmOptions();
bool ComputeMasmOptions(std::string const& config);
void WriteMasmOptions(std::string const& config,
@@ -154,6 +159,7 @@ private:
OptionsMap ClOptions;
OptionsMap RcOptions;
OptionsMap CudaOptions;
+ OptionsMap CudaLinkOptions;
OptionsMap MasmOptions;
OptionsMap NasmOptions;
OptionsMap LinkOptions;
diff --git a/Tests/Cuda/Complex/dynamic.cu b/Tests/Cuda/Complex/dynamic.cu
index f677868..a76973d 100644
--- a/Tests/Cuda/Complex/dynamic.cu
+++ b/Tests/Cuda/Complex/dynamic.cu
@@ -37,7 +37,7 @@ EXPORT int choose_cuda_device()
<< std::endl;
return 1;
}
- if (prop.major >= 4) {
+ if (prop.major >= 3) {
err = cudaSetDevice(i);
if (err != cudaSuccess) {
std::cout << "Could not select CUDA device " << i << std::endl;
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index a3bd707..5f456fc 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -3,3 +3,4 @@ ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
+ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
new file mode 100644
index 0000000..b96bb98
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
@@ -0,0 +1,52 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyResolveDeviceSymbols CUDA)
+
+# Find nm and dumpbin
+if(CMAKE_NM)
+ set(dump_command ${CMAKE_NM})
+ set(dump_args -g)
+else()
+ include(GetPrerequisites)
+ message(STATUS "calling list_prerequisites to find dumpbin")
+ list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
+ if(gp_dumpbin)
+ set(dump_command ${gp_dumpbin})
+ set(dump_args /ARCHIVEMEMBERS)
+ endif()
+endif()
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#Use a custom command to build an executable that uses this static library
+#We do these together to verify that we can get a static library to do
+#device symbol linking, and not have it done when the executable is made
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+
+add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
+set_target_properties(CUDAResolveDeviceLib
+ PROPERTIES
+ CUDA_SEPARABLE_COMPILATION ON
+ CUDA_RESOLVE_DEVICE_SYMBOLS ON
+ POSITION_INDEPENDENT_CODE ON)
+
+if(dump_command)
+add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
+ COMMAND ${CMAKE_COMMAND}
+ -DDUMP_COMMAND=${dump_command}
+ -DDUMP_ARGS=${dump_args}
+ -DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib>
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
+ )
+endif()
+
+add_executable(CudaOnlyResolveDeviceSymbols main.cu)
+target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
+
+if(APPLE)
+ # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+ # the static cuda runtime can find it at runtime.
+ target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
new file mode 100644
index 0000000..1ce63bf
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+ result_type r;
+ r.input = x;
+ r.sum = x * x;
+ return r;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+ int input;
+ int sum;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
new file mode 100644
index 0000000..278fd6c
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
@@ -0,0 +1,25 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+ const result_type r = file1_func(x);
+ const result_type_dynamic rd{ r.input, r.sum, true };
+ return rd;
+}
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x)
+{
+ // call static_func which is a method that is defined in the
+ // static library that is always out of date
+ r = file2_func(x);
+}
+
+int file2_launch_kernel(int x)
+{
+ result_type_dynamic r;
+ file2_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
new file mode 100644
index 0000000..d2dbaa4
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+ int input;
+ int sum;
+ bool from_static;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
new file mode 100644
index 0000000..b4b5b9e
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
@@ -0,0 +1,85 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+int file2_launch_kernel(int x);
+
+result_type_dynamic __device__ file2_func(int x);
+static __global__ void main_kernel(result_type_dynamic& r, int x)
+{
+ // call function that was not device linked to us, this will cause
+ // a runtime failure of "invalid device function"
+ r = file2_func(x);
+}
+
+int main_launch_kernel(int x)
+{
+ result_type_dynamic r;
+ main_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ std::cout << "prop.major: " << prop.major << std::endl;
+ if (prop.major >= 3) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+ << std::endl;
+
+ return 1;
+}
+
+int main(int argc, char** argv)
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
+ file2_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file2_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ main_launch_kernel(1);
+ err = cudaGetLastError();
+ if (err == cudaSuccess) {
+ // This kernel launch should fail as the file2_func was device linked
+ // into the static library and is not usable by the executable
+ std::cerr << "main_launch_kernel: kernel launch should have failed"
+ << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
new file mode 100644
index 0000000..94d388b
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
@@ -0,0 +1,14 @@
+execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
+ RESULT_VARIABLE RESULT
+ OUTPUT_VARIABLE OUTPUT
+ ERROR_VARIABLE ERROR
+)
+
+if(NOT "${RESULT}" STREQUAL "0")
+ message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
+endif()
+
+if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)")
+ message(FATAL_ERROR
+ "No cuda device objects found, device linking did not occur")
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
index 420d7a9..0a2542a 100644
--- a/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
+++ b/Tests/CudaOnly/SeparateCompilation/CMakeLists.txt
@@ -12,6 +12,7 @@ project (CudaOnlySeparateCompilation CUDA)
string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
+
add_library(CUDASeparateLibA STATIC file1.cu file2.cu file3.cu)
#Having file4/file5 in a shared library causes serious problems
@@ -22,12 +23,24 @@ add_library(CUDASeparateLibB STATIC file4.cu file5.cu)
target_link_libraries(CUDASeparateLibB PRIVATE CUDASeparateLibA)
add_executable(CudaOnlySeparateCompilation main.cu)
-target_link_libraries(CudaOnlySeparateCompilation PRIVATE CUDASeparateLibB)
+target_link_libraries(CudaOnlySeparateCompilation
+ PRIVATE CUDASeparateLibB)
+
+set_target_properties(CUDASeparateLibA
+ CUDASeparateLibB
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON
+ POSITION_INDEPENDENT_CODE ON)
-set_target_properties( CUDASeparateLibA
- CUDASeparateLibB
- PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+if (CMAKE_GENERATOR MATCHES "^Visual Studio")
+ #Visual Studio CUDA integration will not perform device linking
+ #on a target that itself does not have GenerateRelocatableDeviceCode
+ #enabled.
+ set_target_properties(CudaOnlySeparateCompilation
+ PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+endif()
-set_target_properties( CUDASeparateLibA
- CUDASeparateLibB
- PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (APPLE)
+ # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+ # the static cuda runtime can find it at runtime.
+ target_link_libraries(CudaOnlySeparateCompilation PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilation/main.cu b/Tests/CudaOnly/SeparateCompilation/main.cu
index 03e0921..40dbe5d 100644
--- a/Tests/CudaOnly/SeparateCompilation/main.cu
+++ b/Tests/CudaOnly/SeparateCompilation/main.cu
@@ -7,9 +7,62 @@
int file4_launch_kernel(int x);
int file5_launch_kernel(int x);
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ if (prop.major >= 3) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+ << std::endl;
+
+ return 1;
+}
+
int main(int argc, char** argv)
{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
file4_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file4_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
file5_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file5_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
return 0;
}