summaryrefslogtreecommitdiffstats
path: root/Tests/CudaOnly
diff options
context:
space:
mode:
authorRobert Maynard <robert.maynard@kitware.com>2017-04-25 20:09:56 (GMT)
committerRobert Maynard <robert.maynard@kitware.com>2017-04-26 20:18:25 (GMT)
commit493671a5212c6548b2d7376c7065f5f76692a792 (patch)
treeb6e861120d50f15946603919b3637806a4e93596 /Tests/CudaOnly
parent8fb85c68bb0090b44df22c27dabc03da63602e5e (diff)
downloadCMake-493671a5212c6548b2d7376c7065f5f76692a792.zip
CMake-493671a5212c6548b2d7376c7065f5f76692a792.tar.gz
CMake-493671a5212c6548b2d7376c7065f5f76692a792.tar.bz2
CUDA: Static libraries can now explicitly resolve device symbols
If a static library has the property CUDA_RESOLVE_DEVICE_SYMBOLS enabled it will now perform the device link step. The normal behavior is to delay calling device link until the static library is consumed by a shared library or an executable.
Diffstat (limited to 'Tests/CudaOnly')
-rw-r--r--Tests/CudaOnly/CMakeLists.txt1
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt52
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file1.cu10
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file1.h7
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file2.cu25
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/file2.h10
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/main.cu85
-rw-r--r--Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake14
8 files changed, 204 insertions, 0 deletions
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index a3bd707..5f456fc 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -3,3 +3,4 @@ ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
+ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
new file mode 100644
index 0000000..b96bb98
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt
@@ -0,0 +1,52 @@
+cmake_minimum_required(VERSION 3.7)
+project (CudaOnlyResolveDeviceSymbols CUDA)
+
+# Find nm and dumpbin
+if(CMAKE_NM)
+ set(dump_command ${CMAKE_NM})
+ set(dump_args -g)
+else()
+ include(GetPrerequisites)
+ message(STATUS "calling list_prerequisites to find dumpbin")
+ list_prerequisites("${CMAKE_COMMAND}" 0 0 0)
+ if(gp_dumpbin)
+ set(dump_command ${gp_dumpbin})
+ set(dump_args /ARCHIVEMEMBERS)
+ endif()
+endif()
+
+#Goal for this example:
+#Build a static library that defines multiple methods and kernels that
+#use each other.
+#Use a custom command to build an executable that uses this static library
+#We do these together to verify that we can get a static library to do
+#device symbol linking, and not have it done when the executable is made
+string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30")
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+
+add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu)
+set_target_properties(CUDAResolveDeviceLib
+ PROPERTIES
+ CUDA_SEPARABLE_COMPILATION ON
+ CUDA_RESOLVE_DEVICE_SYMBOLS ON
+ POSITION_INDEPENDENT_CODE ON)
+
+if(dump_command)
+add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD
+ COMMAND ${CMAKE_COMMAND}
+ -DDUMP_COMMAND=${dump_command}
+ -DDUMP_ARGS=${dump_args}
+ -DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib>
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake
+ )
+endif()
+
+add_executable(CudaOnlyResolveDeviceSymbols main.cu)
+target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib)
+
+if(APPLE)
+ # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
+ # the static cuda runtime can find it at runtime.
+ target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE -Wl,-rpath,/usr/local/cuda/lib)
+endif()
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
new file mode 100644
index 0000000..1ce63bf
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu
@@ -0,0 +1,10 @@
+
+#include "file1.h"
+
+result_type __device__ file1_func(int x)
+{
+ result_type r;
+ r.input = x;
+ r.sum = x * x;
+ return r;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
new file mode 100644
index 0000000..ff1945c
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h
@@ -0,0 +1,7 @@
+
+#pragma once
+struct result_type
+{
+ int input;
+ int sum;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
new file mode 100644
index 0000000..278fd6c
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu
@@ -0,0 +1,25 @@
+
+#include "file2.h"
+
+result_type __device__ file1_func(int x);
+
+result_type_dynamic __device__ file2_func(int x)
+{
+ const result_type r = file1_func(x);
+ const result_type_dynamic rd{ r.input, r.sum, true };
+ return rd;
+}
+
+static __global__ void file2_kernel(result_type_dynamic& r, int x)
+{
+ // call static_func which is a method that is defined in the
+ // static library that is always out of date
+ r = file2_func(x);
+}
+
+int file2_launch_kernel(int x)
+{
+ result_type_dynamic r;
+ file2_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
new file mode 100644
index 0000000..d2dbaa4
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h
@@ -0,0 +1,10 @@
+
+#pragma once
+#include "file1.h"
+
+struct result_type_dynamic
+{
+ int input;
+ int sum;
+ bool from_static;
+};
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
new file mode 100644
index 0000000..b4b5b9e
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu
@@ -0,0 +1,85 @@
+
+#include <iostream>
+
+#include "file1.h"
+#include "file2.h"
+
+int file2_launch_kernel(int x);
+
+result_type_dynamic __device__ file2_func(int x);
+static __global__ void main_kernel(result_type_dynamic& r, int x)
+{
+ // call function that was not device linked to us, this will cause
+ // a runtime failure of "invalid device function"
+ r = file2_func(x);
+}
+
+int main_launch_kernel(int x)
+{
+ result_type_dynamic r;
+ main_kernel<<<1, 1>>>(r, x);
+ return r.sum;
+}
+
+int choose_cuda_device()
+{
+ int nDevices = 0;
+ cudaError_t err = cudaGetDeviceCount(&nDevices);
+ if (err != cudaSuccess) {
+ std::cerr << "Failed to retrieve the number of CUDA enabled devices"
+ << std::endl;
+ return 1;
+ }
+ for (int i = 0; i < nDevices; ++i) {
+ cudaDeviceProp prop;
+ cudaError_t err = cudaGetDeviceProperties(&prop, i);
+ if (err != cudaSuccess) {
+ std::cerr << "Could not retrieve properties from CUDA device " << i
+ << std::endl;
+ return 1;
+ }
+ std::cout << "prop.major: " << prop.major << std::endl;
+ if (prop.major >= 3) {
+ err = cudaSetDevice(i);
+ if (err != cudaSuccess) {
+ std::cout << "Could not select CUDA device " << i << std::endl;
+ } else {
+ return 0;
+ }
+ }
+ }
+
+ std::cout << "Could not find a CUDA enabled card supporting compute >=3.0"
+ << std::endl;
+
+ return 1;
+}
+
+int main(int argc, char** argv)
+{
+ int ret = choose_cuda_device();
+ if (ret) {
+ return 0;
+ }
+
+ cudaError_t err;
+ file2_launch_kernel(42);
+ err = cudaGetLastError();
+ if (err != cudaSuccess) {
+ std::cerr << "file2_launch_kernel: kernel launch failed: "
+ << cudaGetErrorString(err) << std::endl;
+ return 1;
+ }
+
+ main_launch_kernel(1);
+ err = cudaGetLastError();
+ if (err == cudaSuccess) {
+ // This kernel launch should fail as the file2_func was device linked
+ // into the static library and is not usable by the executable
+ std::cerr << "main_launch_kernel: kernel launch should have failed"
+ << std::endl;
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
new file mode 100644
index 0000000..94d388b
--- /dev/null
+++ b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake
@@ -0,0 +1,14 @@
+execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH}
+ RESULT_VARIABLE RESULT
+ OUTPUT_VARIABLE OUTPUT
+ ERROR_VARIABLE ERROR
+)
+
+if(NOT "${RESULT}" STREQUAL "0")
+ message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]")
+endif()
+
+if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)")
+ message(FATAL_ERROR
+ "No cuda device objects found, device linking did not occur")
+endif()