diff options
author | Robert Maynard <robert.maynard@kitware.com> | 2017-04-25 20:09:56 (GMT) |
---|---|---|
committer | Robert Maynard <robert.maynard@kitware.com> | 2017-04-26 20:18:25 (GMT) |
commit | 493671a5212c6548b2d7376c7065f5f76692a792 (patch) | |
tree | b6e861120d50f15946603919b3637806a4e93596 /Tests/CudaOnly | |
parent | 8fb85c68bb0090b44df22c27dabc03da63602e5e (diff) | |
download | CMake-493671a5212c6548b2d7376c7065f5f76692a792.zip CMake-493671a5212c6548b2d7376c7065f5f76692a792.tar.gz CMake-493671a5212c6548b2d7376c7065f5f76692a792.tar.bz2 |
CUDA: Static libraries can now explicitly resolve device symbols
If a static library has the property CUDA_RESOLVE_DEVICE_SYMBOLS enabled
it will now perform the device link step. The normal behavior is
to delay calling device link until the static library is consumed by
a shared library or an executable.
Diffstat (limited to 'Tests/CudaOnly')
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt | 52 | ||||
-rw-r--r-- | Tests/CudaOnly/ResolveDeviceSymbols/file1.cu | 10 | ||||
-rw-r--r-- | Tests/CudaOnly/ResolveDeviceSymbols/file1.h | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/ResolveDeviceSymbols/file2.cu | 25 | ||||
-rw-r--r-- | Tests/CudaOnly/ResolveDeviceSymbols/file2.h | 10 | ||||
-rw-r--r-- | Tests/CudaOnly/ResolveDeviceSymbols/main.cu | 85 | ||||
-rw-r--r-- | Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake | 14 |
8 files changed, 204 insertions, 0 deletions
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index a3bd707..5f456fc 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -3,3 +3,4 @@ ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard) ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX) ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs) +ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt new file mode 100644 index 0000000..b96bb98 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.7) +project (CudaOnlyResolveDeviceSymbols CUDA) + +# Find nm and dumpbin +if(CMAKE_NM) + set(dump_command ${CMAKE_NM}) + set(dump_args -g) +else() + include(GetPrerequisites) + message(STATUS "calling list_prerequisites to find dumpbin") + list_prerequisites("${CMAKE_COMMAND}" 0 0 0) + if(gp_dumpbin) + set(dump_command ${gp_dumpbin}) + set(dump_args /ARCHIVEMEMBERS) + endif() +endif() + +#Goal for this example: +#Build a static library that defines multiple methods and kernels that +#use each other. +#Use a custom command to build an executable that uses this static library +#We do these together to verify that we can get a static library to do +#device symbol linking, and not have it done when the executable is made +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_30,code=compute_30") +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CUDA_STANDARD 11) + +add_library(CUDAResolveDeviceLib STATIC file1.cu file2.cu) +set_target_properties(CUDAResolveDeviceLib + PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON + POSITION_INDEPENDENT_CODE ON) + +if(dump_command) +add_custom_command(TARGET CUDAResolveDeviceLib POST_BUILD + COMMAND ${CMAKE_COMMAND} + -DDUMP_COMMAND=${dump_command} + -DDUMP_ARGS=${dump_args} + -DTEST_LIBRARY_PATH=$<TARGET_FILE:CUDAResolveDeviceLib> + -P ${CMAKE_CURRENT_SOURCE_DIR}/verify.cmake + ) +endif() + +add_executable(CudaOnlyResolveDeviceSymbols main.cu) +target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE CUDAResolveDeviceLib) + +if(APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that + # the static cuda runtime can find it at runtime. + target_link_libraries(CudaOnlyResolveDeviceSymbols PRIVATE -Wl,-rpath,/usr/local/cuda/lib) +endif() diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu new file mode 100644 index 0000000..1ce63bf --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.cu @@ -0,0 +1,10 @@ + +#include "file1.h" + +result_type __device__ file1_func(int x) +{ + result_type r; + r.input = x; + r.sum = x * x; + return r; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file1.h b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h new file mode 100644 index 0000000..ff1945c --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file1.h @@ -0,0 +1,7 @@ + +#pragma once +struct result_type +{ + int input; + int sum; +}; diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu new file mode 100644 index 0000000..278fd6c --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.cu @@ -0,0 +1,25 @@ + +#include "file2.h" + +result_type __device__ file1_func(int x); + +result_type_dynamic __device__ file2_func(int x) +{ + const result_type r = file1_func(x); + const result_type_dynamic rd{ r.input, r.sum, true }; + return rd; +} + +static __global__ void file2_kernel(result_type_dynamic& r, int x) +{ + // call static_func which is a method that is defined in the + // static library that is always out of date + r = file2_func(x); +} + +int file2_launch_kernel(int x) +{ + result_type_dynamic r; + file2_kernel<<<1, 1>>>(r, x); + return r.sum; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/file2.h b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h new file mode 100644 index 0000000..d2dbaa4 --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/file2.h @@ -0,0 +1,10 @@ + +#pragma once +#include "file1.h" + +struct result_type_dynamic +{ + int input; + int sum; + bool from_static; +}; diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/main.cu b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu new file mode 100644 index 0000000..b4b5b9e --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/main.cu @@ -0,0 +1,85 @@ + +#include <iostream> + +#include "file1.h" +#include "file2.h" + +int file2_launch_kernel(int x); + +result_type_dynamic __device__ file2_func(int x); +static __global__ void main_kernel(result_type_dynamic& r, int x) +{ + // call function that was not device linked to us, this will cause + // a runtime failure of "invalid device function" + r = file2_func(x); +} + +int main_launch_kernel(int x) +{ + result_type_dynamic r; + main_kernel<<<1, 1>>>(r, x); + return r.sum; +} + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + std::cout << "prop.major: " << prop.major << std::endl; + if (prop.major >= 3) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.0" + << std::endl; + + return 1; +} + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + cudaError_t err; + file2_launch_kernel(42); + err = cudaGetLastError(); + if (err != cudaSuccess) { + std::cerr << "file2_launch_kernel: kernel launch failed: " + << cudaGetErrorString(err) << std::endl; + return 1; + } + + main_launch_kernel(1); + err = cudaGetLastError(); + if (err == cudaSuccess) { + // This kernel launch should fail as the file2_func was device linked + // into the static library and is not usable by the executable + std::cerr << "main_launch_kernel: kernel launch should have failed" + << std::endl; + return 1; + } + + return 0; +} diff --git a/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake new file mode 100644 index 0000000..94d388b --- /dev/null +++ b/Tests/CudaOnly/ResolveDeviceSymbols/verify.cmake @@ -0,0 +1,14 @@ +execute_process(COMMAND ${DUMP_COMMAND} ${DUMP_ARGS} ${TEST_LIBRARY_PATH} + RESULT_VARIABLE RESULT + OUTPUT_VARIABLE OUTPUT + ERROR_VARIABLE ERROR +) + +if(NOT "${RESULT}" STREQUAL "0") + message(FATAL_ERROR "${DUMP_COMMAND} failed [${RESULT}] [${OUTPUT}] [${ERROR}]") +endif() + +if(NOT "${OUTPUT}" MATCHES "(cmake_device_link|device-link)") + message(FATAL_ERROR + "No cuda device objects found, device linking did not occur") +endif() |