diff options
author | Robert Maynard <rmaynard@nvidia.com> | 2021-09-17 20:39:13 (GMT) |
---|---|---|
committer | unknown <rmaynard@nvidia.com> | 2021-10-20 15:18:06 (GMT) |
commit | 61b9764b03517276e99584de02d52ceefa5689ec (patch) | |
tree | 663e5dee6f66a2ef958002b209437b659ad02dca /Tests | |
parent | 3b5e1b53eae726bb4105240143354270df4495e3 (diff) | |
download | CMake-61b9764b03517276e99584de02d52ceefa5689ec.zip CMake-61b9764b03517276e99584de02d52ceefa5689ec.tar.gz CMake-61b9764b03517276e99584de02d52ceefa5689ec.tar.bz2 |
CUDA: Allow both CUDA_SEPARABLE_COMPILATION and CUDA_PTX_COMPILATION
The target properties `CUDA_SEPARABLE_COMPILATION` and `CUDA_PTX_COMPILATION`
are no longer mutually exclusive and can now be used together on the same
target.
Diffstat (limited to 'Tests')
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Tests/CudaOnly/ExportPTX/CMakeLists.txt | 2 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt | 51 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilationPTX/kernels.cu | 14 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilationPTX/main.cu | 30 | ||||
-rw-r--r-- | Tests/CudaOnly/utils/bin2c_wrapper.cmake (renamed from Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake) | 0 |
6 files changed, 97 insertions, 1 deletions
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index a3fb409..65dfebb 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -16,6 +16,7 @@ add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs) add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation) +add_cuda_test_macro(CudaOnly.SeparateCompilationPTX CudaOnlySeparateCompilationPTX) if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") # Clang doesn't have flags for selecting the runtime. diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt index e7e7bc4..f1667af 100644 --- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt +++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt @@ -56,7 +56,7 @@ add_custom_command( "-DBIN_TO_C_COMMAND=${bin_to_c}" "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>" "-DOUTPUT=${output_file}" - -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake + -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake VERBATIM DEPENDS $<TARGET_OBJECTS:CudaPTX> COMMENT "Converting Object files to a C header" diff --git a/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt new file mode 100644 index 0000000..273f955 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.19) +project (SeparateCompPTX CUDA) + +#Goal for this example: +# How to generate PTX files with RDC enabled + +# PTX can be compiled only for a single virtual architecture at a time +list(POP_FRONT CMAKE_CUDA_ARCHITECTURES temp) +set(CMAKE_CUDA_ARCHITECTURES ${temp}) +string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual") + +add_library(CudaPTX OBJECT kernels.cu) +set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON) +set_property(TARGET CudaPTX PROPERTY 
CUDA_SEPARABLE_COMPILATION ON) + + +set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h) + +find_package(CUDAToolkit REQUIRED) +find_program(bin_to_c + NAMES bin2c + PATHS ${CUDAToolkit_BIN_DIR} + ) +if(NOT bin_to_c) + message(FATAL_ERROR + "bin2c not found:\n" + " CUDAToolkit_BIN_DIR='${CUDAToolkit_BIN_DIR}'\n" + ) +endif() + +add_custom_command( + OUTPUT "${output_file}" + COMMAND ${CMAKE_COMMAND} + "-DBIN_TO_C_COMMAND=${bin_to_c}" + "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>" + "-DOUTPUT=${output_file}" + -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake + VERBATIM + DEPENDS $<TARGET_OBJECTS:CudaPTX> + COMMENT "Converting Object files to a C header" + ) + +add_executable(CudaOnlySeparateCompilationPTX main.cu ${output_file}) +target_compile_features(CudaOnlySeparateCompilationPTX PRIVATE cuda_std_11) +target_include_directories(CudaOnlySeparateCompilationPTX PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} ) +target_link_libraries(CudaOnlySeparateCompilationPTX PRIVATE CUDA::cuda_driver) +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. 
+ set_property(TARGET CudaOnlySeparateCompilationPTX PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu b/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu new file mode 100644 index 0000000..f4a52d4 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu @@ -0,0 +1,14 @@ + +__global__ void kernelA(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} + +__global__ void kernelB(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/SeparateCompilationPTX/main.cu b/Tests/CudaOnly/SeparateCompilationPTX/main.cu new file mode 100644 index 0000000..164cde5 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilationPTX/main.cu @@ -0,0 +1,30 @@ +#include <iostream> + +#include <cuda.h> + +#include "embedded_objs.h" + +int main() +{ + cuInit(0); + int count = 0; + cuDeviceGetCount(&count); + if (count == 0) { + std::cerr << "No CUDA devices found\n"; + return 1; + } + + CUdevice device; + cuDeviceGet(&device, 0); + + CUcontext context; + cuCtxCreate(&context, 0, device); + + CUmodule module; + cuModuleLoadData(&module, kernels); + if (module == nullptr) { + std::cerr << "Failed to load the embedded ptx" << std::endl; + return 1; + } + std::cout << module << std::endl; +} diff --git a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake b/Tests/CudaOnly/utils/bin2c_wrapper.cmake index 0baf934..0baf934 100644 --- a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake +++ b/Tests/CudaOnly/utils/bin2c_wrapper.cmake |