summaryrefslogtreecommitdiffstats
path: root/Tests
diff options
context:
space:
mode:
authorRobert Maynard <rmaynard@nvidia.com>2021-09-17 20:39:13 (GMT)
committerunknown <rmaynard@nvidia.com>2021-10-20 15:18:06 (GMT)
commit61b9764b03517276e99584de02d52ceefa5689ec (patch)
tree663e5dee6f66a2ef958002b209437b659ad02dca /Tests
parent3b5e1b53eae726bb4105240143354270df4495e3 (diff)
downloadCMake-61b9764b03517276e99584de02d52ceefa5689ec.zip
CMake-61b9764b03517276e99584de02d52ceefa5689ec.tar.gz
CMake-61b9764b03517276e99584de02d52ceefa5689ec.tar.bz2
CUDA: Allow both CUDA_SEPARABLE_COMPILATION and CUDA_PTX_COMPILATION
The target properties `CUDA_SEPARABLE_COMPILATION` and `CUDA_PTX_COMPILATION` now aren't mutually exclusive and can now be used together on the same target.
Diffstat (limited to 'Tests')
-rw-r--r--Tests/CudaOnly/CMakeLists.txt1
-rw-r--r--Tests/CudaOnly/ExportPTX/CMakeLists.txt2
-rw-r--r--Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt51
-rw-r--r--Tests/CudaOnly/SeparateCompilationPTX/kernels.cu14
-rw-r--r--Tests/CudaOnly/SeparateCompilationPTX/main.cu30
-rw-r--r--Tests/CudaOnly/utils/bin2c_wrapper.cmake (renamed from Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake)0
6 files changed, 97 insertions, 1 deletions
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index a3fb409..65dfebb 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -16,6 +16,7 @@ add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs)
add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation)
+add_cuda_test_macro(CudaOnly.SeparateCompilationPTX CudaOnlySeparateCompilationPTX)
if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang")
# Clang doesn't have flags for selecting the runtime.
diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
index e7e7bc4..f1667af 100644
--- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt
+++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
@@ -56,7 +56,7 @@ add_custom_command(
"-DBIN_TO_C_COMMAND=${bin_to_c}"
"-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>"
"-DOUTPUT=${output_file}"
- -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake
VERBATIM
DEPENDS $<TARGET_OBJECTS:CudaPTX>
COMMENT "Converting Object files to a C header"
diff --git a/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt
new file mode 100644
index 0000000..273f955
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt
@@ -0,0 +1,51 @@
+cmake_minimum_required(VERSION 3.19)
+project (SeparateCompPTX CUDA)
+
+#Goal for this example:
+# How to generate PTX files with RDC enabled
+
+# PTX can be compiled only for a single virtual architecture at a time
+list(POP_FRONT CMAKE_CUDA_ARCHITECTURES temp)
+set(CMAKE_CUDA_ARCHITECTURES ${temp})
+string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual")
+
+add_library(CudaPTX OBJECT kernels.cu)
+set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
+set_property(TARGET CudaPTX PROPERTY CUDA_SEPARABLE_COMPILATION ON)
+
+
+set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h)
+
+find_package(CUDAToolkit REQUIRED)
+find_program(bin_to_c
+ NAMES bin2c
+ PATHS ${CUDAToolkit_BIN_DIR}
+ )
+if(NOT bin_to_c)
+ message(FATAL_ERROR
+ "bin2c not found:\n"
+ " CUDAToolkit_BIN_DIR='${CUDAToolkit_BIN_DIR}'\n"
+ )
+endif()
+
+add_custom_command(
+ OUTPUT "${output_file}"
+ COMMAND ${CMAKE_COMMAND}
+ "-DBIN_TO_C_COMMAND=${bin_to_c}"
+ "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>"
+ "-DOUTPUT=${output_file}"
+ -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake
+ VERBATIM
+ DEPENDS $<TARGET_OBJECTS:CudaPTX>
+ COMMENT "Converting Object files to a C header"
+ )
+
+add_executable(CudaOnlySeparateCompilationPTX main.cu ${output_file})
+target_compile_features(CudaOnlySeparateCompilationPTX PRIVATE cuda_std_11)
+target_include_directories(CudaOnlySeparateCompilationPTX PRIVATE
+ ${CMAKE_CURRENT_BINARY_DIR} )
+target_link_libraries(CudaOnlySeparateCompilationPTX PRIVATE CUDA::cuda_driver)
+if(APPLE)
+ # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime.
+ set_property(TARGET CudaOnlySeparateCompilationPTX PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu b/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu
new file mode 100644
index 0000000..f4a52d4
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu
@@ -0,0 +1,14 @@
+
+__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
+{
+ for (int i = threadIdx.x; i < size; i += blockDim.x) {
+ r[i] = x[i] * y[i] + z[i];
+ }
+}
+
+__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
+{
+ for (int i = threadIdx.x; i < size; i += blockDim.x) {
+ r[i] = x[i] * y[i] + z[i];
+ }
+}
diff --git a/Tests/CudaOnly/SeparateCompilationPTX/main.cu b/Tests/CudaOnly/SeparateCompilationPTX/main.cu
new file mode 100644
index 0000000..164cde5
--- /dev/null
+++ b/Tests/CudaOnly/SeparateCompilationPTX/main.cu
@@ -0,0 +1,30 @@
+#include <iostream>
+
+#include <cuda.h>
+
+#include "embedded_objs.h"
+
+int main()
+{
+ cuInit(0);
+ int count = 0;
+ cuDeviceGetCount(&count);
+ if (count == 0) {
+ std::cerr << "No CUDA devices found\n";
+ return 1;
+ }
+
+ CUdevice device;
+ cuDeviceGet(&device, 0);
+
+ CUcontext context;
+ cuCtxCreate(&context, 0, device);
+
+ CUmodule module;
+ cuModuleLoadData(&module, kernels);
+ if (module == nullptr) {
+ std::cerr << "Failed to load the embedded ptx" << std::endl;
+ return 1;
+ }
+ std::cout << module << std::endl;
+}
diff --git a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake b/Tests/CudaOnly/utils/bin2c_wrapper.cmake
index 0baf934..0baf934 100644
--- a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake
+++ b/Tests/CudaOnly/utils/bin2c_wrapper.cmake