summaryrefslogtreecommitdiffstats
path: root/Tests/CudaOnly/CUBIN
diff options
context:
space:
mode:
authorRobert Maynard <rmaynard@nvidia.com>2023-01-27 20:46:19 (GMT)
committerRobert Maynard <rmaynard@nvidia.com>2023-03-13 13:54:00 (GMT)
commit2def6a874b52ef70157f101cbca9ee9b92a5a7f5 (patch)
treef08dda163a1d8af66c4ce780cae0875ec2a4696f /Tests/CudaOnly/CUBIN
parent7b37ebe8357d9b1e2a5c97b58c9f2f5b690d163e (diff)
downloadCMake-2def6a874b52ef70157f101cbca9ee9b92a5a7f5.zip
CMake-2def6a874b52ef70157f101cbca9ee9b92a5a7f5.tar.gz
CMake-2def6a874b52ef70157f101cbca9ee9b92a5a7f5.tar.bz2
CUDA: Add support for CUBIN, FATBIN, and OPTIXIR compilation
Diffstat (limited to 'Tests/CudaOnly/CUBIN')
-rw-r--r--Tests/CudaOnly/CUBIN/CMakeLists.txt21
-rw-r--r--Tests/CudaOnly/CUBIN/kernelA.cu7
-rw-r--r--Tests/CudaOnly/CUBIN/kernelB.cu7
-rw-r--r--Tests/CudaOnly/CUBIN/kernelC.cu7
-rw-r--r--Tests/CudaOnly/CUBIN/main.cu56
5 files changed, 98 insertions, 0 deletions
diff --git a/Tests/CudaOnly/CUBIN/CMakeLists.txt b/Tests/CudaOnly/CUBIN/CMakeLists.txt
new file mode 100644
index 0000000..464714b
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 3.18)
+project(CudaCUBIN LANGUAGES CUDA)
+
+# Directory-wide default; targets created below start from this value.
+set(CMAKE_CUDA_ARCHITECTURES all-major)
+
+# Object library holding the kernels. CUBIN compilation is switched on for
+# it, and its architecture list is overridden with `native`.
+add_library(CudaCUBIN OBJECT kernelA.cu kernelB.cu kernelC.cu)
+set_target_properties(CudaCUBIN PROPERTIES
+  CUDA_CUBIN_COMPILATION ON
+  CUDA_ARCHITECTURES native
+)
+
+# Driver program. The object files of CudaCUBIN are handed to it as one
+# string literal, joined with the `~_~` separator that main.cu splits on.
+add_executable(CudaOnlyCUBIN main.cu)
+target_compile_features(CudaOnlyCUBIN PRIVATE cuda_std_11)
+target_compile_definitions(CudaOnlyCUBIN PRIVATE "CUBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaCUBIN>,~_~>\"")
+
+# main.cu uses the CUDA driver API (cuInit, cuModuleLoad, ...).
+find_package(CUDAToolkit REQUIRED)
+target_link_libraries(CudaOnlyCUBIN PRIVATE CUDA::cuda_driver)
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
+  set_property(TARGET CudaOnlyCUBIN PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/CUBIN/kernelA.cu b/Tests/CudaOnly/CUBIN/kernelA.cu
new file mode 100644
index 0000000..fbe0d26
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelA.cu
@@ -0,0 +1,7 @@
+
+// Writes r[i] = x[i] * y[i] + z[i] for indices below `size`.
+// Each thread begins at its threadIdx.x and advances by blockDim.x;
+// blockIdx is not consulted, so every block walks the same indices.
+__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
+{
+  int idx = threadIdx.x;
+  while (idx < size) {
+    r[idx] = x[idx] * y[idx] + z[idx];
+    idx += blockDim.x;
+  }
+}
diff --git a/Tests/CudaOnly/CUBIN/kernelB.cu b/Tests/CudaOnly/CUBIN/kernelB.cu
new file mode 100644
index 0000000..7478253
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelB.cu
@@ -0,0 +1,7 @@
+
+// Writes r[i] = x[i] * y[i] + z[i] for indices below `size`.
+// Each thread begins at its threadIdx.x and advances by blockDim.x;
+// blockIdx is not consulted, so every block walks the same indices.
+__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
+{
+  int idx = threadIdx.x;
+  while (idx < size) {
+    r[idx] = x[idx] * y[idx] + z[idx];
+    idx += blockDim.x;
+  }
+}
diff --git a/Tests/CudaOnly/CUBIN/kernelC.cu b/Tests/CudaOnly/CUBIN/kernelC.cu
new file mode 100644
index 0000000..5f8a0ce
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelC.cu
@@ -0,0 +1,7 @@
+
+// Writes r[i] = x[i] * y[i] + z[i] for indices below `size`.
+// Each thread begins at its threadIdx.x and advances by blockDim.x;
+// blockIdx is not consulted, so every block walks the same indices.
+__global__ void kernelC(float* r, float* x, float* y, float* z, int size)
+{
+  int idx = threadIdx.x;
+  while (idx < size) {
+    r[idx] = x[idx] * y[idx] + z[idx];
+    idx += blockDim.x;
+  }
+}
diff --git a/Tests/CudaOnly/CUBIN/main.cu b/Tests/CudaOnly/CUBIN/main.cu
new file mode 100644
index 0000000..da5249c
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/main.cu
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <cuda.h>
+
+// "~_~"-separated list of CUBIN file paths. CUBIN_FILE_PATHS is supplied as
+// a compile definition by this test's CMakeLists.txt (the joined
+// $<TARGET_OBJECTS:CudaCUBIN>). Non-const because main() appends to it.
+static std::string input_paths = { CUBIN_FILE_PATHS };
+
+// Splits input_paths on "~_~" and loads every resulting file as a CUDA
+// module through the driver API.
+// Returns 0 when every path contains ".cubin" and loads successfully;
+// returns 1 on the first driver error, missing device, or bad path.
+int main()
+{
+  const std::string delimiter = "~_~";
+  // Terminate the last entry so the split loop below captures it as well.
+  input_paths += delimiter;
+
+  size_t end = 0;
+  size_t previous_end = 0;
+  std::vector<std::string> actual_paths;
+  while ((end = input_paths.find(delimiter, previous_end)) !=
+         std::string::npos) {
+    actual_paths.emplace_back(
+      input_paths.substr(previous_end, end - previous_end));
+    // Step over the delimiter by its real length, not a hard-coded 3.
+    previous_end = end + delimiter.size();
+  }
+
+  // Names the failing driver call so setup errors are not misreported later
+  // as "no devices" or as a module-load failure.
+  auto succeeded = [](CUresult status, const char* call) -> bool {
+    if (status == CUDA_SUCCESS) {
+      return true;
+    }
+    std::cerr << call
+              << " failed with error: " << static_cast<unsigned int>(status)
+              << '\n';
+    return false;
+  };
+
+  if (!succeeded(cuInit(0), "cuInit")) {
+    return 1;
+  }
+  int count = 0;
+  if (!succeeded(cuDeviceGetCount(&count), "cuDeviceGetCount")) {
+    return 1;
+  }
+  if (count == 0) {
+    std::cerr << "No CUDA devices found\n";
+    return 1;
+  }
+
+  CUdevice device;
+  if (!succeeded(cuDeviceGet(&device, 0), "cuDeviceGet")) {
+    return 1;
+  }
+
+  CUcontext context;
+  if (!succeeded(cuCtxCreate(&context, 0, device), "cuCtxCreate")) {
+    return 1;
+  }
+
+  // Track the outcome instead of returning from inside the loop so the
+  // context is always destroyed below.
+  int exit_code = 0;
+  for (const auto& p : actual_paths) {
+    if (p.find(".cubin") == std::string::npos) {
+      std::cout << p << " Doesn't have the .cubin suffix" << std::endl;
+      exit_code = 1;
+      break;
+    }
+    std::cout << "trying to load cubin: " << p << std::endl;
+    // Fresh, initialised handle per file: a failed load must not leave a
+    // stale or indeterminate pointer to be printed and tested.
+    CUmodule module = nullptr;
+    const CUresult result = cuModuleLoad(&module, p.c_str());
+    std::cout << "module pointer: " << module << '\n';
+    if (result != CUDA_SUCCESS || module == nullptr) {
+      std::cerr << "Failed to load the embedded cubin with error: "
+                << static_cast<unsigned int>(result) << '\n';
+      exit_code = 1;
+      break;
+    }
+    // Best-effort cleanup; an unload failure does not change the verdict.
+    cuModuleUnload(module);
+  }
+
+  // Best-effort cleanup of the context created above.
+  cuCtxDestroy(context);
+  return exit_code;
+}