diff options
author | Robert Maynard <rmaynard@nvidia.com> | 2023-01-27 20:46:19 (GMT) |
---|---|---|
committer | Robert Maynard <rmaynard@nvidia.com> | 2023-03-13 13:54:00 (GMT) |
commit | 2def6a874b52ef70157f101cbca9ee9b92a5a7f5 (patch) | |
tree | f08dda163a1d8af66c4ce780cae0875ec2a4696f /Tests/CudaOnly/CUBIN | |
parent | 7b37ebe8357d9b1e2a5c97b58c9f2f5b690d163e (diff) | |
download | CMake-2def6a874b52ef70157f101cbca9ee9b92a5a7f5.zip CMake-2def6a874b52ef70157f101cbca9ee9b92a5a7f5.tar.gz CMake-2def6a874b52ef70157f101cbca9ee9b92a5a7f5.tar.bz2 |
CUDA: Add support for CUBIN, FATBIN, and OPTIXIR compilation
Diffstat (limited to 'Tests/CudaOnly/CUBIN')
-rw-r--r-- | Tests/CudaOnly/CUBIN/CMakeLists.txt | 21 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/kernelA.cu | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/kernelB.cu | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/kernelC.cu | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/main.cu | 56 |
5 files changed, 98 insertions, 0 deletions
diff --git a/Tests/CudaOnly/CUBIN/CMakeLists.txt b/Tests/CudaOnly/CUBIN/CMakeLists.txt
new file mode 100644
index 0000000..464714b
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 3.18)
+project(CudaCUBIN LANGUAGES CUDA)
+
+
+set(CMAKE_CUDA_ARCHITECTURES all-major)
+
+add_library(CudaCUBIN OBJECT kernelA.cu kernelB.cu kernelC.cu)
+set_property(TARGET CudaCUBIN PROPERTY CUDA_CUBIN_COMPILATION ON)
+set_property(TARGET CudaCUBIN PROPERTY CUDA_ARCHITECTURES native)
+
+add_executable(CudaOnlyCUBIN main.cu)
+target_compile_features(CudaOnlyCUBIN PRIVATE cuda_std_11)
+target_compile_definitions(CudaOnlyCUBIN PRIVATE "CUBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaCUBIN>,~_~>\"")
+
+find_package(CUDAToolkit REQUIRED)
+target_link_libraries(CudaOnlyCUBIN PRIVATE CUDA::cuda_driver)
+
+if(APPLE)
+  # Help the static cuda runtime find the driver (libcuda.dylib) at runtime.
+  set_property(TARGET CudaOnlyCUBIN PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+endif()
diff --git a/Tests/CudaOnly/CUBIN/kernelA.cu b/Tests/CudaOnly/CUBIN/kernelA.cu
new file mode 100644
index 0000000..fbe0d26
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelA.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
+{
+  for (int i = threadIdx.x; i < size; i += blockDim.x) {
+    r[i] = x[i] * y[i] + z[i];
+  }
+}
diff --git a/Tests/CudaOnly/CUBIN/kernelB.cu b/Tests/CudaOnly/CUBIN/kernelB.cu
new file mode 100644
index 0000000..7478253
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelB.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
+{
+  for (int i = threadIdx.x; i < size; i += blockDim.x) {
+    r[i] = x[i] * y[i] + z[i];
+  }
+}
diff --git a/Tests/CudaOnly/CUBIN/kernelC.cu b/Tests/CudaOnly/CUBIN/kernelC.cu
new file mode 100644
index 0000000..5f8a0ce
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/kernelC.cu
@@ -0,0 +1,7 @@
+
+__global__ void kernelC(float* r, float* x, float* y, float* z, int size)
+{
+  for (int i = threadIdx.x; i < size; i += blockDim.x) {
+    r[i] = x[i] * y[i] + z[i];
+  }
+}
diff --git a/Tests/CudaOnly/CUBIN/main.cu b/Tests/CudaOnly/CUBIN/main.cu
new file mode 100644
index 0000000..da5249c
--- /dev/null
+++ b/Tests/CudaOnly/CUBIN/main.cu
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include <cuda.h>
+
+// Object paths of the CudaCUBIN target, joined with "~_~" by CMakeLists.txt.
+static std::string input_paths = { CUBIN_FILE_PATHS };
+
+// Loads each cubin with the CUDA driver API; returns 1 on any failure.
+int main()
+{
+  const std::string delimiter = "~_~";
+  input_paths += delimiter; // sentinel so the last path is terminated too
+
+  size_t end = 0;
+  size_t previous_end = 0;
+  std::vector<std::string> actual_paths;
+  while ((end = input_paths.find(delimiter, previous_end)) !=
+         std::string::npos) {
+    actual_paths.emplace_back(
+      input_paths.substr(previous_end, end - previous_end));
+    previous_end = end + delimiter.size();
+  }
+
+  int count = 0;
+  if (cuInit(0) != CUDA_SUCCESS ||
+      cuDeviceGetCount(&count) != CUDA_SUCCESS || count == 0) {
+    std::cerr << "No CUDA devices found\n";
+    return 1;
+  }
+
+  CUdevice device;
+  CUcontext context;
+  if (cuDeviceGet(&device, 0) != CUDA_SUCCESS ||
+      cuCtxCreate(&context, 0, device) != CUDA_SUCCESS) {
+    std::cerr << "Failed to create a CUDA context\n";
+    return 1;
+  }
+
+  for (const std::string& p : actual_paths) {
+    if (p.find(".cubin") == std::string::npos) {
+      std::cerr << p << " Doesn't have the .cubin suffix" << std::endl;
+      return 1;
+    }
+    std::cout << "trying to load cubin: " << p << std::endl;
+    CUmodule module = nullptr; // initialized so the print below is defined
+    CUresult result = cuModuleLoad(&module, p.c_str());
+    std::cout << "module pointer: " << module << '\n';
+    if (result != CUDA_SUCCESS || module == nullptr) {
+      std::cerr << "Failed to load the embedded cubin with error: "
+                << static_cast<unsigned int>(result) << '\n';
+      return 1;
+    }
+  }
+}