diff options
Diffstat (limited to 'Tests/CudaOnly')
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 3 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/CMakeLists.txt | 29 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/kernelA.cu | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/kernelB.cu | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/kernelC.cu | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/main.cu | 58 | ||||
-rw-r--r-- | Tests/CudaOnly/CUBIN/main_no_native_archs.cu | 4 | ||||
-rw-r--r-- | Tests/CudaOnly/DeviceLTO/CMakeLists.txt | 11 | ||||
-rw-r--r-- | Tests/CudaOnly/Fatbin/CMakeLists.txt | 25 | ||||
-rw-r--r-- | Tests/CudaOnly/Fatbin/main.cu | 58 | ||||
-rw-r--r-- | Tests/CudaOnly/OptixIR/CMakeLists.txt | 33 | ||||
-rw-r--r-- | Tests/CudaOnly/OptixIR/main.cu | 53 | ||||
-rw-r--r-- | Tests/CudaOnly/RuntimeControls/verify_runtime.cmake | 2 | ||||
-rw-r--r-- | Tests/CudaOnly/SharedRuntimePlusToolkit/CMakeLists.txt | 3 | ||||
-rw-r--r-- | Tests/CudaOnly/SharedRuntimeViaCUDAFlags/CMakeLists.txt | 3 | ||||
-rw-r--r-- | Tests/CudaOnly/StaticRuntimePlusToolkit/CMakeLists.txt | 3 |
16 files changed, 300 insertions, 6 deletions
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index db08076..aa25c4c 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -27,6 +27,9 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") # Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode. add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) + add_cuda_test_macro(CudaOnly.CUBIN CudaOnlyCUBIN) + add_cuda_test_macro(CudaOnly.Fatbin CudaOnlyFatbin) + add_cuda_test_macro(CudaOnly.OptixIR CudaOnlyOptixIR) endif() add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO) diff --git a/Tests/CudaOnly/CUBIN/CMakeLists.txt b/Tests/CudaOnly/CUBIN/CMakeLists.txt new file mode 100644 index 0000000..81787e4 --- /dev/null +++ b/Tests/CudaOnly/CUBIN/CMakeLists.txt @@ -0,0 +1,29 @@ +cmake_minimum_required(VERSION 3.18) +unset(ENV{CMAKE_CUDA_ARCHITECTURES_NATIVE_CLAMP}) # CUBIN needs true native arch +project(CudaCUBIN LANGUAGES CUDA) + +set(CMAKE_CUDA_ARCHITECTURES all-major) + +# CUBIN needs the true native arch to be supported by the CUDA toolkit. +set(unavailable_native_archs "${CMAKE_CUDA_ARCHITECTURES_NATIVE}") +list(REMOVE_ITEM unavailable_native_archs ${CMAKE_CUDA_ARCHITECTURES_ALL}) +if(unavailable_native_archs) + add_executable(CudaOnlyCUBIN main_no_native_archs.cu) + return() +endif() + +add_library(CudaCUBIN OBJECT kernelA.cu kernelB.cu kernelC.cu) +set_property(TARGET CudaCUBIN PROPERTY CUDA_CUBIN_COMPILATION ON) +set_property(TARGET CudaCUBIN PROPERTY CUDA_ARCHITECTURES native) + +add_executable(CudaOnlyCUBIN main.cu) +target_compile_features(CudaOnlyCUBIN PRIVATE cuda_std_11) +target_compile_definitions(CudaOnlyCUBIN PRIVATE "CUBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaCUBIN>,~_~>\"") + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(CudaOnlyCUBIN PRIVATE CUDA::cuda_driver) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyCUBIN PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/CUBIN/kernelA.cu b/Tests/CudaOnly/CUBIN/kernelA.cu new file mode 100644 index 0000000..fbe0d26 --- /dev/null +++ b/Tests/CudaOnly/CUBIN/kernelA.cu @@ -0,0 +1,7 @@ + +__global__ void kernelA(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/CUBIN/kernelB.cu b/Tests/CudaOnly/CUBIN/kernelB.cu new file mode 100644 index 0000000..7478253 --- /dev/null +++ b/Tests/CudaOnly/CUBIN/kernelB.cu @@ -0,0 +1,7 @@ + +__global__ void kernelB(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/CUBIN/kernelC.cu b/Tests/CudaOnly/CUBIN/kernelC.cu new file mode 100644 index 0000000..5f8a0ce --- /dev/null +++ b/Tests/CudaOnly/CUBIN/kernelC.cu @@ -0,0 +1,7 @@ + +__global__ void kernelC(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/CUBIN/main.cu b/Tests/CudaOnly/CUBIN/main.cu new file mode 100644 index 0000000..581970a --- /dev/null +++ b/Tests/CudaOnly/CUBIN/main.cu @@ -0,0 +1,58 @@ +#include <iostream> +#include <string> +#include <vector> + +#include <cuda.h> + +#define GENERATED_HEADER(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) <x> + +static std::string input_paths = { CUBIN_FILE_PATHS }; + +int main() +{ + const std::string delimiter = "~_~"; + input_paths += delimiter; + + size_t end = 0; + size_t previous_end = 0; + std::vector<std::string> actual_paths; + while ((end = input_paths.find(delimiter, previous_end)) != + std::string::npos) { + actual_paths.emplace_back( + input_paths.substr(previous_end, end - previous_end)); + previous_end = end + 3; + } + + cuInit(0); + int count = 0; + cuDeviceGetCount(&count); + if (count == 0) { + std::cerr << "No CUDA devices found\n"; + return 1; + } + + CUdevice device; + cuDeviceGet(&device, 0); + + CUcontext context; + cuCtxCreate(&context, 0, device); + + CUmodule module; + for (auto p : actual_paths) { + if (p.find(".cubin") == std::string::npos) { + std::cout << p << " Doesn't have the .cubin suffix" << p << std::endl; + return 1; + } + std::cout << "trying to load cubin: " << p << std::endl; + CUresult result = cuModuleLoad(&module, p.c_str()); + std::cout << "module pointer: " << module << '\n'; + if (result != CUDA_SUCCESS || module == nullptr) { + std::cerr << "Failed to load the embedded cubin with error: " + << static_cast<unsigned int>(result) << '\n'; + return 1; + } + } + + return 0; +} diff --git a/Tests/CudaOnly/CUBIN/main_no_native_archs.cu b/Tests/CudaOnly/CUBIN/main_no_native_archs.cu new file mode 100644 index 0000000..f8b643a --- /dev/null +++ b/Tests/CudaOnly/CUBIN/main_no_native_archs.cu @@ -0,0 +1,4 @@ +int main() +{ + return 0; +} diff --git a/Tests/CudaOnly/DeviceLTO/CMakeLists.txt b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt index 653b35d..5653bdf 100644 --- a/Tests/CudaOnly/DeviceLTO/CMakeLists.txt +++ b/Tests/CudaOnly/DeviceLTO/CMakeLists.txt @@ -9,16 +9,23 @@ project(DeviceLTO CUDA) add_library(CUDA_dlto STATIC file1.cu file2.cu file3.cu) add_executable(CudaOnlyDeviceLTO main.cu) +set(archs_to_test "${CMAKE_CUDA_ARCHITECTURES_ALL}") +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + # Also test with at least one virtual architecture. + list(POP_BACK CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR latest_arch) + list(APPEND archs_to_test ${latest_arch}-virtual) +endif() + set_target_properties(CUDA_dlto PROPERTIES - CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}" + CUDA_ARCHITECTURES "${archs_to_test}" CUDA_SEPARABLE_COMPILATION ON POSITION_INDEPENDENT_CODE ON) set_target_properties(CudaOnlyDeviceLTO PROPERTIES CUDA_SEPARABLE_COMPILATION ON - CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_ALL}" + CUDA_ARCHITECTURES "${archs_to_test}" ) target_link_libraries(CudaOnlyDeviceLTO PRIVATE CUDA_dlto) diff --git a/Tests/CudaOnly/Fatbin/CMakeLists.txt b/Tests/CudaOnly/Fatbin/CMakeLists.txt new file mode 100644 index 0000000..db0dc22 --- /dev/null +++ b/Tests/CudaOnly/Fatbin/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.18) +project(CudaFATBIN LANGUAGES CUDA) + + +set(CMAKE_CUDA_ARCHITECTURES all-major) + +add_library(CudaFATBIN OBJECT +${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu +${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu +${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu) + +set_property(TARGET CudaFATBIN PROPERTY CUDA_FATBIN_COMPILATION ON) + +# Will use `cuModuleLoadFatBinary` to load the fatbinaries +add_executable(CudaOnlyFatbin main.cu) +target_compile_features(CudaOnlyFatbin PRIVATE cuda_std_11) +target_compile_definitions(CudaOnlyFatbin PRIVATE "FATBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaFATBIN>,~_~>\"") + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(CudaOnlyFatbin PRIVATE CUDA::cuda_driver) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyFatbin PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/Fatbin/main.cu b/Tests/CudaOnly/Fatbin/main.cu new file mode 100644 index 0000000..89af0e3 --- /dev/null +++ b/Tests/CudaOnly/Fatbin/main.cu @@ -0,0 +1,58 @@ +#include <iostream> +#include <string> +#include <vector> + +#include <cuda.h> + +#define GENERATED_HEADER(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) <x> + +static std::string input_paths = { FATBIN_FILE_PATHS }; + +int main() +{ + const std::string delimiter = "~_~"; + input_paths += delimiter; + + size_t end = 0; + size_t previous_end = 0; + std::vector<std::string> actual_paths; + while ((end = input_paths.find(delimiter, previous_end)) != + std::string::npos) { + actual_paths.emplace_back( + input_paths.substr(previous_end, end - previous_end)); + previous_end = end + 3; + } + + cuInit(0); + int count = 0; + cuDeviceGetCount(&count); + if (count == 0) { + std::cerr << "No CUDA devices found\n"; + return 1; + } + + CUdevice device; + cuDeviceGet(&device, 0); + + CUcontext context; + cuCtxCreate(&context, 0, device); + + CUmodule module; + for (auto p : actual_paths) { + if (p.find(".fatbin") == std::string::npos) { + std::cout << p << " Doesn't have the .fatbin suffix" << p << std::endl; + return 1; + } + std::cout << "trying to load fatbin: " << p << std::endl; + CUresult result = cuModuleLoad(&module, p.c_str()); + std::cout << "module pointer: " << module << '\n'; + if (result != CUDA_SUCCESS || module == nullptr) { + std::cerr << "Failed to load the embedded fatbin with error: " + << static_cast<unsigned int>(result) << '\n'; + return 1; + } + } + + return 0; +} diff --git a/Tests/CudaOnly/OptixIR/CMakeLists.txt b/Tests/CudaOnly/OptixIR/CMakeLists.txt new file mode 100644 index 0000000..afeabda --- /dev/null +++ b/Tests/CudaOnly/OptixIR/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 3.18) +project(CudaOptix LANGUAGES CUDA) + + +set(CMAKE_CUDA_ARCHITECTURES all-major) + +add_library(CudaOptix OBJECT + ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu + ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu + ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu) + +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0") + set_property(TARGET CudaOptix PROPERTY CUDA_OPTIX_COMPILATION ON) +endif() + +set_property(TARGET CudaOptix PROPERTY CUDA_ARCHITECTURES native) + +add_executable(CudaOnlyOptixIR main.cu) +target_compile_features(CudaOnlyOptixIR PRIVATE cuda_std_11) + +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0") + target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaOptix>,~_~>\"") +else() + target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"NO_OPTIX_SUPPORT\"") +endif() + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(CudaOnlyOptixIR PRIVATE CUDA::cuda_driver) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyOptixIR PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/OptixIR/main.cu b/Tests/CudaOnly/OptixIR/main.cu new file mode 100644 index 0000000..c79829b --- /dev/null +++ b/Tests/CudaOnly/OptixIR/main.cu @@ -0,0 +1,53 @@ +#include <fstream> +#include <iostream> +#include <string> +#include <vector> + +#include <cuda.h> + +#define GENERATED_HEADER(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) <x> + +static std::string input_paths = { OPTIX_FILE_PATHS }; + +int main() +{ + if (input_paths == "NO_OPTIX_SUPPORT") { + return 0; + } + + const std::string delimiter = "~_~"; + input_paths += delimiter; + + size_t end = 0; + size_t previous_end = 0; + std::vector<std::string> actual_paths; + while ((end = input_paths.find(delimiter, previous_end)) != + std::string::npos) { + actual_paths.emplace_back( + input_paths.substr(previous_end, end - previous_end)); + previous_end = end + 3; + } + + if (actual_paths.empty()) { + std::cerr << "Failed to parse OPTIX_FILE_PATHS" << std::endl; + return 1; + } + + const std::uint32_t optix_magic_value = 0x7f4e43ed; + for (auto p : actual_paths) { + if (p.find(".optixir") == std::string::npos) { + std::cout << p << " Doesn't have the .optixir suffix" << p << std::endl; + return 1; + } + std::ifstream input(p, std::ios::binary); + std::uint32_t value; + input.read(reinterpret_cast<char*>(&value), sizeof(value)); + if (value != optix_magic_value) { + std::cerr << p << " Doesn't look like an optix-ir file" << std::endl; + return 1; + } + } + + return 0; +} diff --git a/Tests/CudaOnly/RuntimeControls/verify_runtime.cmake b/Tests/CudaOnly/RuntimeControls/verify_runtime.cmake index b313dac..27fbe45 100644 --- a/Tests/CudaOnly/RuntimeControls/verify_runtime.cmake +++ b/Tests/CudaOnly/RuntimeControls/verify_runtime.cmake @@ -7,7 +7,7 @@ file(GET_RUNTIME_DEPENDENCIES EXECUTABLES ${EXEC_PATH} ) -list(FILTER resolved_libs INCLUDE REGEX ".*cudart.*") +list(FILTER resolved_libs INCLUDE REGEX ".*[Cc][Uu][Dd][Aa][Rr][Tt].*") list(LENGTH resolved_libs has_cudart) if(has_cudart EQUAL 0) diff --git a/Tests/CudaOnly/SharedRuntimePlusToolkit/CMakeLists.txt b/Tests/CudaOnly/SharedRuntimePlusToolkit/CMakeLists.txt index 0b01085..7dc919f 100644 --- a/Tests/CudaOnly/SharedRuntimePlusToolkit/CMakeLists.txt +++ b/Tests/CudaOnly/SharedRuntimePlusToolkit/CMakeLists.txt @@ -40,5 +40,6 @@ target_link_libraries(CudaOnlySharedRuntimePlusToolkit PRIVATE SharedToolkit if(UNIX) # Help the shared cuda runtime find libcudart as it is not located # in a default system searched location - set_property(TARGET CudaOnlySharedRuntimePlusToolkit PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + find_package(CUDAToolkit REQUIRED) + set_property(TARGET CudaOnlySharedRuntimePlusToolkit PROPERTY BUILD_RPATH "${CUDAToolkit_LIBRARY_DIR}") endif() diff --git a/Tests/CudaOnly/SharedRuntimeViaCUDAFlags/CMakeLists.txt b/Tests/CudaOnly/SharedRuntimeViaCUDAFlags/CMakeLists.txt index 24ff478..cf6eef2 100644 --- a/Tests/CudaOnly/SharedRuntimeViaCUDAFlags/CMakeLists.txt +++ b/Tests/CudaOnly/SharedRuntimeViaCUDAFlags/CMakeLists.txt @@ -11,5 +11,6 @@ add_executable(CudaOnlySharedRuntimeViaCUDAFlags main.cu) if(UNIX) # Help the shared cuda runtime find libcudart as it is not located # in a default system searched location - set_property(TARGET CudaOnlySharedRuntimeViaCUDAFlags PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + find_package(CUDAToolkit REQUIRED) + set_property(TARGET CudaOnlySharedRuntimeViaCUDAFlags PROPERTY BUILD_RPATH "${CUDAToolkit_LIBRARY_DIR}") endif() diff --git a/Tests/CudaOnly/StaticRuntimePlusToolkit/CMakeLists.txt b/Tests/CudaOnly/StaticRuntimePlusToolkit/CMakeLists.txt index ae03b66..8149060 100644 --- a/Tests/CudaOnly/StaticRuntimePlusToolkit/CMakeLists.txt +++ b/Tests/CudaOnly/StaticRuntimePlusToolkit/CMakeLists.txt @@ -39,5 +39,6 @@ target_link_libraries(CudaOnlyStaticRuntimePlusToolkit PRIVATE SharedToolkit if(UNIX) # Help the shared cuda runtime find libcurand and libnppif when they are not located # in a default system searched location - set_property(TARGET CudaOnlyStaticRuntimePlusToolkit PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + find_package(CUDAToolkit REQUIRED) + set_property(TARGET CudaOnlyStaticRuntimePlusToolkit PROPERTY BUILD_RPATH "${CUDAToolkit_LIBRARY_DIR}") endif() |