diff options
23 files changed, 463 insertions, 35 deletions
diff --git a/Auxiliary/vim/syntax/cmake.vim b/Auxiliary/vim/syntax/cmake.vim index f303bd4..1a47f67 100644 --- a/Auxiliary/vim/syntax/cmake.vim +++ b/Auxiliary/vim/syntax/cmake.vim @@ -128,7 +128,10 @@ syn keyword cmakeProperty contained \ CPACK_WIX_ACL \ CROSSCOMPILING_EMULATOR \ CUDA_ARCHITECTURES + \ CUDA_CUBIN_COMPILATION \ CUDA_EXTENSIONS + \ CUDA_FATBIN_COMPILATION + \ CUDA_OPTIX_COMPILATION \ CUDA_PTX_COMPILATION \ CUDA_RESOLVE_DEVICE_SYMBOLS \ CUDA_RUNTIME_LIBRARY diff --git a/Help/manual/cmake-properties.7.rst b/Help/manual/cmake-properties.7.rst index 01c9ce8..8559b0b 100644 --- a/Help/manual/cmake-properties.7.rst +++ b/Help/manual/cmake-properties.7.rst @@ -175,7 +175,10 @@ Properties on Targets /prop_tgt/CONFIG_POSTFIX /prop_tgt/CROSSCOMPILING_EMULATOR /prop_tgt/CUDA_ARCHITECTURES + /prop_tgt/CUDA_CUBIN_COMPILATION /prop_tgt/CUDA_EXTENSIONS + /prop_tgt/CUDA_FATBIN_COMPILATION + /prop_tgt/CUDA_OPTIX_COMPILATION /prop_tgt/CUDA_PTX_COMPILATION /prop_tgt/CUDA_RESOLVE_DEVICE_SYMBOLS /prop_tgt/CUDA_RUNTIME_LIBRARY diff --git a/Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst b/Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst new file mode 100644 index 0000000..f8860ae --- /dev/null +++ b/Help/prop_tgt/CUDA_CUBIN_COMPILATION.rst @@ -0,0 +1,14 @@ +CUDA_CUBIN_COMPILATION +---------------------- + +.. versionadded:: 3.27 + +Compile CUDA sources to ``.cubin`` files instead of ``.obj`` files +within :ref:`Object Libraries`. + +For example: + +.. code-block:: cmake + + add_library(mycubin OBJECT a.cu b.cu) + set_property(TARGET mycubin PROPERTY CUDA_CUBIN_COMPILATION ON) diff --git a/Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst b/Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst new file mode 100644 index 0000000..3d3c715 --- /dev/null +++ b/Help/prop_tgt/CUDA_FATBIN_COMPILATION.rst @@ -0,0 +1,14 @@ +CUDA_FATBIN_COMPILATION +----------------------- + +.. versionadded:: 3.27 + +Compile CUDA sources to ``.fatbin`` files instead of ``.obj`` files +within :ref:`Object Libraries`. + +For example: + +.. code-block:: cmake + + add_library(myfbins OBJECT a.cu b.cu) + set_property(TARGET myfbins PROPERTY CUDA_FATBIN_COMPILATION ON) diff --git a/Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst b/Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst new file mode 100644 index 0000000..c2a06a8 --- /dev/null +++ b/Help/prop_tgt/CUDA_OPTIX_COMPILATION.rst @@ -0,0 +1,14 @@ +CUDA_OPTIX_COMPILATION +---------------------- + +.. versionadded:: 3.27 + +Compile CUDA sources to ``.optixir`` files instead of ``.obj`` files +within :ref:`Object Libraries`. + +For example: + +.. code-block:: cmake + + add_library(myoptix OBJECT a.cu b.cu) + set_property(TARGET myoptix PROPERTY CUDA_OPTIX_COMPILATION ON) diff --git a/Help/release/dev/cuda-support-new-compile-modes.rst b/Help/release/dev/cuda-support-new-compile-modes.rst new file mode 100644 index 0000000..2d24c16 --- /dev/null +++ b/Help/release/dev/cuda-support-new-compile-modes.rst @@ -0,0 +1,14 @@ +cuda-support-new-compile-modes +------------------------------ + +* A :prop_tgt:`CUDA_CUBIN_COMPILATION` target property was added to + :ref:`Object Libraries` to support compiling to ``.cubin`` files + instead of host object files. Currently only supported with NVIDIA. + +* A :prop_tgt:`CUDA_FATBIN_COMPILATION` target property was added to + :ref:`Object Libraries` to support compiling to ``.fatbin`` files + instead of host object files. Currently only supported with NVIDIA. + +* A :prop_tgt:`CUDA_OPTIX_COMPILATION` target property was added to + :ref:`Object Libraries` to support compiling to ``.optixir`` files + instead of host object files. Currently only supported with NVIDIA. diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake index dea721e..e774088 100644 --- a/Modules/CMakeCUDAInformation.cmake +++ b/Modules/CMakeCUDAInformation.cmake @@ -134,7 +134,6 @@ include(CMakeCommonLanguageInclude) # CMAKE_CUDA_CREATE_SHARED_LIBRARY # CMAKE_CUDA_CREATE_SHARED_MODULE # CMAKE_CUDA_COMPILE_WHOLE_COMPILATION -# CMAKE_CUDA_COMPILE_PTX_COMPILATION # CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION # CMAKE_CUDA_LINK_EXECUTABLE diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake index 0823954..c839d1c 100644 --- a/Modules/Compiler/NVIDIA-CUDA.cmake +++ b/Modules/Compiler/NVIDIA-CUDA.cmake @@ -8,6 +8,11 @@ set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu") set(_CMAKE_CUDA_WHOLE_FLAG "-c") set(_CMAKE_CUDA_RDC_FLAG "-rdc=true") set(_CMAKE_CUDA_PTX_FLAG "-ptx") +set(_CMAKE_CUDA_CUBIN_FLAG "-cubin") +set(_CMAKE_CUDA_FATBIN_FLAG "-fatbin") +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0") + set(_CMAKE_CUDA_OPTIX_FLAG "-optix-ir") +endif() if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89) # The -forward-unknown-to-host-compiler flag was only diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx index 54cd8ba..4cfa1d7 100644 --- a/Source/cmGeneratorTarget.cxx +++ b/Source/cmGeneratorTarget.cxx @@ -3,6 +3,7 @@ #include "cmGeneratorTarget.h" #include <algorithm> +#include <array> #include <cassert> #include <cerrno> #include <cstddef> @@ -1011,12 +1012,27 @@ const std::string& cmGeneratorTarget::GetObjectName(cmSourceFile const* file) const char* cmGeneratorTarget::GetCustomObjectExtension() const { - static std::string extension; - const bool has_ptx_extension = - this->GetPropertyAsBool("CUDA_PTX_COMPILATION"); - if (has_ptx_extension) { - extension = ".ptx"; - return extension.c_str(); + struct compiler_mode + { + std::string variable; + std::string extension; + }; + static std::array<compiler_mode, 4> const modes{ + { { "CUDA_PTX_COMPILATION", ".ptx" }, + { "CUDA_CUBIN_COMPILATION", ".cubin" }, + { "CUDA_FATBIN_COMPILATION", ".fatbin" }, + { "CUDA_OPTIX_COMPILATION", ".optixir" } } + }; + + std::string const& compiler = + this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID"); + if (!compiler.empty()) { + for (const auto& m : modes) { + const bool has_extension = this->GetPropertyAsBool(m.variable); + if (has_extension) { + return m.extension.c_str(); + } + } } return nullptr; } diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index e217dd9..2ead739 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -3,6 +3,7 @@ #include "cmMakefileTargetGenerator.h" #include <algorithm> +#include <array> #include <cassert> #include <cstdio> #include <iterator> @@ -977,11 +978,23 @@ void cmMakefileTargetGenerator::WriteObjectRuleFiles( this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG"); cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " "); } - if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) { - const std::string& ptxFlag = - this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG"); - cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag); - } else { + + static std::array<cm::string_view, 4> const compileModes{ + { "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s } + }; + bool useNormalCompileMode = true; + for (cm::string_view mode : compileModes) { + auto propName = cmStrCat("CUDA_", mode, "_COMPILATION"); + auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG"); + if (this->GeneratorTarget->GetPropertyAsBool(propName)) { + const std::string& flag = + this->Makefile->GetRequiredDefinition(defName); + cudaCompileMode = cmStrCat(cudaCompileMode, flag); + useNormalCompileMode = false; + break; + } + } + if (useNormalCompileMode) { const std::string& wholeFlag = this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG"); cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag); diff --git a/Source/cmNinjaTargetGenerator.cxx b/Source/cmNinjaTargetGenerator.cxx index 8663f46..dc56142 100644 --- a/Source/cmNinjaTargetGenerator.cxx +++ b/Source/cmNinjaTargetGenerator.cxx @@ -3,6 +3,7 @@ #include "cmNinjaTargetGenerator.h" #include <algorithm> +#include <array> #include <cassert> #include <functional> #include <iterator> @@ -859,11 +860,22 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang, this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG"); cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " "); } - if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) { - const std::string& ptxFlag = - this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG"); - cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag); - } else { + static std::array<cm::string_view, 4> const compileModes{ + { "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s } + }; + bool useNormalCompileMode = true; + for (cm::string_view mode : compileModes) { + auto propName = cmStrCat("CUDA_", mode, "_COMPILATION"); + auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG"); + if (this->GeneratorTarget->GetPropertyAsBool(propName)) { + const std::string& flag = + this->Makefile->GetRequiredDefinition(defName); + cudaCompileMode = cmStrCat(cudaCompileMode, flag); + useNormalCompileMode = false; + break; + } + } + if (useNormalCompileMode) { const std::string& wholeFlag = this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG"); cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag); @@ -1789,11 +1801,22 @@ void cmNinjaTargetGenerator::ExportObjectCompileCommand( this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG"); cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " "); } - if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) { - const std::string& ptxFlag = - this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG"); - cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag); - } else { + static std::array<cm::string_view, 4> const compileModes{ + { "PTX"_s, "CUBIN"_s, "FATBIN"_s, "OPTIX"_s } + }; + bool useNormalCompileMode = true; + for (cm::string_view mode : compileModes) { + auto propName = cmStrCat("CUDA_", mode, "_COMPILATION"); + auto defName = cmStrCat("_CMAKE_CUDA_", mode, "_FLAG"); + if (this->GeneratorTarget->GetPropertyAsBool(propName)) { + const std::string& flag = + this->Makefile->GetRequiredDefinition(defName); + cudaCompileMode = cmStrCat(cudaCompileMode, flag); + useNormalCompileMode = false; + break; + } + } + if (useNormalCompileMode) { const std::string& wholeFlag = this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG"); cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag); diff --git a/Source/cmTarget.cxx b/Source/cmTarget.cxx index ec87271..f06e26b 100644 --- a/Source/cmTarget.cxx +++ b/Source/cmTarget.cxx @@ -1775,6 +1775,9 @@ MAKE_PROP(COMPILE_FEATURES); MAKE_PROP(COMPILE_OPTIONS); MAKE_PROP(PRECOMPILE_HEADERS); MAKE_PROP(PRECOMPILE_HEADERS_REUSE_FROM); +MAKE_PROP(CUDA_CUBIN_COMPILATION); +MAKE_PROP(CUDA_FATBIN_COMPILATION); +MAKE_PROP(CUDA_OPTIX_COMPILATION); MAKE_PROP(CUDA_PTX_COMPILATION); MAKE_PROP(EXPORT_NAME); MAKE_PROP(IMPORTED); @@ -1910,14 +1913,38 @@ void cmTarget::StoreProperty(const std::string& prop, ValueType value) value ? value : std::string{})) { // NOLINT(bugprone-branch-clone) /* error was reported by check method */ - } else if (prop == propCUDA_PTX_COMPILATION && - this->GetType() != cmStateEnums::OBJECT_LIBRARY) { - std::ostringstream e; - e << "CUDA_PTX_COMPILATION property can only be applied to OBJECT " - "targets (\"" - << this->impl->Name << "\")\n"; - this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e.str()); - return; + } else if (prop == propCUDA_CUBIN_COMPILATION || + prop == propCUDA_FATBIN_COMPILATION || + prop == propCUDA_OPTIX_COMPILATION || + prop == propCUDA_PTX_COMPILATION) { + auto const& compiler = + this->impl->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID"); + auto const& compilerVersion = + this->impl->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_VERSION"); + if (this->GetType() != cmStateEnums::OBJECT_LIBRARY) { + auto e = + cmStrCat(prop, " property can only be applied to OBJECT targets(", + this->impl->Name, ")\n"); + this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e); + return; + } + const bool flag_found = + (prop == propCUDA_PTX_COMPILATION && + this->impl->Makefile->GetDefinition("_CMAKE_CUDA_PTX_FLAG")) || + (prop == propCUDA_CUBIN_COMPILATION && + this->impl->Makefile->GetDefinition("_CMAKE_CUDA_CUBIN_FLAG")) || + (prop == propCUDA_FATBIN_COMPILATION && + this->impl->Makefile->GetDefinition("_CMAKE_CUDA_FATBIN_FLAG")) || + (prop == propCUDA_OPTIX_COMPILATION && + this->impl->Makefile->GetDefinition("_CMAKE_CUDA_OPTIX_FLAG")); + if (flag_found) { + this->impl->Properties.SetProperty(prop, value); + } else { + auto e = cmStrCat(prop, " property is not supported by ", compiler, + " compiler version ", compilerVersion, "."); + this->impl->Makefile->IssueMessage(MessageType::FATAL_ERROR, e); + return; + } } else if (prop == propPRECOMPILE_HEADERS_REUSE_FROM) { if (this->GetProperty("PRECOMPILE_HEADERS")) { std::ostringstream e; diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index ef0fcf3..8926f9e 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -3597,13 +3597,13 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) { cudaOptions.AddFlag("GenerateRelocatableDeviceCode", "true"); } - bool notPtx = true; + bool notPtxLike = true; if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) { cudaOptions.AddFlag("NvccCompilation", "ptx"); // We drop the %(Extension) component as CMake expects all PTX files // to not have the source file extension at all cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx"); - notPtx = false; + notPtxLike = false; if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL, cudaVersion, "9.0") && @@ -3618,9 +3618,24 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( "%(BaseCommandLineTemplate) [CompileOut] [FastMath] " "[Defines] \"%(FullPath)\""); } - } - - if (notPtx && + } else if (this->GeneratorTarget->GetPropertyAsBool( + "CUDA_CUBIN_COMPILATION")) { + cudaOptions.AddFlag("NvccCompilation", "cubin"); + cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).cubin"); + notPtxLike = false; + } else if (this->GeneratorTarget->GetPropertyAsBool( + "CUDA_FATBIN_COMPILATION")) { + cudaOptions.AddFlag("NvccCompilation", "fatbin"); + cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).fatbin"); + notPtxLike = false; + } else if (this->GeneratorTarget->GetPropertyAsBool( + "CUDA_OPTIX_COMPILATION")) { + cudaOptions.AddFlag("NvccCompilation", "optix-ir"); + cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).optixir"); + notPtxLike = false; + } + + if (notPtxLike && cmSystemTools::VersionCompareGreaterEq( "8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) { // Explicitly state that we want this file to be treated as a diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index db08076..aa25c4c 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -27,6 +27,9 @@ if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") # Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode. add_cuda_test_macro(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) + add_cuda_test_macro(CudaOnly.CUBIN CudaOnlyCUBIN) + add_cuda_test_macro(CudaOnly.Fatbin CudaOnlyFatbin) + add_cuda_test_macro(CudaOnly.OptixIR CudaOnlyOptixIR) endif() add_cuda_test_macro(CudaOnly.DeviceLTO CudaOnlyDeviceLTO) diff --git a/Tests/CudaOnly/CUBIN/CMakeLists.txt b/Tests/CudaOnly/CUBIN/CMakeLists.txt new file mode 100644 index 0000000..464714b --- /dev/null +++ b/Tests/CudaOnly/CUBIN/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.18) +project(CudaCUBIN LANGUAGES CUDA) + + +set(CMAKE_CUDA_ARCHITECTURES all-major) + +add_library(CudaCUBIN OBJECT kernelA.cu kernelB.cu kernelC.cu) +set_property(TARGET CudaCUBIN PROPERTY CUDA_CUBIN_COMPILATION ON) +set_property(TARGET CudaCUBIN PROPERTY CUDA_ARCHITECTURES native) + +add_executable(CudaOnlyCUBIN main.cu) +target_compile_features(CudaOnlyCUBIN PRIVATE cuda_std_11) +target_compile_definitions(CudaOnlyCUBIN PRIVATE "CUBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaCUBIN>,~_~>\"") + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(CudaOnlyCUBIN PRIVATE CUDA::cuda_driver) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyCUBIN PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/CUBIN/kernelA.cu b/Tests/CudaOnly/CUBIN/kernelA.cu new file mode 100644 index 0000000..fbe0d26 --- /dev/null +++ b/Tests/CudaOnly/CUBIN/kernelA.cu @@ -0,0 +1,7 @@ + +__global__ void kernelA(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/CUBIN/kernelB.cu b/Tests/CudaOnly/CUBIN/kernelB.cu new file mode 100644 index 0000000..7478253 --- /dev/null +++ b/Tests/CudaOnly/CUBIN/kernelB.cu @@ -0,0 +1,7 @@ + +__global__ void kernelB(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/CUBIN/kernelC.cu b/Tests/CudaOnly/CUBIN/kernelC.cu new file mode 100644 index 0000000..5f8a0ce --- /dev/null +++ b/Tests/CudaOnly/CUBIN/kernelC.cu @@ -0,0 +1,7 @@ + +__global__ void kernelC(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/CUBIN/main.cu b/Tests/CudaOnly/CUBIN/main.cu new file mode 100644 index 0000000..da5249c --- /dev/null +++ b/Tests/CudaOnly/CUBIN/main.cu @@ -0,0 +1,56 @@ +#include <iostream> +#include <string> +#include <vector> + +#include <cuda.h> + +#define GENERATED_HEADER(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) <x> + +static std::string input_paths = { CUBIN_FILE_PATHS }; + +int main() +{ + const std::string delimiter = "~_~"; + input_paths += delimiter; + + size_t end = 0; + size_t previous_end = 0; + std::vector<std::string> actual_paths; + while ((end = input_paths.find(delimiter, previous_end)) != + std::string::npos) { + actual_paths.emplace_back( + input_paths.substr(previous_end, end - previous_end)); + previous_end = end + 3; + } + + cuInit(0); + int count = 0; + cuDeviceGetCount(&count); + if (count == 0) { + std::cerr << "No CUDA devices found\n"; + return 1; + } + + CUdevice device; + cuDeviceGet(&device, 0); + + CUcontext context; + cuCtxCreate(&context, 0, device); + + CUmodule module; + for (auto p : actual_paths) { + if (p.find(".cubin") == std::string::npos) { + std::cout << p << " Doesn't have the .cubin suffix" << p << std::endl; + return 1; + } + std::cout << "trying to load cubin: " << p << std::endl; + CUresult result = cuModuleLoad(&module, p.c_str()); + std::cout << "module pointer: " << module << '\n'; + if (result != CUDA_SUCCESS || module == nullptr) { + std::cerr << "Failed to load the embedded cubin with error: " + << static_cast<unsigned int>(result) << '\n'; + return 1; + } + } +} diff --git a/Tests/CudaOnly/Fatbin/CMakeLists.txt b/Tests/CudaOnly/Fatbin/CMakeLists.txt new file mode 100644 index 0000000..db0dc22 --- /dev/null +++ b/Tests/CudaOnly/Fatbin/CMakeLists.txt @@ -0,0 +1,25 @@ +cmake_minimum_required(VERSION 3.18) +project(CudaFATBIN LANGUAGES CUDA) + + +set(CMAKE_CUDA_ARCHITECTURES all-major) + +add_library(CudaFATBIN OBJECT +${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu +${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu +${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu) + +set_property(TARGET CudaFATBIN PROPERTY CUDA_FATBIN_COMPILATION ON) + +# Will use `cuModuleLoadFatBinary` to load the fatbinaries +add_executable(CudaOnlyFatbin main.cu) +target_compile_features(CudaOnlyFatbin PRIVATE cuda_std_11) +target_compile_definitions(CudaOnlyFatbin PRIVATE "FATBIN_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaFATBIN>,~_~>\"") + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(CudaOnlyFatbin PRIVATE CUDA::cuda_driver) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyFatbin PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/Fatbin/main.cu b/Tests/CudaOnly/Fatbin/main.cu new file mode 100644 index 0000000..903feee --- /dev/null +++ b/Tests/CudaOnly/Fatbin/main.cu @@ -0,0 +1,56 @@ +#include <iostream> +#include <string> +#include <vector> + +#include <cuda.h> + +#define GENERATED_HEADER(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) <x> + +static std::string input_paths = { FATBIN_FILE_PATHS }; + +int main() +{ + const std::string delimiter = "~_~"; + input_paths += delimiter; + + size_t end = 0; + size_t previous_end = 0; + std::vector<std::string> actual_paths; + while ((end = input_paths.find(delimiter, previous_end)) != + std::string::npos) { + actual_paths.emplace_back( + input_paths.substr(previous_end, end - previous_end)); + previous_end = end + 3; + } + + cuInit(0); + int count = 0; + cuDeviceGetCount(&count); + if (count == 0) { + std::cerr << "No CUDA devices found\n"; + return 1; + } + + CUdevice device; + cuDeviceGet(&device, 0); + + CUcontext context; + cuCtxCreate(&context, 0, device); + + CUmodule module; + for (auto p : actual_paths) { + if (p.find(".fatbin") == std::string::npos) { + std::cout << p << " Doesn't have the .fatbin suffix" << p << std::endl; + return 1; + } + std::cout << "trying to load fatbin: " << p << std::endl; + CUresult result = cuModuleLoad(&module, p.c_str()); + std::cout << "module pointer: " << module << '\n'; + if (result != CUDA_SUCCESS || module == nullptr) { + std::cerr << "Failed to load the embedded fatbin with error: " + << static_cast<unsigned int>(result) << '\n'; + return 1; + } + } +} diff --git a/Tests/CudaOnly/OptixIR/CMakeLists.txt b/Tests/CudaOnly/OptixIR/CMakeLists.txt new file mode 100644 index 0000000..afeabda --- /dev/null +++ b/Tests/CudaOnly/OptixIR/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 3.18) +project(CudaOptix LANGUAGES CUDA) + + +set(CMAKE_CUDA_ARCHITECTURES all-major) + +add_library(CudaOptix OBJECT + ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelA.cu + ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelB.cu + ${CMAKE_CURRENT_SOURCE_DIR}/../CUBIN/kernelC.cu) + +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0") + set_property(TARGET CudaOptix PROPERTY CUDA_OPTIX_COMPILATION ON) +endif() + +set_property(TARGET CudaOptix PROPERTY CUDA_ARCHITECTURES native) + +add_executable(CudaOnlyOptixIR main.cu) +target_compile_features(CudaOnlyOptixIR PRIVATE cuda_std_11) + +if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.7.0") + target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"$<JOIN:$<TARGET_OBJECTS:CudaOptix>,~_~>\"") +else() + target_compile_definitions(CudaOnlyOptixIR PRIVATE "OPTIX_FILE_PATHS=\"NO_OPTIX_SUPPORT\"") +endif() + +find_package(CUDAToolkit REQUIRED) +target_link_libraries(CudaOnlyOptixIR PRIVATE CUDA::cuda_driver) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyOptixIR PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/OptixIR/main.cu b/Tests/CudaOnly/OptixIR/main.cu new file mode 100644 index 0000000..c79829b --- /dev/null +++ b/Tests/CudaOnly/OptixIR/main.cu @@ -0,0 +1,53 @@ +#include <fstream> +#include <iostream> +#include <string> +#include <vector> + +#include <cuda.h> + +#define GENERATED_HEADER(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) <x> + +static std::string input_paths = { OPTIX_FILE_PATHS }; + +int main() +{ + if (input_paths == "NO_OPTIX_SUPPORT") { + return 0; + } + + const std::string delimiter = "~_~"; + input_paths += delimiter; + + size_t end = 0; + size_t previous_end = 0; + std::vector<std::string> actual_paths; + while ((end = input_paths.find(delimiter, previous_end)) != + std::string::npos) { + actual_paths.emplace_back( + input_paths.substr(previous_end, end - previous_end)); + previous_end = end + 3; + } + + if (actual_paths.empty()) { + std::cerr << "Failed to parse OPTIX_FILE_PATHS" << std::endl; + return 1; + } + + const std::uint32_t optix_magic_value = 0x7f4e43ed; + for (auto p : actual_paths) { + if (p.find(".optixir") == std::string::npos) { + std::cout << p << " Doesn't have the .optixir suffix" << p << std::endl; + return 1; + } + std::ifstream input(p, std::ios::binary); + std::uint32_t value; + input.read(reinterpret_cast<char*>(&value), sizeof(value)); + if (value != optix_magic_value) { + std::cerr << p << " Doesn't look like an optix-ir file" << std::endl; + return 1; + } + } + + return 0; +} |