diff options
-rw-r--r-- | Help/release/dev/cuda-ptx-separable-compilation.rst | 5 | ||||
-rw-r--r-- | Modules/CMakeCUDAInformation.cmake | 19 | ||||
-rw-r--r-- | Modules/Compiler/Clang-CUDA.cmake | 3 | ||||
-rw-r--r-- | Modules/Compiler/NVIDIA-CUDA.cmake | 3 | ||||
-rw-r--r-- | Modules/Platform/Windows-NVIDIA-CUDA.cmake | 8 | ||||
-rw-r--r-- | Source/cmMakefileTargetGenerator.cxx | 33 | ||||
-rw-r--r-- | Source/cmNinjaTargetGenerator.cxx | 34 | ||||
-rw-r--r-- | Source/cmRulePlaceholderExpander.cxx | 5 | ||||
-rw-r--r-- | Source/cmRulePlaceholderExpander.h | 1 | ||||
-rw-r--r-- | Source/cmVisualStudio10TargetGenerator.cxx | 7 | ||||
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Tests/CudaOnly/ExportPTX/CMakeLists.txt | 2 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt | 51 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilationPTX/kernels.cu | 14 | ||||
-rw-r--r-- | Tests/CudaOnly/SeparateCompilationPTX/main.cu | 30 | ||||
-rw-r--r-- | Tests/CudaOnly/utils/bin2c_wrapper.cmake (renamed from Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake) | 0 |
16 files changed, 158 insertions, 58 deletions
diff --git a/Help/release/dev/cuda-ptx-separable-compilation.rst b/Help/release/dev/cuda-ptx-separable-compilation.rst new file mode 100644 index 0000000..49c66c0 --- /dev/null +++ b/Help/release/dev/cuda-ptx-separable-compilation.rst @@ -0,0 +1,5 @@ +cuda-ptx-separable-compilation +------------------------------ + +* ``CUDA`` targets can now enable both :prop_tgt:`CUDA_SEPARABLE_COMPILATION` and + :prop_tgt:`CUDA_PTX_COMPILATION`. diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake index e9cfed6..94b67dc 100644 --- a/Modules/CMakeCUDAInformation.cmake +++ b/Modules/CMakeCUDAInformation.cmake @@ -160,22 +160,9 @@ if(NOT DEFINED CMAKE_CUDA_ARCHIVE_FINISH) set(CMAKE_CUDA_ARCHIVE_FINISH "<CMAKE_RANLIB> <TARGET>") endif() -#Specify how to compile when ptx has been requested -if(NOT CMAKE_CUDA_COMPILE_PTX_COMPILATION) - set(CMAKE_CUDA_COMPILE_PTX_COMPILATION - "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_PTX_FLAG} <SOURCE> -o <OBJECT>") -endif() - -#Specify how to compile when separable compilation has been requested -if(NOT CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION) - set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION - "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} ${_CMAKE_CUDA_DEVICE_CODE} <SOURCE> -o <OBJECT>") -endif() - -#Specify how to compile when whole compilation has been requested -if(NOT CMAKE_CUDA_COMPILE_WHOLE_COMPILATION) - set(CMAKE_CUDA_COMPILE_WHOLE_COMPILATION - "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c <SOURCE> -o <OBJECT>") +if(NOT CMAKE_CUDA_COMPILE_OBJECT) + set(CMAKE_CUDA_COMPILE_OBJECT + "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} <CUDA_COMPILE_MODE> <SOURCE> -o <OBJECT>") endif() # compile a cu file into an executable diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake index 0223081..b105518 100644 --- a/Modules/Compiler/Clang-CUDA.cmake +++ b/Modules/Compiler/Clang-CUDA.cmake @@ -18,8 +18,9 @@ __compiler_clang_cxx_standards(CUDA) set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE) set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda") +set(_CMAKE_CUDA_WHOLE_FLAG "-c") +set(_CMAKE_CUDA_RDC_FLAG "-fgpu-rdc") set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S") -set(_CMAKE_CUDA_DEVICE_CODE "-fgpu-rdc -c") # RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default. set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICIT_LINKS}") diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake index c2fe42d..2f12b43 100644 --- a/Modules/Compiler/NVIDIA-CUDA.cmake +++ b/Modules/Compiler/NVIDIA-CUDA.cmake @@ -5,8 +5,9 @@ set(CMAKE_CUDA_VERBOSE_FLAG "-v") set(CMAKE_CUDA_VERBOSE_COMPILE_FLAG "-Xcompiler=-v") set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cu") +set(_CMAKE_CUDA_WHOLE_FLAG "-c") +set(_CMAKE_CUDA_RDC_FLAG "-rdc=true") set(_CMAKE_CUDA_PTX_FLAG "-ptx") -set(_CMAKE_CUDA_DEVICE_CODE "-dc") if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.2.89) # The -forward-unknown-to-host-compiler flag was only diff --git a/Modules/Platform/Windows-NVIDIA-CUDA.cmake b/Modules/Platform/Windows-NVIDIA-CUDA.cmake index b83932e..6c1699b 100644 --- a/Modules/Platform/Windows-NVIDIA-CUDA.cmake +++ b/Modules/Platform/Windows-NVIDIA-CUDA.cmake @@ -1,11 +1,7 @@ include(Platform/Windows-MSVC) -set(CMAKE_CUDA_COMPILE_PTX_COMPILATION - "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -ptx <SOURCE> -o <OBJECT> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS") -set(CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION - "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -dc <SOURCE> -o <OBJECT> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS") -set(CMAKE_CUDA_COMPILE_WHOLE_COMPILATION - "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} -c <SOURCE> -o <OBJECT> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS") +set(CMAKE_CUDA_COMPILE_OBJECT + "<CMAKE_CUDA_COMPILER> ${_CMAKE_CUDA_EXTRA_FLAGS} <DEFINES> <INCLUDES> <FLAGS> ${_CMAKE_COMPILE_AS_CUDA_FLAG} <CUDA_COMPILE_MODE> <SOURCE> -o <OBJECT> -Xcompiler=-Fd<TARGET_COMPILE_PDB>,-FS") set(__IMPLICIT_LINKS) foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES}) diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index 8edadd3..9f2ae19 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -897,28 +897,31 @@ void cmMakefileTargetGenerator::WriteObjectRuleFiles( // Construct the compile rules. { - std::vector<std::string> compileCommands; + std::string cudaCompileMode; if (lang == "CUDA") { - std::string cmdVar; if (this->GeneratorTarget->GetPropertyAsBool( "CUDA_SEPARABLE_COMPILATION")) { - cmdVar = "CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION"; - } else if (this->GeneratorTarget->GetPropertyAsBool( - "CUDA_PTX_COMPILATION")) { - cmdVar = "CMAKE_CUDA_COMPILE_PTX_COMPILATION"; + const std::string& rdcFlag = + this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG"); + cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " "); + } + if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) { + const std::string& ptxFlag = + this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG"); + cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag); } else { - cmdVar = "CMAKE_CUDA_COMPILE_WHOLE_COMPILATION"; + const std::string& wholeFlag = + this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG"); + cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag); } - const std::string& compileRule = - this->Makefile->GetRequiredDefinition(cmdVar); - cmExpandList(compileRule, compileCommands); - } else { - const std::string cmdVar = "CMAKE_" + lang + "_COMPILE_OBJECT"; - const std::string& compileRule = - this->Makefile->GetRequiredDefinition(cmdVar); - cmExpandList(compileRule, compileCommands); + vars.CudaCompileMode = cudaCompileMode.c_str(); } + std::vector<std::string> compileCommands; + const std::string& compileRule = this->Makefile->GetRequiredDefinition( + "CMAKE_" + lang + "_COMPILE_OBJECT"); + cmExpandList(compileRule, compileCommands); + if (this->GeneratorTarget->GetPropertyAsBool("EXPORT_COMPILE_COMMANDS") && lang_can_export_cmds && compileCommands.size() == 1) { std::string compileCommand = compileCommands[0]; diff --git a/Source/cmNinjaTargetGenerator.cxx b/Source/cmNinjaTargetGenerator.cxx index 57fc020..2c45e1b 100644 --- a/Source/cmNinjaTargetGenerator.cxx +++ b/Source/cmNinjaTargetGenerator.cxx @@ -605,6 +605,7 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang, vars.TargetCompilePDB = "$TARGET_COMPILE_PDB"; vars.ObjectDir = "$OBJECT_DIR"; vars.ObjectFileDir = "$OBJECT_FILE_DIR"; + vars.CudaCompileMode = "$CUDA_COMPILE_MODE"; vars.ISPCHeader = "$ISPC_HEADER_FILE"; cmMakefile* mf = this->GetMakefile(); @@ -815,27 +816,32 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang, vars.Flags = flags.c_str(); vars.DependencyFile = rule.DepFile.c_str(); - // Rule for compiling object file. - std::vector<std::string> compileCmds; + std::string cudaCompileMode; if (lang == "CUDA") { - std::string cmdVar; if (this->GeneratorTarget->GetPropertyAsBool( "CUDA_SEPARABLE_COMPILATION")) { - cmdVar = "CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION"; - } else if (this->GeneratorTarget->GetPropertyAsBool( - "CUDA_PTX_COMPILATION")) { - cmdVar = "CMAKE_CUDA_COMPILE_PTX_COMPILATION"; + const std::string& rdcFlag = + this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_RDC_FLAG"); + cudaCompileMode = cmStrCat(cudaCompileMode, rdcFlag, " "); + } + if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) { + const std::string& ptxFlag = + this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_PTX_FLAG"); + cudaCompileMode = cmStrCat(cudaCompileMode, ptxFlag); } else { - cmdVar = "CMAKE_CUDA_COMPILE_WHOLE_COMPILATION"; + const std::string& wholeFlag = + this->Makefile->GetRequiredDefinition("_CMAKE_CUDA_WHOLE_FLAG"); + cudaCompileMode = cmStrCat(cudaCompileMode, wholeFlag); } - const std::string& compileCmd = mf->GetRequiredDefinition(cmdVar); - cmExpandList(compileCmd, compileCmds); - } else { - const std::string cmdVar = cmStrCat("CMAKE_", lang, "_COMPILE_OBJECT"); - const std::string& compileCmd = mf->GetRequiredDefinition(cmdVar); - cmExpandList(compileCmd, compileCmds); + vars.CudaCompileMode = cudaCompileMode.c_str(); } + // Rule for compiling object file. + std::vector<std::string> compileCmds; + const std::string cmdVar = cmStrCat("CMAKE_", lang, "_COMPILE_OBJECT"); + const std::string& compileCmd = mf->GetRequiredDefinition(cmdVar); + cmExpandList(compileCmd, compileCmds); + // See if we need to use a compiler launcher like ccache or distcc std::string compilerLauncher; if (!compileCmds.empty() && diff --git a/Source/cmRulePlaceholderExpander.cxx b/Source/cmRulePlaceholderExpander.cxx index 7480aeb..4cee09d 100644 --- a/Source/cmRulePlaceholderExpander.cxx +++ b/Source/cmRulePlaceholderExpander.cxx @@ -85,6 +85,11 @@ std::string cmRulePlaceholderExpander::ExpandRuleVariable( return replaceValues.ObjectsQuoted; } } + if (replaceValues.CudaCompileMode) { + if (variable == "CUDA_COMPILE_MODE") { + return replaceValues.CudaCompileMode; + } + } if (replaceValues.AIXExports) { if (variable == "AIX_EXPORTS") { return replaceValues.AIXExports; diff --git a/Source/cmRulePlaceholderExpander.h b/Source/cmRulePlaceholderExpander.h index c22e0fa..852954f 100644 --- a/Source/cmRulePlaceholderExpander.h +++ b/Source/cmRulePlaceholderExpander.h @@ -65,6 +65,7 @@ public: const char* SwiftOutputFileMap = nullptr; const char* SwiftSources = nullptr; const char* ISPCHeader = nullptr; + const char* CudaCompileMode = nullptr; const char* Fatbinary = nullptr; const char* RegisterFile = nullptr; const char* Launcher = nullptr; diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index b792c03..4e93a4a 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -3211,18 +3211,17 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( // the default to not have any extension cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).obj"); - bool notPtx = true; if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) { cudaOptions.AddFlag("GenerateRelocatableDeviceCode", "true"); - } else if (this->GeneratorTarget->GetPropertyAsBool( - "CUDA_PTX_COMPILATION")) { + } + bool notPtx = true; + if (this->GeneratorTarget->GetPropertyAsBool("CUDA_PTX_COMPILATION")) { cudaOptions.AddFlag("NvccCompilation", "ptx"); // We drop the %(Extension) component as CMake expects all PTX files // to not have the source file extension at all cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx"); notPtx = false; } - if (notPtx && cmSystemTools::VersionCompareGreaterEq( "8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) { diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index a3fb409..65dfebb 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -16,6 +16,7 @@ add_cuda_test_macro(CudaOnly.WithDefs CudaOnlyWithDefs) add_cuda_test_macro(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) add_cuda_test_macro(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) add_cuda_test_macro(CudaOnly.SeparateCompilation main/CudaOnlySeparateCompilation) +add_cuda_test_macro(CudaOnly.SeparateCompilationPTX CudaOnlySeparateCompilationPTX) if(CMake_TEST_CUDA AND NOT CMake_TEST_CUDA STREQUAL "Clang") # Clang doesn't have flags for selecting the runtime. diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt index e7e7bc4..f1667af 100644 --- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt +++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt @@ -56,7 +56,7 @@ add_custom_command( "-DBIN_TO_C_COMMAND=${bin_to_c}" "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>" "-DOUTPUT=${output_file}" - -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake + -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake VERBATIM DEPENDS $<TARGET_OBJECTS:CudaPTX> COMMENT "Converting Object files to a C header" diff --git a/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt b/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt new file mode 100644 index 0000000..273f955 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilationPTX/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.19) +project (SeparateCompPTX CUDA) + +#Goal for this example: +# How to generate PTX files with RDC enabled + +# PTX can be compiled only for a single virtual architecture at a time +list(POP_FRONT CMAKE_CUDA_ARCHITECTURES temp) +set(CMAKE_CUDA_ARCHITECTURES ${temp}) +string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual") + +add_library(CudaPTX OBJECT kernels.cu) +set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON) +set_property(TARGET CudaPTX PROPERTY CUDA_SEPARABLE_COMPILATION ON) + + +set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h) + +find_package(CUDAToolkit REQUIRED) +find_program(bin_to_c + NAMES bin2c + PATHS ${CUDAToolkit_BIN_DIR} + ) +if(NOT bin_to_c) + message(FATAL_ERROR + "bin2c not found:\n" + " CUDAToolkit_BIN_DIR='${CUDAToolkit_BIN_DIR}'\n" + ) +endif() + +add_custom_command( + OUTPUT "${output_file}" + COMMAND ${CMAKE_COMMAND} + "-DBIN_TO_C_COMMAND=${bin_to_c}" + "-DOBJECTS=$<TARGET_OBJECTS:CudaPTX>" + "-DOUTPUT=${output_file}" + -P ${CMAKE_CURRENT_SOURCE_DIR}/../utils/bin2c_wrapper.cmake + VERBATIM + DEPENDS $<TARGET_OBJECTS:CudaPTX> + COMMENT "Converting Object files to a C header" + ) + +add_executable(CudaOnlySeparateCompilationPTX main.cu ${output_file}) +target_compile_features(CudaOnlySeparateCompilationPTX PRIVATE cuda_std_11) +target_include_directories(CudaOnlySeparateCompilationPTX PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} ) +target_link_libraries(CudaOnlySeparateCompilationPTX PRIVATE CUDA::cuda_driver) +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlySeparateCompilationPTX PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu b/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu new file mode 100644 index 0000000..f4a52d4 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilationPTX/kernels.cu @@ -0,0 +1,14 @@ + +__global__ void kernelA(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} + +__global__ void kernelB(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/SeparateCompilationPTX/main.cu b/Tests/CudaOnly/SeparateCompilationPTX/main.cu new file mode 100644 index 0000000..164cde5 --- /dev/null +++ b/Tests/CudaOnly/SeparateCompilationPTX/main.cu @@ -0,0 +1,30 @@ +#include <iostream> + +#include <cuda.h> + +#include "embedded_objs.h" + +int main() +{ + cuInit(0); + int count = 0; + cuDeviceGetCount(&count); + if (count == 0) { + std::cerr << "No CUDA devices found\n"; + return 1; + } + + CUdevice device; + cuDeviceGet(&device, 0); + + CUcontext context; + cuCtxCreate(&context, 0, device); + + CUmodule module; + cuModuleLoadData(&module, kernels); + if (module == nullptr) { + std::cerr << "Failed to load the embedded ptx" << std::endl; + return 1; + } + std::cout << module << std::endl; +} diff --git a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake b/Tests/CudaOnly/utils/bin2c_wrapper.cmake index 0baf934..0baf934 100644 --- a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake +++ b/Tests/CudaOnly/utils/bin2c_wrapper.cmake |