diff options
author | Brad King <brad.king@kitware.com> | 2022-03-04 18:51:49 (GMT) |
---|---|---|
committer | Brad King <brad.king@kitware.com> | 2022-03-10 14:27:29 (GMT) |
commit | d1b48bfabd6157309b3056967e6e30cc0ce07983 (patch) | |
tree | 3ce109b078bf5da4d866f6e2994bbce8246e4df1 | |
parent | 632752d62e8de2730796f509dbb10551351309c2 (diff) | |
download | CMake-d1b48bfabd6157309b3056967e6e30cc0ce07983.zip CMake-d1b48bfabd6157309b3056967e6e30cc0ce07983.tar.gz CMake-d1b48bfabd6157309b3056967e6e30cc0ce07983.tar.bz2 |
CUDA: Add support for CUDA_ARCHITECTURES=native
CUDA 11.6 added the `nvcc -arch=native` flag to automatically compile
for the host GPUs' architectures. Add support for specifying this
special `native` value in `CMAKE_CUDA_ARCHITECTURES` and
`CUDA_ARCHITECTURES`. During the compiler ABI detection step,
detect the native architectures so we can pass them explicitly
when using Clang or older versions of nvcc.
Fixes: #22375
19 files changed, 177 insertions, 7 deletions
diff --git a/Help/prop_tgt/CUDA_ARCHITECTURES.rst b/Help/prop_tgt/CUDA_ARCHITECTURES.rst index 191f78f..05c2599 100644 --- a/Help/prop_tgt/CUDA_ARCHITECTURES.rst +++ b/Help/prop_tgt/CUDA_ARCHITECTURES.rst @@ -34,6 +34,11 @@ The ``CUDA_ARCHITECTURES`` may be set to one of the following special values: Compile for all supported major real architectures, and the highest major virtual architecture. +``native`` + .. versionadded:: 3.24 + + Compile for the architecture(s) of the host's GPU(s). + Examples ^^^^^^^^ diff --git a/Help/release/dev/cuda-arch-native.rst b/Help/release/dev/cuda-arch-native.rst new file mode 100644 index 0000000..f44a668 --- /dev/null +++ b/Help/release/dev/cuda-arch-native.rst @@ -0,0 +1,7 @@ +cuda-arch-native +---------------- + +* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable and associated + :prop_tgt:`CUDA_ARCHITECTURES` target property now support the + special ``native`` value to compile for the architectures(s) + of the host's GPU(s). diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in index 9f2e213..57d595a 100644 --- a/Modules/CMakeCUDACompiler.cmake.in +++ b/Modules/CMakeCUDACompiler.cmake.in @@ -55,6 +55,7 @@ set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@") set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@") set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@") +set(CMAKE_CUDA_ARCHITECTURES_NATIVE "@CMAKE_CUDA_ARCHITECTURES_NATIVE@") set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") diff --git a/Modules/CMakeCUDACompilerABI.cu b/Modules/CMakeCUDACompilerABI.cu index 449a079..8463e86 100644 --- a/Modules/CMakeCUDACompilerABI.cu +++ b/Modules/CMakeCUDACompilerABI.cu @@ -2,6 +2,10 @@ # error "A C or C++ compiler has been selected for CUDA" #endif +#include <cstdio> + +#include <cuda_runtime.h> + #include "CMakeCompilerABI.h" int main(int argc, char* argv[]) @@ -13,6 +17,31 @@ int main(int argc, char* argv[]) #if defined(ABI_ID) require += info_abi[argc]; #endif - (void)argv; - return require; + static_cast<void>(argv); + + int count = 0; + if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) { + std::fprintf(stderr, "No CUDA devices found.\n"); + return -1; + } + + int found = 0; + const char* sep = ""; + for (int device = 0; device < count; ++device) { + cudaDeviceProp prop; + if (cudaGetDeviceProperties(&prop, device) == cudaSuccess) { + std::printf("%s%d%d", sep, prop.major, prop.minor); + sep = ";"; + found = 1; + } + } + + if (!found) { + std::fprintf(stderr, "No CUDA architecture detected from any devices.\n"); + // Convince the compiler that the non-zero return value depends + // on the info strings so they are not optimized out. + return require ? -1 : 1; + } + + return 0; } diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index 4263bed..f68c4b2 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -249,7 +249,7 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}") endif() - # Make the all and all-major architecture information available. + # Make the all, all-major, and native architecture information available. # FIXME(#23161): Defer architecture detection until compiler testing. include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake) endif() @@ -291,6 +291,17 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}) endif() endif() + elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "native") + # For sufficiently new NVCC we can just use the 'native' value directly. + # For VS we don't test since we can't find nvcc this early (see #23161). + if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.6) + string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}") + set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}") + elseif(CMAKE_GENERATOR MATCHES "Visual Studio") + set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}") + else() + set(architectures_test ${_CUDA_ARCHITECTURES_NATIVE}) + endif() elseif(CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") # Explicit architectures. Test them during detection. set(architectures_explicit TRUE) @@ -636,7 +647,7 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") endif() endif() -elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major)$") +elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major|native)$") # Sort since order mustn't matter. list(SORT architectures_detected) list(SORT architectures_tested) diff --git a/Modules/CMakeDetermineCompilerABI.cmake b/Modules/CMakeDetermineCompilerABI.cmake index 8191d81..82a6d21 100644 --- a/Modules/CMakeDetermineCompilerABI.cmake +++ b/Modules/CMakeDetermineCompilerABI.cmake @@ -26,6 +26,14 @@ function(CMAKE_DETERMINE_COMPILER_ABI lang src) if(DEFINED CMAKE_${lang}_VERBOSE_COMPILE_FLAG) set(COMPILE_DEFINITIONS "${CMAKE_${lang}_VERBOSE_COMPILE_FLAG}") endif() + if(lang STREQUAL "CUDA") + if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native") + # We are about to detect the native architectures, so we do + # not yet know them. Use all architectures during detection. + set(CMAKE_CUDA_ARCHITECTURES "all") + endif() + set(CMAKE_CUDA_RUNTIME_LIBRARY "Static") + endif() if(NOT "x${CMAKE_${lang}_COMPILER_ID}" STREQUAL "xMSVC") # Avoid adding our own platform standard libraries for compilers # from which we might detect implicit link libraries. diff --git a/Modules/CMakeDetermineCompilerId.cmake b/Modules/CMakeDetermineCompilerId.cmake index 5670509..0317f34 100644 --- a/Modules/CMakeDetermineCompilerId.cmake +++ b/Modules/CMakeDetermineCompilerId.cmake @@ -495,7 +495,7 @@ Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS} if(CMAKE_VS_PLATFORM_NAME STREQUAL x64) set(cuda_target "<TargetMachinePlatform>64</TargetMachinePlatform>") endif() - if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major)$") + if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major|native)$") foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) string(REGEX MATCH "[0-9]+" arch_name "${arch}") string(APPEND cuda_codegen "compute_${arch_name},sm_${arch_name};") diff --git a/Modules/CMakeTestCUDACompiler.cmake b/Modules/CMakeTestCUDACompiler.cmake index 25a3653..95f015f 100644 --- a/Modules/CMakeTestCUDACompiler.cmake +++ b/Modules/CMakeTestCUDACompiler.cmake @@ -21,6 +21,31 @@ if(CMAKE_CUDA_ABI_COMPILED) # The compiler worked so skip dedicated test below. set(CMAKE_CUDA_COMPILER_WORKS TRUE) message(STATUS "Check for working CUDA compiler: ${CMAKE_CUDA_COMPILER} - skipped") + + # Run the test binary to detect the native architectures. + execute_process(COMMAND "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCompilerABI_CUDA.bin" + RESULT_VARIABLE _CUDA_ARCHS_RESULT + OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT + ERROR_VARIABLE _CUDA_ARCHS_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(_CUDA_ARCHS_RESULT EQUAL 0) + set(CMAKE_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}") + list(REMOVE_DUPLICATES CMAKE_CUDA_ARCHITECTURES_NATIVE) + else() + if(NOT _CUDA_ARCHS_RESULT MATCHES "[0-9]+") + set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})") + else() + set(_CUDA_ARCHS_STATUS "") + endif() + string(REPLACE "\n" "\n " _CUDA_ARCHS_OUTPUT " ${_CUDA_ARCHS_OUTPUT}") + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log + "Detecting the CUDA native architecture(s) failed with " + "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n") + endif() + unset(_CUDA_ARCHS_EXE) + unset(_CUDA_ARCHS_RESULT) + unset(_CUDA_ARCHS_OUTPUT) endif() # This file is used by EnableLanguage in cmGlobalGenerator to diff --git a/Modules/CUDA/architectures.cmake b/Modules/CUDA/architectures.cmake index fa3a5a1..521243b 100644 --- a/Modules/CUDA/architectures.cmake +++ b/Modules/CUDA/architectures.cmake @@ -44,3 +44,43 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4 AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")) list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87) endif() + +# FIXME(#23161): Detect architectures early since we test them during +# compiler detection. We already have code to detect them later during +# compiler testing, so we should not need to do this here. +if(NOT CMAKE_GENERATOR MATCHES "Visual Studio") + set(_CUDA_ARCHS_EXE "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCUDACompilerArchs.bin") + execute_process( + COMMAND "${_CUDA_NVCC_EXECUTABLE}" -o "${_CUDA_ARCHS_EXE}" --cudart=static "${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu" + RESULT_VARIABLE _CUDA_ARCHS_RESULT + OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT + ERROR_VARIABLE _CUDA_ARCHS_OUTPUT + ) + if(_CUDA_ARCHS_RESULT EQUAL 0) + execute_process( + COMMAND "${_CUDA_ARCHS_EXE}" + RESULT_VARIABLE _CUDA_ARCHS_RESULT + OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT + ERROR_VARIABLE _CUDA_ARCHS_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + endif() + if(_CUDA_ARCHS_RESULT EQUAL 0) + set(_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}") + list(REMOVE_DUPLICATES _CUDA_ARCHITECTURES_NATIVE) + else() + if (NOT _CUDA_ARCHS_RESULT MATCHES "[0-9]+") + set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})") + else() + set(_CUDA_ARCHS_STATUS "") + endif() + string(REPLACE "\n" "\n " _CUDA_ARCHS_OUTPUT " ${_CUDA_ARCHS_OUTPUT}") + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log + "Detecting the CUDA native architecture(s) failed with " + "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n") + set(_CUDA_ARCHS_OUTPUT "") + endif() + unset(_CUDA_ARCHS_EXE) + unset(_CUDA_ARCHS_RESULT) + unset(_CUDA_ARCHS_OUTPUT) +endif() diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx index 8a6cc1c..ab3ed12 100644 --- a/Source/cmGeneratorTarget.cxx +++ b/Source/cmGeneratorTarget.cxx @@ -3467,6 +3467,23 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const property = *this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR"); } + } else if (property == "native") { + cmValue native = + this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_NATIVE"); + if (native.IsEmpty()) { + this->Makefile->IssueMessage( + MessageType::FATAL_ERROR, + "CUDA_ARCHITECTURES is set to \"native\", but no GPU was detected."); + } + if (compiler == "NVIDIA" && + cmSystemTools::VersionCompare( + cmSystemTools::OP_GREATER_EQUAL, + this->Makefile->GetDefinition("CMAKE_CUDA_COMPILER_VERSION"), + "11.6")) { + flags = cmStrCat(flags, " -arch=", property); + return; + } + property = *native; } struct CudaArchitecture diff --git a/Source/cmVisualStudioGeneratorOptions.cxx b/Source/cmVisualStudioGeneratorOptions.cxx index 9045a4d..d5d08b8 100644 --- a/Source/cmVisualStudioGeneratorOptions.cxx +++ b/Source/cmVisualStudioGeneratorOptions.cxx @@ -182,7 +182,8 @@ void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration() // First entries for the -arch=<arch> [-code=<code>,...] pair. if (!arch.empty()) { std::string arch_name = arch[0]; - if (arch_name == "all" || arch_name == "all-major") { + if (arch_name == "all" || arch_name == "all-major" || + arch_name == "native") { AppendFlagString("AdditionalOptions", "-arch=" + arch_name); return; } diff --git a/Tests/CudaOnly/ArchSpecial/CMakeLists.txt b/Tests/CudaOnly/ArchSpecial/CMakeLists.txt index 11f4292..46f4ada 100644 --- a/Tests/CudaOnly/ArchSpecial/CMakeLists.txt +++ b/Tests/CudaOnly/ArchSpecial/CMakeLists.txt @@ -25,6 +25,7 @@ function(verify_output flag) endforeach() list(SORT command_archs) + list(REMOVE_DUPLICATES command_archs) if(NOT "${command_archs}" STREQUAL "${architectures}") message(FATAL_ERROR "Architectures used for \"${flag}\" don't match the reference (\"${command_archs}\" != \"${architectures}\").") endif() @@ -50,7 +51,17 @@ try_compile(all_major_archs_compiles ) verify_output(all-major) -if(all_archs_compiles AND all_major_archs_compiles) +set(CMAKE_CUDA_ARCHITECTURES native) +try_compile(native_archs_compiles + ${CMAKE_CURRENT_BINARY_DIR}/try_compile/native_archs_compiles + ${CMAKE_CURRENT_SOURCE_DIR}/main.cu + COMPILE_DEFINITIONS ${try_compile_flags} + OUTPUT_VARIABLE output + ) +verify_output(native) + +if(all_archs_compiles AND all_major_archs_compiles AND native_archs_compiles) + set(CMAKE_CUDA_ARCHITECTURES all) add_executable(CudaOnlyArchSpecial main.cu) target_compile_options(CudaOnlyArchSpecial PRIVATE ${compile_options}) endif() diff --git a/Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake b/Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake index 6fecae7..e37d025 100644 --- a/Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake +++ b/Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake @@ -2,6 +2,7 @@ include(RunCMake) run_cmake(architectures-all) run_cmake(architectures-all-major) +run_cmake(architectures-native) run_cmake(architectures-empty) run_cmake(architectures-invalid) diff --git a/Tests/RunCMake/CUDA_architectures/architectures-all-major-stdout.txt b/Tests/RunCMake/CUDA_architectures/architectures-all-major-stdout.txt index c5cde8f..ee0a5f7 100644 --- a/Tests/RunCMake/CUDA_architectures/architectures-all-major-stdout.txt +++ b/Tests/RunCMake/CUDA_architectures/architectures-all-major-stdout.txt @@ -1,3 +1,4 @@ -- CMAKE_CUDA_ARCHITECTURES='all-major' -- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+' -- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+' +-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+' diff --git a/Tests/RunCMake/CUDA_architectures/architectures-all-major.cmake b/Tests/RunCMake/CUDA_architectures/architectures-all-major.cmake index 5112473..7203c98 100644 --- a/Tests/RunCMake/CUDA_architectures/architectures-all-major.cmake +++ b/Tests/RunCMake/CUDA_architectures/architectures-all-major.cmake @@ -3,3 +3,4 @@ enable_language(CUDA) message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'") message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'") message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'") +message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'") diff --git a/Tests/RunCMake/CUDA_architectures/architectures-all-stdout.txt b/Tests/RunCMake/CUDA_architectures/architectures-all-stdout.txt index aba26b9..fd738e4 100644 --- a/Tests/RunCMake/CUDA_architectures/architectures-all-stdout.txt +++ b/Tests/RunCMake/CUDA_architectures/architectures-all-stdout.txt @@ -1,3 +1,4 @@ -- CMAKE_CUDA_ARCHITECTURES='all' -- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+' -- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+' +-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+' diff --git a/Tests/RunCMake/CUDA_architectures/architectures-all.cmake b/Tests/RunCMake/CUDA_architectures/architectures-all.cmake index 32175f6..0c3a4e9 100644 --- a/Tests/RunCMake/CUDA_architectures/architectures-all.cmake +++ b/Tests/RunCMake/CUDA_architectures/architectures-all.cmake @@ -3,3 +3,4 @@ enable_language(CUDA) message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'") message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'") message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'") +message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'") diff --git a/Tests/RunCMake/CUDA_architectures/architectures-native-stdout.txt b/Tests/RunCMake/CUDA_architectures/architectures-native-stdout.txt new file mode 100644 index 0000000..af49e00 --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-native-stdout.txt @@ -0,0 +1,4 @@ +-- CMAKE_CUDA_ARCHITECTURES='native' +-- CMAKE_CUDA_ARCHITECTURES_ALL='[0-9;]+' +-- CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='[0-9;]+' +-- CMAKE_CUDA_ARCHITECTURES_NATIVE='[0-9;]+' diff --git a/Tests/RunCMake/CUDA_architectures/architectures-native.cmake b/Tests/RunCMake/CUDA_architectures/architectures-native.cmake new file mode 100644 index 0000000..64afe13 --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-native.cmake @@ -0,0 +1,6 @@ +set(CMAKE_CUDA_ARCHITECTURES "native") +enable_language(CUDA) +message(STATUS "CMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'") +message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL='${CMAKE_CUDA_ARCHITECTURES_ALL}'") +message(STATUS "CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR='${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}'") +message(STATUS "CMAKE_CUDA_ARCHITECTURES_NATIVE='${CMAKE_CUDA_ARCHITECTURES_NATIVE}'") |