diff options
author | Brad King <brad.king@kitware.com> | 2022-02-02 13:23:05 (GMT) |
---|---|---|
committer | Kitware Robot <kwrobot@kitware.com> | 2022-02-02 13:23:19 (GMT) |
commit | 92459258108c0036808f29f83a8503fea18ccee9 (patch) | |
tree | c2f302d41c63a149b510c97c74a664f980aa0808 /Modules | |
parent | 0f316675cfb572058b2077e72a8ecbb6b5c7b534 (diff) | |
parent | 8f64df0a7c2c9126017847f2bb8d37bc54ea0338 (diff) | |
download | CMake-92459258108c0036808f29f83a8503fea18ccee9.zip CMake-92459258108c0036808f29f83a8503fea18ccee9.tar.gz CMake-92459258108c0036808f29f83a8503fea18ccee9.tar.bz2 |
Merge topic 'cuda_generic_arch_all'
8f64df0a7c CUDA: Generic all and all-major support
Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Alex <leha-bot@yandex.ru>
Merge-request: !6816
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/CMakeCUDACompiler.cmake.in | 4 | ||||
-rw-r--r-- | Modules/CMakeDetermineCUDACompiler.cmake | 101 | ||||
-rw-r--r-- | Modules/CUDA/architectures.cmake | 46 | ||||
-rw-r--r-- | Modules/FindCUDAToolkit.cmake | 77 |
4 files changed, 150 insertions, 78 deletions
diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in index 2f3e9a8..9f2e213 100644 --- a/Modules/CMakeCUDACompiler.cmake.in +++ b/Modules/CMakeCUDACompiler.cmake.in @@ -50,8 +50,12 @@ endif() set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@") set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@") +set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "@CMAKE_CUDA_COMPILER_TOOLKIT_VERSION@") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@") +set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@") +set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@") + set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES@") diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index 8fe07fe..c21d622 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -78,10 +78,11 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) message(FATAL_ERROR "Clang with CUDA is not yet supported on Windows. See CMake issue #20776.") endif() - # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT and CMAKE_CUDA_COMPILER_LIBRARY_ROOT - # in CMakeCUDACompiler.cmake, so FindCUDAToolkit can avoid searching on future runs and the toolkit stays the same. + # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT, CMAKE_CUDA_COMPILER_TOOLKIT_VERSION and + # CMAKE_CUDA_COMPILER_LIBRARY_ROOT in CMakeCUDACompiler.cmake so FindCUDAToolkit can avoid searching on future + # runs and the toolkit is the same. # This is very similar to FindCUDAToolkit, but somewhat simplified since we can issue fatal errors - # if we fail to find things we need and we don't need to account for searching the libraries. 
+ # if we fail and we don't need to account for searching the libraries. # For NVCC we can easily deduce the SDK binary directory from the compiler path. if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") @@ -237,6 +238,21 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) endif() endif() + # For regular nvcc the toolkit version is the same as the compiler version and we can parse it from the vendor test output. + # For Clang we need to invoke nvcc to get version output. + if(NOT CMAKE_GENERATOR MATCHES "Visual Studio") + if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang") + execute_process(COMMAND ${_CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE CMAKE_CUDA_COMPILER_ID_OUTPUT) + endif() + + if(CMAKE_CUDA_COMPILER_ID_OUTPUT MATCHES [=[V([0-9]+\.[0-9]+\.[0-9]+)]=]) + set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}") + endif() + + # Make the all and all-major architecture information available. + include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake) + endif() + set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v") if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") @@ -256,33 +272,41 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) endif() endif() - # Append user-specified architectures. - if(DEFINED CMAKE_CUDA_ARCHITECTURES) - if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall") + # Detect explicit architectures and add them during detection. + if(DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all" AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major") + set(architectures_explicit TRUE) + set(architectures_test ${CMAKE_CUDA_ARCHITECTURES}) + endif() + + # For sufficiently new NVCC we can just use the all and all-major flags. + # For VS we don't test since we can't figure out the version this early (see #23161). + # For others select based on version. 
+ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.5) + if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all") string(APPEND nvcc_test_flags " -arch=all") - set(architectures_mode all) - elseif("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall-major") + elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major") string(APPEND nvcc_test_flags " -arch=all-major") - set(architectures_mode all-major) - else() - set(architectures_mode explicit) - foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) - # Strip specifiers as PTX vs binary doesn't matter. - string(REGEX MATCH "[0-9]+" arch_name "${arch}") - string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}") - string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}") - list(APPEND tested_architectures "${arch_name}") - endforeach() endif() - - # If the user has specified architectures we'll want to fail during compiler detection if they don't work. - set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON) + elseif(NOT CMAKE_GENERATOR MATCHES "Visual Studio") + if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all") + set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL}) + elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major") + set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}) + endif() endif() + foreach(arch ${architectures_test}) + # Strip specifiers as PTX vs binary doesn't matter. + string(REGEX MATCH "[0-9]+" arch_name "${arch}") + string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}") + string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}") + list(APPEND architectures_tested "${arch_name}") + endforeach() + # Rest of the code treats an empty value as equivalent to "use the defaults". # Error out early to prevent confusing errors as a result of this. # Note that this also catches invalid non-numerical values such as "a". 
- if(architectures_mode STREQUAL "explicit" AND "${tested_architectures}" STREQUAL "") + if(DEFINED architectures_explicit AND "${architectures_tested}" STREQUAL "") message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.") endif() @@ -318,6 +342,10 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER}" DIRECTORY) get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY) set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") + + # We now know the version, so make the architecture variables available. + set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION ${CMAKE_CUDA_COMPILER_VERSION}) + include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake) endif() _cmake_find_compiler_sysroot(CUDA) @@ -604,38 +632,27 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") endif() endif() -elseif(architectures AND (architectures_mode STREQUAL "xall" OR - architectures_mode STREQUAL "xall-major")) - if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") - message(FATAL_ERROR - "The CMAKE_CUDA_ARCHITECTURES:\n" - " ${CMAKE_CUDA_ARCHITECTURES}\n" - "is not supported with the ${CMAKE_CUDA_COMPILER_ID} compiler. Try:\n" - " ${architectures}\n" - "instead.") - endif() - -elseif(architectures_mode STREQUAL "xexplicit") +else() # Sort since order mustn't matter. list(SORT architectures_detected) - list(SORT tested_architectures) + list(SORT architectures_tested) # We don't distinguish real/virtual architectures during testing. - # For "70-real;70-virtual" we detect "70" as working and tested_architectures is "70;70". + # For "70-real;70-virtual" we detect "70" as working and architectures_tested is "70;70". # Thus we need to remove duplicates before checking if they're equal. 
- list(REMOVE_DUPLICATES tested_architectures) + list(REMOVE_DUPLICATES architectures_tested) # Print the actual architectures for generic values (all and all-major). if(NOT DEFINED architectures_explicit) - set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${tested_architectures})") + set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${architectures_tested})") else() - set(architectures_error "${tested_architectures}") + set(architectures_error "${architectures_tested}") endif() - if(NOT "${architectures_detected}" STREQUAL "${tested_architectures}") + if(NOT "${architectures_detected}" STREQUAL "${architectures_tested}") message(FATAL_ERROR "The CMAKE_CUDA_ARCHITECTURES:\n" - " ${CMAKE_CUDA_ARCHITECTURES}\n" + " ${architectures_error}\n" "do not all work with this compiler. Try:\n" " ${architectures_detected}\n" "instead.") @@ -655,7 +672,7 @@ unset(_CUDA_LIBRARY_DIR) unset(_CUDA_TARGET_DIR) unset(_CUDA_TARGET_NAME) -unset(architectures_mode) +unset(architectures_explicit) set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX") set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX") diff --git a/Modules/CUDA/architectures.cmake b/Modules/CUDA/architectures.cmake new file mode 100644 index 0000000..fa3a5a1 --- /dev/null +++ b/Modules/CUDA/architectures.cmake @@ -0,0 +1,46 @@ +# See supported GPUs on Wikipedia +# https://en.wikipedia.org/wiki/CUDA#GPUs_supported + +# Initial set based on CUDA 7.0. 
+set(CMAKE_CUDA_ARCHITECTURES_ALL 20 21 30 35 37 50 52 53) +set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 30 35 50) + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 8.0) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 60 61 62) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 60) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 9.0) + if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 6.0) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 70 72) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 70) + endif() + + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 20 21) + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 21) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 10.0 + AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 75) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.0) + if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 80) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 80) + endif() + + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 30) + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 30) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.1 + AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 86) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4 + AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87) +endif() diff --git a/Modules/FindCUDAToolkit.cmake b/Modules/FindCUDAToolkit.cmake index 573f956..7ecc9d4 100644 --- a/Modules/FindCUDAToolkit.cmake +++ b/Modules/FindCUDAToolkit.cmake @@ -499,12 +499,17 @@ 
if(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT) set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}") set(CUDAToolkit_BIN_DIR "${CUDAToolkit_ROOT_DIR}/bin") set(CUDAToolkit_NVCC_EXECUTABLE "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}") -else() + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}") + if(CUDAToolkit_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + endif() +else() function(_CUDAToolkit_find_root_dir ) cmake_parse_arguments(arg "" "" "SEARCH_PATHS;FIND_FLAGS" ${ARGN}) - if(NOT CUDAToolkit_BIN_DIR) if(NOT CUDAToolkit_SENTINEL_FILE) find_program(CUDAToolkit_NVCC_EXECUTABLE @@ -687,6 +692,40 @@ else() get_filename_component(CUDAToolkit_LIBRARY_ROOT "${_CUDAToolkit_version_file}" DIRECTORY ABSOLUTE) endif() unset(_CUDAToolkit_version_file) + + if(CUDAToolkit_NVCC_EXECUTABLE AND + CMAKE_CUDA_COMPILER_VERSION AND + CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER) + # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value + # This if statement will always match, but is used to provide variables for MATCH 1,2,3... 
+ if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}") + endif() + elseif(CUDAToolkit_NVCC_EXECUTABLE) + # Compute the version by invoking nvcc + execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) + if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + unset(NVCC_OUT) + else() + _CUDAToolkit_find_version_file(version_file) + if(version_file) + file(READ "${version_file}" VERSION_INFO) + if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + endif() + endif() endif() # Find target directory when crosscompiling. @@ -754,40 +793,6 @@ if(NOT EXISTS "${CUDAToolkit_INCLUDE_DIR}/cublas_v2.h") endif() endif() -if(CUDAToolkit_NVCC_EXECUTABLE AND - CMAKE_CUDA_COMPILER_VERSION AND - CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER) - # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value - # This if statement will always match, but is used to provide variables for MATCH 1,2,3... 
- if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) - set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") - set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") - set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") - set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}") - endif() -elseif(CUDAToolkit_NVCC_EXECUTABLE) - # Compute the version by invoking nvcc - execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) - if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) - set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") - set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") - set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") - set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") - endif() - unset(NVCC_OUT) -else() - _CUDAToolkit_find_version_file(version_file) - if(version_file) - file(READ "${version_file}" VERSION_INFO) - if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=]) - set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") - set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") - set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") - set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") - endif() - endif() -endif() - # Find the CUDA Runtime Library libcudart find_library(CUDA_CUDART NAMES cudart |