diff options
author | Brad King <brad.king@kitware.com> | 2022-02-02 13:23:05 (GMT) |
---|---|---|
committer | Kitware Robot <kwrobot@kitware.com> | 2022-02-02 13:23:19 (GMT) |
commit | 92459258108c0036808f29f83a8503fea18ccee9 (patch) | |
tree | c2f302d41c63a149b510c97c74a664f980aa0808 | |
parent | 0f316675cfb572058b2077e72a8ecbb6b5c7b534 (diff) | |
parent | 8f64df0a7c2c9126017847f2bb8d37bc54ea0338 (diff) | |
download | CMake-92459258108c0036808f29f83a8503fea18ccee9.zip CMake-92459258108c0036808f29f83a8503fea18ccee9.tar.gz CMake-92459258108c0036808f29f83a8503fea18ccee9.tar.bz2 |
Merge topic 'cuda_generic_arch_all'
8f64df0a7c CUDA: Generic all and all-major support
Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Alex <leha-bot@yandex.ru>
Merge-request: !6816
-rw-r--r-- | Help/release/dev/cuda-new-arch-modes.rst | 6 | ||||
-rw-r--r-- | Modules/CMakeCUDACompiler.cmake.in | 4 | ||||
-rw-r--r-- | Modules/CMakeDetermineCUDACompiler.cmake | 101 | ||||
-rw-r--r-- | Modules/CUDA/architectures.cmake | 46 | ||||
-rw-r--r-- | Modules/FindCUDAToolkit.cmake | 77 | ||||
-rw-r--r-- | Source/cmGeneratorTarget.cxx | 24 | ||||
-rw-r--r-- | Tests/CudaOnly/All/CMakeLists.txt | 80 |
7 files changed, 214 insertions, 124 deletions
diff --git a/Help/release/dev/cuda-new-arch-modes.rst b/Help/release/dev/cuda-new-arch-modes.rst index 549abc3..fcfd8f1 100644 --- a/Help/release/dev/cuda-new-arch-modes.rst +++ b/Help/release/dev/cuda-new-arch-modes.rst @@ -2,9 +2,7 @@ cuda-new-arch-modes ------------------- * The :prop_tgt:`CUDA_ARCHITECTURES` target property now supports the - `all`, and `all-major` values when the CUDA compiler id is ``NVIDIA``, - and version is 11.5+. + `all`, and `all-major` values for CUDA toolkit 7.0+. * The :variable:`CMAKE_CUDA_ARCHITECTURES` variable now supports the - `all`, and `all-major` values when the `CUDA` compiler id is ``NVIDIA``, - and version is 11.5+. + `all`, and `all-major` values for CUDA toolkit 7.0+. diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in index 2f3e9a8..9f2e213 100644 --- a/Modules/CMakeCUDACompiler.cmake.in +++ b/Modules/CMakeCUDACompiler.cmake.in @@ -50,8 +50,12 @@ endif() set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@") set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@") +set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "@CMAKE_CUDA_COMPILER_TOOLKIT_VERSION@") set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@") +set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@") +set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@") + set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@") set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES@") diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index 8fe07fe..c21d622 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -78,10 +78,11 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) message(FATAL_ERROR "Clang with CUDA is not yet supported on Windows. 
See CMake issue #20776.") endif() - # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT and CMAKE_CUDA_COMPILER_LIBRARY_ROOT - # in CMakeCUDACompiler.cmake, so FindCUDAToolkit can avoid searching on future runs and the toolkit stays the same. + # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT, CMAKE_CUDA_COMPILER_TOOLKIT_VERSION and + # CMAKE_CUDA_COMPILER_LIBRARY_ROOT in CMakeCUDACompiler.cmake so FindCUDAToolkit can avoid searching on future + # runs and the toolkit is the same. # This is very similar to FindCUDAToolkit, but somewhat simplified since we can issue fatal errors - # if we fail to find things we need and we don't need to account for searching the libraries. + # if we fail and we don't need to account for searching the libraries. # For NVCC we can easily deduce the SDK binary directory from the compiler path. if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") @@ -237,6 +238,21 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) endif() endif() + # For regular nvcc the toolkit version is the same as the compiler version and we can parse it from the vendor test output. + # For Clang we need to invoke nvcc to get version output. + if(NOT CMAKE_GENERATOR MATCHES "Visual Studio") + if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang") + execute_process(COMMAND ${_CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE CMAKE_CUDA_COMPILER_ID_OUTPUT) + endif() + + if(CMAKE_CUDA_COMPILER_ID_OUTPUT MATCHES [=[V([0-9]+\.[0-9]+\.[0-9]+)]=]) + set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}") + endif() + + # Make the all and all-major architecture information available. + include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake) + endif() + set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v") if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") @@ -256,33 +272,41 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) endif() endif() - # Append user-specified architectures. 
- if(DEFINED CMAKE_CUDA_ARCHITECTURES) - if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall") + # Detect explicit architectures and add them during detection. + if(DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all" AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major") + set(architectures_explicit TRUE) + set(architectures_test ${CMAKE_CUDA_ARCHITECTURES}) + endif() + + # For sufficiently new NVCC we can just use the all and all-major flags. + # For VS we don't test since we can't figure out the version this early (see #23161). + # For others select based on version. + if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.5) + if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all") string(APPEND nvcc_test_flags " -arch=all") - set(architectures_mode all) - elseif("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall-major") + elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major") string(APPEND nvcc_test_flags " -arch=all-major") - set(architectures_mode all-major) - else() - set(architectures_mode explicit) - foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) - # Strip specifiers as PTX vs binary doesn't matter. - string(REGEX MATCH "[0-9]+" arch_name "${arch}") - string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}") - string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}") - list(APPEND tested_architectures "${arch_name}") - endforeach() endif() - - # If the user has specified architectures we'll want to fail during compiler detection if they don't work. 
- set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON) + elseif(NOT CMAKE_GENERATOR MATCHES "Visual Studio") + if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all") + set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL}) + elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major") + set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR}) + endif() endif() + foreach(arch ${architectures_test}) + # Strip specifiers as PTX vs binary doesn't matter. + string(REGEX MATCH "[0-9]+" arch_name "${arch}") + string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}") + string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}") + list(APPEND architectures_tested "${arch_name}") + endforeach() + # Rest of the code treats an empty value as equivalent to "use the defaults". # Error out early to prevent confusing errors as a result of this. # Note that this also catches invalid non-numerical values such as "a". - if(architectures_mode STREQUAL "explicit" AND "${tested_architectures}" STREQUAL "") + if(DEFINED architectures_explicit AND "${architectures_tested}" STREQUAL "") message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.") endif() @@ -318,6 +342,10 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER}" DIRECTORY) get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY) set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}") + + # We now know the version, so make the architecture variables available. 
+ set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION ${CMAKE_CUDA_COMPILER_VERSION}) + include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake) endif() _cmake_find_compiler_sysroot(CUDA) @@ -604,38 +632,27 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") endif() endif() -elseif(architectures AND (architectures_mode STREQUAL "xall" OR - architectures_mode STREQUAL "xall-major")) - if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") - message(FATAL_ERROR - "The CMAKE_CUDA_ARCHITECTURES:\n" - " ${CMAKE_CUDA_ARCHITECTURES}\n" - "is not supported with the ${CMAKE_CUDA_COMPILER_ID} compiler. Try:\n" - " ${architectures}\n" - "instead.") - endif() - -elseif(architectures_mode STREQUAL "xexplicit") +else() # Sort since order mustn't matter. list(SORT architectures_detected) - list(SORT tested_architectures) + list(SORT architectures_tested) # We don't distinguish real/virtual architectures during testing. - # For "70-real;70-virtual" we detect "70" as working and tested_architectures is "70;70". + # For "70-real;70-virtual" we detect "70" as working and architectures_tested is "70;70". # Thus we need to remove duplicates before checking if they're equal. - list(REMOVE_DUPLICATES tested_architectures) + list(REMOVE_DUPLICATES architectures_tested) # Print the actual architectures for generic values (all and all-major). 
if(NOT DEFINED architectures_explicit) - set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${tested_architectures})") + set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${architectures_tested})") else() - set(architectures_error "${tested_architectures}") + set(architectures_error "${architectures_tested}") endif() - if(NOT "${architectures_detected}" STREQUAL "${tested_architectures}") + if(NOT "${architectures_detected}" STREQUAL "${architectures_tested}") message(FATAL_ERROR "The CMAKE_CUDA_ARCHITECTURES:\n" - " ${CMAKE_CUDA_ARCHITECTURES}\n" + " ${architectures_error}\n" "do not all work with this compiler. Try:\n" " ${architectures_detected}\n" "instead.") @@ -655,7 +672,7 @@ unset(_CUDA_LIBRARY_DIR) unset(_CUDA_TARGET_DIR) unset(_CUDA_TARGET_NAME) -unset(architectures_mode) +unset(architectures_explicit) set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX") set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX") diff --git a/Modules/CUDA/architectures.cmake b/Modules/CUDA/architectures.cmake new file mode 100644 index 0000000..fa3a5a1 --- /dev/null +++ b/Modules/CUDA/architectures.cmake @@ -0,0 +1,46 @@ +# See supported GPUs on Wikipedia +# https://en.wikipedia.org/wiki/CUDA#GPUs_supported + +# Initial set based on CUDA 7.0. 
+set(CMAKE_CUDA_ARCHITECTURES_ALL 20 21 30 35 37 50 52 53) +set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 30 35 50) + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 8.0) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 60 61 62) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 60) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 9.0) + if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 6.0) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 70 72) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 70) + endif() + + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 20 21) + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 21) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 10.0 + AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 75) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.0) + if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 80) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 80) + endif() + + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 30) + list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 30) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.1 + AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 86) +endif() + +if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4 + AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")) + list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87) +endif() diff --git a/Modules/FindCUDAToolkit.cmake b/Modules/FindCUDAToolkit.cmake index 573f956..7ecc9d4 100644 --- a/Modules/FindCUDAToolkit.cmake +++ b/Modules/FindCUDAToolkit.cmake @@ -499,12 +499,17 @@ 
if(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT) set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}") set(CUDAToolkit_BIN_DIR "${CUDAToolkit_ROOT_DIR}/bin") set(CUDAToolkit_NVCC_EXECUTABLE "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}") -else() + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}") + if(CUDAToolkit_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + endif() +else() function(_CUDAToolkit_find_root_dir ) cmake_parse_arguments(arg "" "" "SEARCH_PATHS;FIND_FLAGS" ${ARGN}) - if(NOT CUDAToolkit_BIN_DIR) if(NOT CUDAToolkit_SENTINEL_FILE) find_program(CUDAToolkit_NVCC_EXECUTABLE @@ -687,6 +692,40 @@ else() get_filename_component(CUDAToolkit_LIBRARY_ROOT "${_CUDAToolkit_version_file}" DIRECTORY ABSOLUTE) endif() unset(_CUDAToolkit_version_file) + + if(CUDAToolkit_NVCC_EXECUTABLE AND + CMAKE_CUDA_COMPILER_VERSION AND + CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER) + # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value + # This if statement will always match, but is used to provide variables for MATCH 1,2,3... 
+ if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}") + endif() + elseif(CUDAToolkit_NVCC_EXECUTABLE) + # Compute the version by invoking nvcc + execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) + if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + unset(NVCC_OUT) + else() + _CUDAToolkit_find_version_file(version_file) + if(version_file) + file(READ "${version_file}" VERSION_INFO) + if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") + set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") + set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") + set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") + endif() + endif() + endif() endif() # Find target directory when crosscompiling. @@ -754,40 +793,6 @@ if(NOT EXISTS "${CUDAToolkit_INCLUDE_DIR}/cublas_v2.h") endif() endif() -if(CUDAToolkit_NVCC_EXECUTABLE AND - CMAKE_CUDA_COMPILER_VERSION AND - CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER) - # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value - # This if statement will always match, but is used to provide variables for MATCH 1,2,3... 
- if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) - set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") - set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") - set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") - set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}") - endif() -elseif(CUDAToolkit_NVCC_EXECUTABLE) - # Compute the version by invoking nvcc - execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) - if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=]) - set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") - set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") - set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") - set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") - endif() - unset(NVCC_OUT) -else() - _CUDAToolkit_find_version_file(version_file) - if(version_file) - file(READ "${version_file}" VERSION_INFO) - if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=]) - set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}") - set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}") - set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}") - set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") - endif() - endif() -endif() - # Find the CUDA Runtime Library libcudart find_library(CUDA_CUDART NAMES cudart diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx index c4f1a13..9f1029e 100644 --- a/Source/cmGeneratorTarget.cxx +++ b/Source/cmGeneratorTarget.cxx @@ -3415,7 +3415,7 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags, void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const { - const std::string& property = this->GetSafeProperty("CUDA_ARCHITECTURES"); + std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES"); if (property.empty()) { switch (this->GetPolicyStatusCMP0104()) { @@ -3447,16 +3447,24 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const 
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID"); // Check for special modes: `all`, `all-major`. - if (property == "all") { - if (compiler == "NVIDIA") { + if (compiler == "NVIDIA" && + cmSystemTools::VersionCompare( + cmSystemTools::OP_GREATER_EQUAL, + this->Makefile->GetDefinition("CMAKE_CUDA_COMPILER_VERSION"), + "11.5")) { + if (property == "all") { flags += " -arch=all"; - return; - } - } else if (property == "all-major") { - if (compiler == "NVIDIA") { + } else if (property == "all-major") { flags += " -arch=all-major"; - return; } + return; + } + + if (property == "all") { + property = *this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL"); + } else if (property == "all-major") { + property = + *this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR"); } struct CudaArchitecture diff --git a/Tests/CudaOnly/All/CMakeLists.txt b/Tests/CudaOnly/All/CMakeLists.txt index fe29bb0..ba32e9a 100644 --- a/Tests/CudaOnly/All/CMakeLists.txt +++ b/Tests/CudaOnly/All/CMakeLists.txt @@ -2,43 +2,55 @@ cmake_minimum_required(VERSION 3.20) project(CudaOnlyAll CUDA) if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND - CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.5.0) - + CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) set(compile_options -Wno-deprecated-gpu-targets) - function(verify_output flag output_var) - string(REGEX MATCHALL "-arch compute_([0-9]+)" target_archs "${${output_var}}") - list(LENGTH target_archs count) - if(count LESS 2) - message(FATAL_ERROR "${flag} failed to map to multiple architectures") - endif() - endfunction() endif() -if(COMMAND verify_output) - set(try_compile_flags -v ${compile_options}) - - set(CMAKE_CUDA_ARCHITECTURES all) - try_compile(all_archs_compiles - ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_archs_compiles - ${CMAKE_CURRENT_SOURCE_DIR}/main.cu - COMPILE_DEFINITIONS ${try_compile_flags} - OUTPUT_VARIABLE output - ) - verify_output(all output) - - set(CMAKE_CUDA_ARCHITECTURES all-major) - 
try_compile(all_major_archs_compiles - ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_major_archs_compiles - ${CMAKE_CURRENT_SOURCE_DIR}/main.cu - COMPILE_DEFINITIONS ${try_compile_flags} - OUTPUT_VARIABLE output - ) - verify_output(all-major output) - - if(all_archs_compiles AND all_major_archs_compiles) - add_executable(CudaOnlyAll main.cu) - target_compile_options(CudaOnlyAll PRIVATE ${compile_options}) +function(verify_output flag) + string(REPLACE "-" "_" architectures "${flag}") + string(TOUPPER "${architectures}" architectures) + set(architectures "${CMAKE_CUDA_ARCHITECTURES_${architectures}}") + + if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang") + set(match_regex "-target-cpu sm_([0-9]+)") + elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + set(match_regex "-arch compute_([0-9]+)") + endif() + + string(REGEX MATCHALL "${match_regex}" target_cpus "${output}") + + foreach(cpu ${target_cpus}) + string(REGEX MATCH "${match_regex}" dont_care "${cpu}") + list(APPEND command_archs "${CMAKE_MATCH_1}") + endforeach() + + list(SORT command_archs) + if(NOT "${command_archs}" STREQUAL "${architectures}") + message(FATAL_ERROR "Architectures used for \"${flag}\" don't match the reference (\"${command_archs}\" != \"${architectures}\").") endif() -else() +endfunction() + +set(try_compile_flags -v ${compile_options}) + +set(CMAKE_CUDA_ARCHITECTURES all) +try_compile(all_archs_compiles + ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_archs_compiles + ${CMAKE_CURRENT_SOURCE_DIR}/main.cu + COMPILE_DEFINITIONS ${try_compile_flags} + OUTPUT_VARIABLE output + ) +verify_output(all) + +set(CMAKE_CUDA_ARCHITECTURES all-major) +try_compile(all_major_archs_compiles + ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_major_archs_compiles + ${CMAKE_CURRENT_SOURCE_DIR}/main.cu + COMPILE_DEFINITIONS ${try_compile_flags} + OUTPUT_VARIABLE output + ) +verify_output(all-major) + +if(all_archs_compiles AND all_major_archs_compiles) add_executable(CudaOnlyAll main.cu) + target_compile_options(CudaOnlyAll 
PRIVATE ${compile_options}) endif() |