summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorBrad King <brad.king@kitware.com>2022-02-02 13:23:05 (GMT)
committerKitware Robot <kwrobot@kitware.com>2022-02-02 13:23:19 (GMT)
commit92459258108c0036808f29f83a8503fea18ccee9 (patch)
treec2f302d41c63a149b510c97c74a664f980aa0808 /Modules
parent0f316675cfb572058b2077e72a8ecbb6b5c7b534 (diff)
parent8f64df0a7c2c9126017847f2bb8d37bc54ea0338 (diff)
downloadCMake-92459258108c0036808f29f83a8503fea18ccee9.zip
CMake-92459258108c0036808f29f83a8503fea18ccee9.tar.gz
CMake-92459258108c0036808f29f83a8503fea18ccee9.tar.bz2
Merge topic 'cuda_generic_arch_all'
8f64df0a7c CUDA: Generic all and all-major support Acked-by: Kitware Robot <kwrobot@kitware.com> Acked-by: Alex <leha-bot@yandex.ru> Merge-request: !6816
Diffstat (limited to 'Modules')
-rw-r--r--Modules/CMakeCUDACompiler.cmake.in4
-rw-r--r--Modules/CMakeDetermineCUDACompiler.cmake101
-rw-r--r--Modules/CUDA/architectures.cmake46
-rw-r--r--Modules/FindCUDAToolkit.cmake77
4 files changed, 150 insertions, 78 deletions
diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 2f3e9a8..9f2e213 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -50,8 +50,12 @@ endif()
set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@")
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@")
+set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "@CMAKE_CUDA_COMPILER_TOOLKIT_VERSION@")
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
+set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@")
+set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@")
+
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES@")
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index 8fe07fe..c21d622 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -78,10 +78,11 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
message(FATAL_ERROR "Clang with CUDA is not yet supported on Windows. See CMake issue #20776.")
endif()
- # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT and CMAKE_CUDA_COMPILER_LIBRARY_ROOT
- # in CMakeCUDACompiler.cmake, so FindCUDAToolkit can avoid searching on future runs and the toolkit stays the same.
+ # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT, CMAKE_CUDA_COMPILER_TOOLKIT_VERSION and
+ # CMAKE_CUDA_COMPILER_LIBRARY_ROOT in CMakeCUDACompiler.cmake so FindCUDAToolkit can avoid searching on future
+ # runs and the toolkit is the same.
# This is very similar to FindCUDAToolkit, but somewhat simplified since we can issue fatal errors
- # if we fail to find things we need and we don't need to account for searching the libraries.
+ # if we fail and we don't need to account for searching the libraries.
# For NVCC we can easily deduce the SDK binary directory from the compiler path.
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
@@ -237,6 +238,21 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
endif()
endif()
+ # For regular nvcc the toolkit version is the same as the compiler version, and we can parse it from the vendor test output.
+ # For Clang we need to invoke nvcc to get version output.
+ if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
+ if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
+ execute_process(COMMAND ${_CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE CMAKE_CUDA_COMPILER_ID_OUTPUT)
+ endif()
+
+ if(CMAKE_CUDA_COMPILER_ID_OUTPUT MATCHES [=[V([0-9]+\.[0-9]+\.[0-9]+)]=])
+ set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}")
+ endif()
+
+ # Make the all and all-major architecture information available.
+ include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
+ endif()
+
set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
@@ -256,33 +272,41 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
endif()
endif()
- # Append user-specified architectures.
- if(DEFINED CMAKE_CUDA_ARCHITECTURES)
- if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall")
+ # Detect explicit architectures and add them during detection.
+ if(DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all" AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major")
+ set(architectures_explicit TRUE)
+ set(architectures_test ${CMAKE_CUDA_ARCHITECTURES})
+ endif()
+
+ # For sufficiently new NVCC we can just use the all and all-major flags.
+ # For VS we don't test since we can't figure out the version this early (see #23161).
+ # For others select based on version.
+ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.5)
+ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all")
string(APPEND nvcc_test_flags " -arch=all")
- set(architectures_mode all)
- elseif("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall-major")
+ elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major")
string(APPEND nvcc_test_flags " -arch=all-major")
- set(architectures_mode all-major)
- else()
- set(architectures_mode explicit)
- foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
- # Strip specifiers as PTX vs binary doesn't matter.
- string(REGEX MATCH "[0-9]+" arch_name "${arch}")
- string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
- string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
- list(APPEND tested_architectures "${arch_name}")
- endforeach()
endif()
-
- # If the user has specified architectures we'll want to fail during compiler detection if they don't work.
- set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON)
+ elseif(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
+ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all")
+ set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL})
+ elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major")
+ set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR})
+ endif()
endif()
+ foreach(arch ${architectures_test})
+ # Strip specifiers as PTX vs binary doesn't matter.
+ string(REGEX MATCH "[0-9]+" arch_name "${arch}")
+ string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
+ string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
+ list(APPEND architectures_tested "${arch_name}")
+ endforeach()
+
# Rest of the code treats an empty value as equivalent to "use the defaults".
# Error out early to prevent confusing errors as a result of this.
# Note that this also catches invalid non-numerical values such as "a".
- if(architectures_mode STREQUAL "explicit" AND "${tested_architectures}" STREQUAL "")
+ if(DEFINED architectures_explicit AND "${architectures_tested}" STREQUAL "")
message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.")
endif()
@@ -318,6 +342,10 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER}" DIRECTORY)
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
+
+ # We now know the version, so make the architecture variables available.
+ set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION ${CMAKE_CUDA_COMPILER_VERSION})
+ include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
endif()
_cmake_find_compiler_sysroot(CUDA)
@@ -604,38 +632,27 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
endif()
endif()
-elseif(architectures AND (architectures_mode STREQUAL "xall" OR
- architectures_mode STREQUAL "xall-major"))
- if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
- message(FATAL_ERROR
- "The CMAKE_CUDA_ARCHITECTURES:\n"
- " ${CMAKE_CUDA_ARCHITECTURES}\n"
- "is not supported with the ${CMAKE_CUDA_COMPILER_ID} compiler. Try:\n"
- " ${architectures}\n"
- "instead.")
- endif()
-
-elseif(architectures_mode STREQUAL "xexplicit")
+else()
# Sort since order mustn't matter.
list(SORT architectures_detected)
- list(SORT tested_architectures)
+ list(SORT architectures_tested)
# We don't distinguish real/virtual architectures during testing.
- # For "70-real;70-virtual" we detect "70" as working and tested_architectures is "70;70".
+ # For "70-real;70-virtual" we detect "70" as working and architectures_tested is "70;70".
# Thus we need to remove duplicates before checking if they're equal.
- list(REMOVE_DUPLICATES tested_architectures)
+ list(REMOVE_DUPLICATES architectures_tested)
# Print the actual architectures for generic values (all and all-major).
if(NOT DEFINED architectures_explicit)
- set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${tested_architectures})")
+ set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${architectures_tested})")
else()
- set(architectures_error "${tested_architectures}")
+ set(architectures_error "${architectures_tested}")
endif()
- if(NOT "${architectures_detected}" STREQUAL "${tested_architectures}")
+ if(NOT "${architectures_detected}" STREQUAL "${architectures_tested}")
message(FATAL_ERROR
"The CMAKE_CUDA_ARCHITECTURES:\n"
- " ${CMAKE_CUDA_ARCHITECTURES}\n"
+ " ${architectures_error}\n"
"do not all work with this compiler. Try:\n"
" ${architectures_detected}\n"
"instead.")
@@ -655,7 +672,7 @@ unset(_CUDA_LIBRARY_DIR)
unset(_CUDA_TARGET_DIR)
unset(_CUDA_TARGET_NAME)
-unset(architectures_mode)
+unset(architectures_explicit)
set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
diff --git a/Modules/CUDA/architectures.cmake b/Modules/CUDA/architectures.cmake
new file mode 100644
index 0000000..fa3a5a1
--- /dev/null
+++ b/Modules/CUDA/architectures.cmake
@@ -0,0 +1,46 @@
+# See supported GPUs on Wikipedia
+# https://en.wikipedia.org/wiki/CUDA#GPUs_supported
+
+# Initial set based on CUDA 7.0.
+set(CMAKE_CUDA_ARCHITECTURES_ALL 20 21 30 35 37 50 52 53)
+set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 30 35 50)
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 8.0)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 60 61 62)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 60)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 9.0)
+ if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 6.0)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 70 72)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 70)
+ endif()
+
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 20 21)
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 21)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 10.0
+ AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0))
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 75)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.0)
+ if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 80)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 80)
+ endif()
+
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 30)
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 30)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.1
+ AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0))
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 86)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
+ AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
+endif()
diff --git a/Modules/FindCUDAToolkit.cmake b/Modules/FindCUDAToolkit.cmake
index 573f956..7ecc9d4 100644
--- a/Modules/FindCUDAToolkit.cmake
+++ b/Modules/FindCUDAToolkit.cmake
@@ -499,12 +499,17 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT)
set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}")
set(CUDAToolkit_BIN_DIR "${CUDAToolkit_ROOT_DIR}/bin")
set(CUDAToolkit_NVCC_EXECUTABLE "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}")
-else()
+ set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}")
+ if(CUDAToolkit_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ endif()
+else()
function(_CUDAToolkit_find_root_dir )
cmake_parse_arguments(arg "" "" "SEARCH_PATHS;FIND_FLAGS" ${ARGN})
-
if(NOT CUDAToolkit_BIN_DIR)
if(NOT CUDAToolkit_SENTINEL_FILE)
find_program(CUDAToolkit_NVCC_EXECUTABLE
@@ -687,6 +692,40 @@ else()
get_filename_component(CUDAToolkit_LIBRARY_ROOT "${_CUDAToolkit_version_file}" DIRECTORY ABSOLUTE)
endif()
unset(_CUDAToolkit_version_file)
+
+ if(CUDAToolkit_NVCC_EXECUTABLE AND
+ CMAKE_CUDA_COMPILER_VERSION AND
+ CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
+ # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
+ # This if statement will always match, but is used to provide variables for MATCH 1,2,3...
+ if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
+ endif()
+ elseif(CUDAToolkit_NVCC_EXECUTABLE)
+ # Compute the version by invoking nvcc
+ execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
+ if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
+ endif()
+ unset(NVCC_OUT)
+ else()
+ _CUDAToolkit_find_version_file(version_file)
+ if(version_file)
+ file(READ "${version_file}" VERSION_INFO)
+ if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
+ endif()
+ endif()
+ endif()
endif()
# Find target directory when crosscompiling.
@@ -754,40 +793,6 @@ if(NOT EXISTS "${CUDAToolkit_INCLUDE_DIR}/cublas_v2.h")
endif()
endif()
-if(CUDAToolkit_NVCC_EXECUTABLE AND
- CMAKE_CUDA_COMPILER_VERSION AND
- CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
- # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
- # This if statement will always match, but is used to provide variables for MATCH 1,2,3...
- if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
- set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
- set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
- set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
- set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
- endif()
-elseif(CUDAToolkit_NVCC_EXECUTABLE)
- # Compute the version by invoking nvcc
- execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
- if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
- set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
- set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
- set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
- set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
- endif()
- unset(NVCC_OUT)
-else()
- _CUDAToolkit_find_version_file(version_file)
- if(version_file)
- file(READ "${version_file}" VERSION_INFO)
- if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=])
- set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
- set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
- set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
- set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
- endif()
- endif()
-endif()
-
# Find the CUDA Runtime Library libcudart
find_library(CUDA_CUDART
NAMES cudart