summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaul Tambre <raul@tambre.ee>2021-12-19 10:49:58 (GMT)
committerRaul Tambre <raul@tambre.ee>2022-02-01 16:25:20 (GMT)
commit8f64df0a7c2c9126017847f2bb8d37bc54ea0338 (patch)
tree589ad2a37c64cbde54e8e28006ad4393a7a5cd21
parent5305d5aa1a6900c64a5833176b43a21acb13fb30 (diff)
downloadCMake-8f64df0a7c2c9126017847f2bb8d37bc54ea0338.zip
CMake-8f64df0a7c2c9126017847f2bb8d37bc54ea0338.tar.gz
CMake-8f64df0a7c2c9126017847f2bb8d37bc54ea0338.tar.bz2
CUDA: Generic all and all-major support
Commit 14d8a276 (CUDA: Support nvcc 11.5 new -arch=all|all-major flags, 2021-08-17) added all and all-major options to CUDA_ARCHITECTURES. These are fairly generic and likely to see real-world use by distributors. Thus it's desirable to support these also for Clang and older NVCC versions. The supported architectures are dependent on the toolkit version. We determine the toolkit version prior to compiler detection. For NVCC we get the version from the vendor identification output, but for Clang we need to invoke NVCC separately. The architecture information is mostly based on the Wikipedia list with the earliest supported version being CUDA 7.0. This could be documented and expanded in the future to allow projects to query CUDA toolkit version and architecture information. For Clang we additionally constrain based on its support. Additionally the architecture mismatch detection logic is fixed, improved and updated for generic support: * Commit 01428c55 (CUDA: Fail fast if CMAKE_CUDA_ARCHITECTURES doesn't work during detection, 2020-08-29) enabled CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS if CMAKE_CUDA_ARCHITECTURES is specified. This results in CMakeDetermineCompilerID.cmake printing the compiler error and our code for presenting the mismatch in a user-friendly way being useless. The custom logic seems preferable so go back to not enabling it. * Commit 14d8a276 (CUDA: Support nvcc 11.5 new -arch=all|all-major flags, 2021-08-17) tried to support CMP0054 but forgot to add x to the interpolated result. Thus the conditions would always evaluate to false. This is fixed as a byproduct of removing NVIDIA specific checks, improving the error message and replacing architectures_mode with a simpler architectures_explicit. Visual Studio support omits testing the flags during detection due to complexities in determining the toolkit version when using it. A long-term proper implementation would be #23161. Implements #22860.
-rw-r--r--Help/release/dev/cuda-new-arch-modes.rst6
-rw-r--r--Modules/CMakeCUDACompiler.cmake.in4
-rw-r--r--Modules/CMakeDetermineCUDACompiler.cmake101
-rw-r--r--Modules/CUDA/architectures.cmake46
-rw-r--r--Modules/FindCUDAToolkit.cmake77
-rw-r--r--Source/cmGeneratorTarget.cxx24
-rw-r--r--Tests/CudaOnly/All/CMakeLists.txt80
7 files changed, 214 insertions, 124 deletions
diff --git a/Help/release/dev/cuda-new-arch-modes.rst b/Help/release/dev/cuda-new-arch-modes.rst
index 549abc3..fcfd8f1 100644
--- a/Help/release/dev/cuda-new-arch-modes.rst
+++ b/Help/release/dev/cuda-new-arch-modes.rst
@@ -2,9 +2,7 @@ cuda-new-arch-modes
-------------------
* The :prop_tgt:`CUDA_ARCHITECTURES` target property now supports the
- `all`, and `all-major` values when the CUDA compiler id is ``NVIDIA``,
- and version is 11.5+.
+ ``all`` and ``all-major`` values for CUDA toolkit 7.0+.
* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable now supports the
- `all`, and `all-major` values when the `CUDA` compiler id is ``NVIDIA``,
- and version is 11.5+.
+ ``all`` and ``all-major`` values for CUDA toolkit 7.0+.
diff --git a/Modules/CMakeCUDACompiler.cmake.in b/Modules/CMakeCUDACompiler.cmake.in
index 2f3e9a8..9f2e213 100644
--- a/Modules/CMakeCUDACompiler.cmake.in
+++ b/Modules/CMakeCUDACompiler.cmake.in
@@ -50,8 +50,12 @@ endif()
set(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_ROOT@")
set(CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_TOOLKIT_LIBRARY_ROOT@")
+set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "@CMAKE_CUDA_COMPILER_TOOLKIT_VERSION@")
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "@CMAKE_CUDA_COMPILER_LIBRARY_ROOT@")
+set(CMAKE_CUDA_ARCHITECTURES_ALL "@CMAKE_CUDA_ARCHITECTURES_ALL@")
+set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR "@CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR@")
+
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "@CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES@")
set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "@CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES@")
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index 8fe07fe..c21d622 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -78,10 +78,11 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
message(FATAL_ERROR "Clang with CUDA is not yet supported on Windows. See CMake issue #20776.")
endif()
- # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT and CMAKE_CUDA_COMPILER_LIBRARY_ROOT
- # in CMakeCUDACompiler.cmake, so FindCUDAToolkit can avoid searching on future runs and the toolkit stays the same.
+ # Find the CUDA toolkit. We store the CMAKE_CUDA_COMPILER_TOOLKIT_ROOT, CMAKE_CUDA_COMPILER_TOOLKIT_VERSION and
+ # CMAKE_CUDA_COMPILER_LIBRARY_ROOT in CMakeCUDACompiler.cmake so FindCUDAToolkit can avoid searching on future
+ # runs and the toolkit is the same.
# This is very similar to FindCUDAToolkit, but somewhat simplified since we can issue fatal errors
- # if we fail to find things we need and we don't need to account for searching the libraries.
+ # if we fail, and we don't need to account for searching the libraries.
# For NVCC we can easily deduce the SDK binary directory from the compiler path.
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
@@ -237,6 +238,21 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
endif()
endif()
+ # For regular nvcc the toolkit version is the same as the compiler version and we can parse it from the vendor test output.
+ # For Clang we need to invoke nvcc to get version output.
+ if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
+ if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
+ execute_process(COMMAND ${_CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE CMAKE_CUDA_COMPILER_ID_OUTPUT)
+ endif()
+
+ if(CMAKE_CUDA_COMPILER_ID_OUTPUT MATCHES [=[V([0-9]+\.[0-9]+\.[0-9]+)]=])
+ set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}")
+ endif()
+
+ # Make the all and all-major architecture information available.
+ include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
+ endif()
+
set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
@@ -256,33 +272,41 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
endif()
endif()
- # Append user-specified architectures.
- if(DEFINED CMAKE_CUDA_ARCHITECTURES)
- if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall")
+ # Detect explicit architectures and add them during detection.
+ if(DEFINED CMAKE_CUDA_ARCHITECTURES AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all" AND NOT "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major")
+ set(architectures_explicit TRUE)
+ set(architectures_test ${CMAKE_CUDA_ARCHITECTURES})
+ endif()
+
+ # For sufficiently new NVCC we can just use the all and all-major flags.
+ # For VS we don't test since we can't figure out the version this early (see #23161).
+ # For others select based on version.
+ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.5)
+ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all")
string(APPEND nvcc_test_flags " -arch=all")
- set(architectures_mode all)
- elseif("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall-major")
+ elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major")
string(APPEND nvcc_test_flags " -arch=all-major")
- set(architectures_mode all-major)
- else()
- set(architectures_mode explicit)
- foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
- # Strip specifiers as PTX vs binary doesn't matter.
- string(REGEX MATCH "[0-9]+" arch_name "${arch}")
- string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
- string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
- list(APPEND tested_architectures "${arch_name}")
- endforeach()
endif()
-
- # If the user has specified architectures we'll want to fail during compiler detection if they don't work.
- set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON)
+ elseif(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
+ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all")
+ set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL})
+ elseif("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "all-major")
+ set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR})
+ endif()
endif()
+ foreach(arch ${architectures_test})
+ # Strip specifiers as PTX vs binary doesn't matter.
+ string(REGEX MATCH "[0-9]+" arch_name "${arch}")
+ string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
+ string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
+ list(APPEND architectures_tested "${arch_name}")
+ endforeach()
+
# Rest of the code treats an empty value as equivalent to "use the defaults".
# Error out early to prevent confusing errors as a result of this.
# Note that this also catches invalid non-numerical values such as "a".
- if(architectures_mode STREQUAL "explicit" AND "${tested_architectures}" STREQUAL "")
+ if(DEFINED architectures_explicit AND "${architectures_tested}" STREQUAL "")
message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.")
endif()
@@ -318,6 +342,10 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER}" DIRECTORY)
get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
+
+ # We now know the version, so make the architecture variables available.
+ set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION ${CMAKE_CUDA_COMPILER_VERSION})
+ include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
endif()
_cmake_find_compiler_sysroot(CUDA)
@@ -604,38 +632,27 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
endif()
endif()
-elseif(architectures AND (architectures_mode STREQUAL "xall" OR
- architectures_mode STREQUAL "xall-major"))
- if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
- message(FATAL_ERROR
- "The CMAKE_CUDA_ARCHITECTURES:\n"
- " ${CMAKE_CUDA_ARCHITECTURES}\n"
- "is not supported with the ${CMAKE_CUDA_COMPILER_ID} compiler. Try:\n"
- " ${architectures}\n"
- "instead.")
- endif()
-
-elseif(architectures_mode STREQUAL "xexplicit")
+else()
# Sort since order mustn't matter.
list(SORT architectures_detected)
- list(SORT tested_architectures)
+ list(SORT architectures_tested)
# We don't distinguish real/virtual architectures during testing.
- # For "70-real;70-virtual" we detect "70" as working and tested_architectures is "70;70".
+ # For "70-real;70-virtual" we detect "70" as working and architectures_tested is "70;70".
# Thus we need to remove duplicates before checking if they're equal.
- list(REMOVE_DUPLICATES tested_architectures)
+ list(REMOVE_DUPLICATES architectures_tested)
# Print the actual architectures for generic values (all and all-major).
if(NOT DEFINED architectures_explicit)
- set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${tested_architectures})")
+ set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${architectures_tested})")
else()
- set(architectures_error "${tested_architectures}")
+ set(architectures_error "${architectures_tested}")
endif()
- if(NOT "${architectures_detected}" STREQUAL "${tested_architectures}")
+ if(NOT "${architectures_detected}" STREQUAL "${architectures_tested}")
message(FATAL_ERROR
"The CMAKE_CUDA_ARCHITECTURES:\n"
- " ${CMAKE_CUDA_ARCHITECTURES}\n"
+ " ${architectures_error}\n"
"do not all work with this compiler. Try:\n"
" ${architectures_detected}\n"
"instead.")
@@ -655,7 +672,7 @@ unset(_CUDA_LIBRARY_DIR)
unset(_CUDA_TARGET_DIR)
unset(_CUDA_TARGET_NAME)
-unset(architectures_mode)
+unset(architectures_explicit)
set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
diff --git a/Modules/CUDA/architectures.cmake b/Modules/CUDA/architectures.cmake
new file mode 100644
index 0000000..fa3a5a1
--- /dev/null
+++ b/Modules/CUDA/architectures.cmake
@@ -0,0 +1,46 @@
+# See supported GPUs on Wikipedia
+# https://en.wikipedia.org/wiki/CUDA#GPUs_supported
+
+# Initial set based on CUDA 7.0.
+set(CMAKE_CUDA_ARCHITECTURES_ALL 20 21 30 35 37 50 52 53)
+set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 30 35 50)
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 8.0)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 60 61 62)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 60)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 9.0)
+ if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 6.0)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 70 72)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 70)
+ endif()
+
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 20 21)
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 20 21)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 10.0
+ AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0))
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 75)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.0)
+ if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 80)
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 80)
+ endif()
+
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL 30)
+ list(REMOVE_ITEM CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 30)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.1
+ AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang" OR CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0))
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 86)
+endif()
+
+if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
+ AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
+ list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
+endif()
diff --git a/Modules/FindCUDAToolkit.cmake b/Modules/FindCUDAToolkit.cmake
index 573f956..7ecc9d4 100644
--- a/Modules/FindCUDAToolkit.cmake
+++ b/Modules/FindCUDAToolkit.cmake
@@ -499,12 +499,17 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT)
set(CUDAToolkit_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_LIBRARY_ROOT}")
set(CUDAToolkit_BIN_DIR "${CUDAToolkit_ROOT_DIR}/bin")
set(CUDAToolkit_NVCC_EXECUTABLE "${CUDAToolkit_BIN_DIR}/nvcc${CMAKE_EXECUTABLE_SUFFIX}")
-else()
+ set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_TOOLKIT_VERSION}")
+ if(CUDAToolkit_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ endif()
+else()
function(_CUDAToolkit_find_root_dir )
cmake_parse_arguments(arg "" "" "SEARCH_PATHS;FIND_FLAGS" ${ARGN})
-
if(NOT CUDAToolkit_BIN_DIR)
if(NOT CUDAToolkit_SENTINEL_FILE)
find_program(CUDAToolkit_NVCC_EXECUTABLE
@@ -687,6 +692,40 @@ else()
get_filename_component(CUDAToolkit_LIBRARY_ROOT "${_CUDAToolkit_version_file}" DIRECTORY ABSOLUTE)
endif()
unset(_CUDAToolkit_version_file)
+
+ if(CUDAToolkit_NVCC_EXECUTABLE AND
+ CMAKE_CUDA_COMPILER_VERSION AND
+ CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
+ # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
+ # This if statement will always match, but is used to provide variables for MATCH 1,2,3...
+ if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
+ endif()
+ elseif(CUDAToolkit_NVCC_EXECUTABLE)
+ # Compute the version by invoking nvcc
+ execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
+ if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
+ endif()
+ unset(NVCC_OUT)
+ else()
+ _CUDAToolkit_find_version_file(version_file)
+ if(version_file)
+ file(READ "${version_file}" VERSION_INFO)
+ if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=])
+ set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
+ set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
+ set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
+ set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
+ endif()
+ endif()
+ endif()
endif()
# Find target directory when crosscompiling.
@@ -754,40 +793,6 @@ if(NOT EXISTS "${CUDAToolkit_INCLUDE_DIR}/cublas_v2.h")
endif()
endif()
-if(CUDAToolkit_NVCC_EXECUTABLE AND
- CMAKE_CUDA_COMPILER_VERSION AND
- CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
- # Need to set these based off the already computed CMAKE_CUDA_COMPILER_VERSION value
- # This if statement will always match, but is used to provide variables for MATCH 1,2,3...
- if(CMAKE_CUDA_COMPILER_VERSION MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=])
- set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
- set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
- set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
- set(CUDAToolkit_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")
- endif()
-elseif(CUDAToolkit_NVCC_EXECUTABLE)
- # Compute the version by invoking nvcc
- execute_process(COMMAND ${CUDAToolkit_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
- if(NVCC_OUT MATCHES [=[ V([0-9]+)\.([0-9]+)\.([0-9]+)]=])
- set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
- set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
- set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
- set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
- endif()
- unset(NVCC_OUT)
-else()
- _CUDAToolkit_find_version_file(version_file)
- if(version_file)
- file(READ "${version_file}" VERSION_INFO)
- if(VERSION_INFO MATCHES [=[CUDA Version ([0-9]+)\.([0-9]+)\.([0-9]+)]=])
- set(CUDAToolkit_VERSION_MAJOR "${CMAKE_MATCH_1}")
- set(CUDAToolkit_VERSION_MINOR "${CMAKE_MATCH_2}")
- set(CUDAToolkit_VERSION_PATCH "${CMAKE_MATCH_3}")
- set(CUDAToolkit_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
- endif()
- endif()
-endif()
-
# Find the CUDA Runtime Library libcudart
find_library(CUDA_CUDART
NAMES cudart
diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx
index c4f1a13..9f1029e 100644
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -3415,7 +3415,7 @@ void cmGeneratorTarget::AddExplicitLanguageFlags(std::string& flags,
void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
{
- const std::string& property = this->GetSafeProperty("CUDA_ARCHITECTURES");
+ std::string property = this->GetSafeProperty("CUDA_ARCHITECTURES");
if (property.empty()) {
switch (this->GetPolicyStatusCMP0104()) {
@@ -3447,16 +3447,24 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
// Check for special modes: `all`, `all-major`.
- if (property == "all") {
- if (compiler == "NVIDIA") {
+ if (compiler == "NVIDIA" &&
+ cmSystemTools::VersionCompare(
+ cmSystemTools::OP_GREATER_EQUAL,
+ this->Makefile->GetDefinition("CMAKE_CUDA_COMPILER_VERSION"),
+ "11.5")) {
+ if (property == "all") {
flags += " -arch=all";
- return;
- }
- } else if (property == "all-major") {
- if (compiler == "NVIDIA") {
+ } else if (property == "all-major") {
flags += " -arch=all-major";
- return;
}
+ return;
+ }
+
+ if (property == "all") {
+ property = *this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL");
+ } else if (property == "all-major") {
+ property =
+ *this->Makefile->GetDefinition("CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR");
}
struct CudaArchitecture
diff --git a/Tests/CudaOnly/All/CMakeLists.txt b/Tests/CudaOnly/All/CMakeLists.txt
index fe29bb0..ba32e9a 100644
--- a/Tests/CudaOnly/All/CMakeLists.txt
+++ b/Tests/CudaOnly/All/CMakeLists.txt
@@ -2,43 +2,55 @@ cmake_minimum_required(VERSION 3.20)
project(CudaOnlyAll CUDA)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
- CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.5.0)
-
+ CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0)
set(compile_options -Wno-deprecated-gpu-targets)
- function(verify_output flag output_var)
- string(REGEX MATCHALL "-arch compute_([0-9]+)" target_archs "${${output_var}}")
- list(LENGTH target_archs count)
- if(count LESS 2)
- message(FATAL_ERROR "${flag} failed to map to multiple architectures")
- endif()
- endfunction()
endif()
-if(COMMAND verify_output)
- set(try_compile_flags -v ${compile_options})
-
- set(CMAKE_CUDA_ARCHITECTURES all)
- try_compile(all_archs_compiles
- ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_archs_compiles
- ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
- COMPILE_DEFINITIONS ${try_compile_flags}
- OUTPUT_VARIABLE output
- )
- verify_output(all output)
-
- set(CMAKE_CUDA_ARCHITECTURES all-major)
- try_compile(all_major_archs_compiles
- ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_major_archs_compiles
- ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
- COMPILE_DEFINITIONS ${try_compile_flags}
- OUTPUT_VARIABLE output
- )
- verify_output(all-major output)
-
- if(all_archs_compiles AND all_major_archs_compiles)
- add_executable(CudaOnlyAll main.cu)
- target_compile_options(CudaOnlyAll PRIVATE ${compile_options})
+function(verify_output flag)
+ string(REPLACE "-" "_" architectures "${flag}")
+ string(TOUPPER "${architectures}" architectures)
+ set(architectures "${CMAKE_CUDA_ARCHITECTURES_${architectures}}")
+
+ if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
+ set(match_regex "-target-cpu sm_([0-9]+)")
+ elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+ set(match_regex "-arch compute_([0-9]+)")
+ endif()
+
+ string(REGEX MATCHALL "${match_regex}" target_cpus "${output}")
+
+ foreach(cpu ${target_cpus})
+ string(REGEX MATCH "${match_regex}" dont_care "${cpu}")
+ list(APPEND command_archs "${CMAKE_MATCH_1}")
+ endforeach()
+
+ list(SORT command_archs)
+ if(NOT "${command_archs}" STREQUAL "${architectures}")
+ message(FATAL_ERROR "Architectures used for \"${flag}\" don't match the reference (\"${command_archs}\" != \"${architectures}\").")
endif()
-else()
+endfunction()
+
+set(try_compile_flags -v ${compile_options})
+
+set(CMAKE_CUDA_ARCHITECTURES all)
+try_compile(all_archs_compiles
+ ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_archs_compiles
+ ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
+ COMPILE_DEFINITIONS ${try_compile_flags}
+ OUTPUT_VARIABLE output
+ )
+verify_output(all)
+
+set(CMAKE_CUDA_ARCHITECTURES all-major)
+try_compile(all_major_archs_compiles
+ ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_major_archs_compiles
+ ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
+ COMPILE_DEFINITIONS ${try_compile_flags}
+ OUTPUT_VARIABLE output
+ )
+verify_output(all-major)
+
+if(all_archs_compiles AND all_major_archs_compiles)
add_executable(CudaOnlyAll main.cu)
+ target_compile_options(CudaOnlyAll PRIVATE ${compile_options})
endif()