summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRaul Tambre <raul@tambre.ee>2021-12-24 19:29:37 (GMT)
committerRaul Tambre <raul@tambre.ee>2022-01-27 20:11:13 (GMT)
commitdaf372c4d686000fd2c6f380efa7f5ddfd915ceb (patch)
tree246028ce0713a469d7bf6c5bd0832d022296e0f6
parent7a0d0983521cbd16030add2afbb0f7d9e75cce6f (diff)
downloadCMake-daf372c4d686000fd2c6f380efa7f5ddfd915ceb.zip
CMake-daf372c4d686000fd2c6f380efa7f5ddfd915ceb.tar.gz
CMake-daf372c4d686000fd2c6f380efa7f5ddfd915ceb.tar.bz2
CUDA: Fix issuing error if default architecture detection fails
We require CUDA_ARCHITECTURES to be set for targets (see CMP0104). If it is not set, anything after compiler detection, such as ABI detection, will fail to generate. This means we need to error if CMAKE_CUDA_ARCHITECTURES is not set to a valid value as a result of compiler detection. Currently we fail to issue the error if compiler detection failed and the ID is unset. In such a case we won't define detected_architecture, making the code responsible for the error unreachable. Simplify the detection of architectures used during compiler detection by always detecting all of them, which enables us to simplify the check in the "default to compiler" path if CMAKE_CUDA_ARCHITECTURES is empty. As a result, we need to move the error checking and CMAKE_CUDA_ARCHITECTURES=OFF handling fully into the default path, thus simplifying the code and unifying the code paths for NVCC and Clang. This also happens to fix: 1. CMAKE_CUDA_ARCHITECTURES=OFF on Clang. 2. A theoretical issue of a compiler defaulting to multiple architectures. I've additionally added printing of the compiler output alongside the error to better reveal possible underlying compiler/system configuration issues. Fixes #23010.
-rw-r--r--Help/release/dev/cuda-compiler-detection-robustness.rst11
-rw-r--r--Modules/CMakeDetermineCUDACompiler.cmake62
2 files changed, 41 insertions, 32 deletions
diff --git a/Help/release/dev/cuda-compiler-detection-robustness.rst b/Help/release/dev/cuda-compiler-detection-robustness.rst
new file mode 100644
index 0000000..cc49a8d
--- /dev/null
+++ b/Help/release/dev/cuda-compiler-detection-robustness.rst
@@ -0,0 +1,11 @@
+cuda-compiler-detection-robustness
+----------------------------------
+
+* CUDA compiler detection now issues an error in all cases when it's unable to
+ compute the default architecture(s) if required (see :policy:`CMP0104`).
+
+* CUDA compiler detection now correctly handles ``OFF`` for
+ :variable:`CMAKE_CUDA_ARCHITECTURES` on Clang.
+
+* CUDA compiler detection now supports the theoretical case of multiple default
+ architectures.
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index 73b1017..8fe07fe 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -353,18 +353,12 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
set(_SET_CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT
"set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT \"${CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT}\")")
elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
- if(NOT CMAKE_CUDA_ARCHITECTURES)
- # Find the architecture that we successfully compiled using and set it as the default.
- string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
- set(detected_architecture "${CMAKE_MATCH_1}")
- else()
- string(REGEX MATCHALL "-target-cpu sm_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+ string(REGEX MATCHALL "-target-cpu sm_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
- foreach(cpu ${target_cpus})
- string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${cpu}")
- list(APPEND architectures "${CMAKE_MATCH_1}")
- endforeach()
- endif()
+ foreach(cpu ${target_cpus})
+ string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${cpu}")
+ list(APPEND architectures_detected "${CMAKE_MATCH_1}")
+ endforeach()
# Find target directory when crosscompiling.
if(CMAKE_CROSSCOMPILING)
@@ -590,28 +584,25 @@ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
"Failed to detect CUDA nvcc include information:\n${_nvcc_log}\n\n")
endif()
- # Parse default CUDA architecture.
- cmake_policy(GET CMP0104 _CUDA_CMP0104)
- if(NOT CMAKE_CUDA_ARCHITECTURES AND _CUDA_CMP0104 STREQUAL "NEW")
- string(REGEX MATCH "arch[ =]compute_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
- set(detected_architecture "${CMAKE_MATCH_1}")
- elseif(CMAKE_CUDA_ARCHITECTURES)
- string(REGEX MATCHALL "-arch compute_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
-
- foreach(cpu ${target_cpus})
- string(REGEX MATCH "-arch compute_([0-9]+)" dont_care "${cpu}")
- list(APPEND architectures "${CMAKE_MATCH_1}")
- endforeach()
- endif()
+ string(REGEX MATCHALL "-arch compute_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+
+ foreach(cpu ${target_cpus})
+ string(REGEX MATCH "-arch compute_([0-9]+)" dont_care "${cpu}")
+ list(APPEND architectures_detected "${CMAKE_MATCH_1}")
+ endforeach()
endif()
# If the user didn't set the architectures, then set them to a default.
# If the user did, then make sure those architectures worked.
-if(DEFINED detected_architecture AND "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
- set(CMAKE_CUDA_ARCHITECTURES "${detected_architecture}" CACHE STRING "CUDA architectures")
+if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
+ cmake_policy(GET CMP0104 _CUDA_CMP0104)
- if(NOT CMAKE_CUDA_ARCHITECTURES)
- message(FATAL_ERROR "Failed to find a working CUDA architecture.")
+ if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" OR _CUDA_CMP0104 STREQUAL "NEW")
+ set(CMAKE_CUDA_ARCHITECTURES "${architectures_detected}" CACHE STRING "CUDA architectures")
+
+ if(NOT CMAKE_CUDA_ARCHITECTURES)
+ message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+ endif()
endif()
elseif(architectures AND (architectures_mode STREQUAL "xall" OR
architectures_mode STREQUAL "xall-major"))
@@ -624,9 +615,9 @@ elseif(architectures AND (architectures_mode STREQUAL "xall" OR
"instead.")
endif()
-elseif(architectures AND architectures_mode STREQUAL "xexplicit")
+elseif(architectures_mode STREQUAL "xexplicit")
# Sort since order mustn't matter.
- list(SORT architectures)
+ list(SORT architectures_detected)
list(SORT tested_architectures)
# We don't distinguish real/virtual architectures during testing.
@@ -634,12 +625,19 @@ elseif(architectures AND architectures_mode STREQUAL "xexplicit")
# Thus we need to remove duplicates before checking if they're equal.
list(REMOVE_DUPLICATES tested_architectures)
- if(NOT "${architectures}" STREQUAL "${tested_architectures}")
+ # Print the actual architectures for generic values (all and all-major).
+ if(NOT DEFINED architectures_explicit)
+ set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${tested_architectures})")
+ else()
+ set(architectures_error "${tested_architectures}")
+ endif()
+
+ if(NOT "${architectures_detected}" STREQUAL "${tested_architectures}")
message(FATAL_ERROR
"The CMAKE_CUDA_ARCHITECTURES:\n"
" ${CMAKE_CUDA_ARCHITECTURES}\n"
"do not all work with this compiler. Try:\n"
- " ${architectures}\n"
+ " ${architectures_detected}\n"
"instead.")
endif()
endif()