From 5f667d783a917a2dc70fcdfaede6c3fbdc3c9549 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sun, 19 Dec 2021 12:39:41 +0200 Subject: CUDA: Actually use reverse architecture deprecation order for Clang The code now matches what the comment describes. This mistake seems to have been present since the initial introduction in commit 5df21adf (CUDA: Add support for Clang compiler, 2020-05-07). --- Modules/CMakeDetermineCUDACompiler.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index df71799..49e4c75 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -283,7 +283,7 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) if(NOT CMAKE_CUDA_ARCHITECTURES) # Clang doesn't automatically select an architecture supported by the SDK. # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups). - foreach(arch "20" "30" "52") + foreach(arch "52" "30" "20") list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch}") endforeach() endif() -- cgit v0.12 From d19273bc7b361a54041706c02857993e244d3b50 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Tue, 28 Dec 2021 14:29:52 +0200 Subject: CUDA: Support all and all-major on Visual Studio The Visual Studio integration's CodeGeneration option only knows how to generate a -gencode flag, which doesn't recognize all. Add a special case to pass these two as regular additional flags. 
--- Source/cmVisualStudioGeneratorOptions.cxx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Source/cmVisualStudioGeneratorOptions.cxx b/Source/cmVisualStudioGeneratorOptions.cxx index 5c17a07..9045a4d 100644 --- a/Source/cmVisualStudioGeneratorOptions.cxx +++ b/Source/cmVisualStudioGeneratorOptions.cxx @@ -182,6 +182,10 @@ void cmVisualStudioGeneratorOptions::FixCudaCodeGeneration() // First entries for the -arch= [-code=,...] pair. if (!arch.empty()) { std::string arch_name = arch[0]; + if (arch_name == "all" || arch_name == "all-major") { + AppendFlagString("AdditionalOptions", "-arch=" + arch_name); + return; + } std::vector codes; if (!code.empty()) { codes = cmTokenize(code[0], ","); -- cgit v0.12 From 7a0d0983521cbd16030add2afbb0f7d9e75cce6f Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Mon, 27 Dec 2021 20:40:29 +0200 Subject: CUDA: Error on empty/invalid CMAKE_CUDA_ARCHITECTURES set by user If empty we otherwise treat it the same as unset in most places, but still end up failing eventually with a confusing "Failed to find a working CUDA architecture". This also detects some other basic invalid ones (e.g. "al"). 
--- Help/release/dev/cuda-invalid-architectures.rst | 5 +++++ Modules/CMakeDetermineCUDACompiler.cmake | 9 ++++++++- Tests/RunCMake/CMakeLists.txt | 4 ++++ Tests/RunCMake/CUDA_architectures/CMakeLists.txt | 3 +++ Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake | 4 ++++ Tests/RunCMake/CUDA_architectures/architectures-empty-result.txt | 1 + Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt | 5 +++++ Tests/RunCMake/CUDA_architectures/architectures-empty.cmake | 2 ++ .../RunCMake/CUDA_architectures/architectures-invalid-result.txt | 1 + .../RunCMake/CUDA_architectures/architectures-invalid-stderr.txt | 5 +++++ Tests/RunCMake/CUDA_architectures/architectures-invalid.cmake | 2 ++ 11 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 Help/release/dev/cuda-invalid-architectures.rst create mode 100644 Tests/RunCMake/CUDA_architectures/CMakeLists.txt create mode 100644 Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake create mode 100644 Tests/RunCMake/CUDA_architectures/architectures-empty-result.txt create mode 100644 Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt create mode 100644 Tests/RunCMake/CUDA_architectures/architectures-empty.cmake create mode 100644 Tests/RunCMake/CUDA_architectures/architectures-invalid-result.txt create mode 100644 Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt create mode 100644 Tests/RunCMake/CUDA_architectures/architectures-invalid.cmake diff --git a/Help/release/dev/cuda-invalid-architectures.rst b/Help/release/dev/cuda-invalid-architectures.rst new file mode 100644 index 0000000..3313dbb --- /dev/null +++ b/Help/release/dev/cuda-invalid-architectures.rst @@ -0,0 +1,5 @@ +cuda-invalid-architectures +-------------------------- + +* CUDA compiler detection now tries to detect invalid architectures and issue + an error. 
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index 49e4c75..73b1017 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -257,7 +257,7 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) endif() # Append user-specified architectures. - if(CMAKE_CUDA_ARCHITECTURES) + if(DEFINED CMAKE_CUDA_ARCHITECTURES) if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall") string(APPEND nvcc_test_flags " -arch=all") set(architectures_mode all) @@ -279,6 +279,13 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON) endif() + # Rest of the code treats an empty value as equivalent to "use the defaults". + # Error out early to prevent confusing errors as a result of this. + # Note that this also catches invalid non-numerical values such as "a". + if(architectures_mode STREQUAL "explicit" AND "${tested_architectures}" STREQUAL "") + message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.") + endif() + if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang") if(NOT CMAKE_CUDA_ARCHITECTURES) # Clang doesn't automatically select an architecture supported by the SDK. 
diff --git a/Tests/RunCMake/CMakeLists.txt b/Tests/RunCMake/CMakeLists.txt index 6bea788..6a1c694 100644 --- a/Tests/RunCMake/CMakeLists.txt +++ b/Tests/RunCMake/CMakeLists.txt @@ -534,6 +534,10 @@ add_RunCMake_test(no_install_prefix) add_RunCMake_test(configure_file) add_RunCMake_test(CTestTimeout -DTIMEOUT=${CTestTestTimeout_TIME}) add_RunCMake_test(CTestTimeoutAfterMatch) +if(CMake_TEST_CUDA) + add_RunCMake_test(CUDA_architectures) + set_property(TEST RunCMake.CUDA_architectures APPEND PROPERTY LABELS "CUDA") +endif() add_RunCMake_test(DependencyGraph -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER}) # ctresalloc links against CMakeLib and CTestLib, which means it can't be built diff --git a/Tests/RunCMake/CUDA_architectures/CMakeLists.txt b/Tests/RunCMake/CUDA_architectures/CMakeLists.txt new file mode 100644 index 0000000..d8200fc --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/CMakeLists.txt @@ -0,0 +1,3 @@ +cmake_minimum_required(VERSION 3.22) +project(${RunCMake_TEST} NONE) +include(${RunCMake_TEST}.cmake) diff --git a/Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake b/Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake new file mode 100644 index 0000000..cbbf57c --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/RunCMakeTest.cmake @@ -0,0 +1,4 @@ +include(RunCMake) + +run_cmake(architectures-empty) +run_cmake(architectures-invalid) diff --git a/Tests/RunCMake/CUDA_architectures/architectures-empty-result.txt b/Tests/RunCMake/CUDA_architectures/architectures-empty-result.txt new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-empty-result.txt @@ -0,0 +1 @@ +1 diff --git a/Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt b/Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt new file mode 100644 index 0000000..39640fa --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt @@ -0,0 +1,5 @@ +^CMake Error at 
.*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\): + CMAKE_CUDA_ARCHITECTURES must be valid if set\. +Call Stack \(most recent call first\): + architectures-empty\.cmake:2 \(enable_language\) + CMakeLists\.txt:3 \(include\) diff --git a/Tests/RunCMake/CUDA_architectures/architectures-empty.cmake b/Tests/RunCMake/CUDA_architectures/architectures-empty.cmake new file mode 100644 index 0000000..4915248 --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-empty.cmake @@ -0,0 +1,2 @@ +set(CMAKE_CUDA_ARCHITECTURES "") +enable_language(CUDA) diff --git a/Tests/RunCMake/CUDA_architectures/architectures-invalid-result.txt b/Tests/RunCMake/CUDA_architectures/architectures-invalid-result.txt new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-invalid-result.txt @@ -0,0 +1 @@ +1 diff --git a/Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt b/Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt new file mode 100644 index 0000000..7608730 --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt @@ -0,0 +1,5 @@ +^CMake Error at .*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\): + CMAKE_CUDA_ARCHITECTURES must be valid if set\. 
+Call Stack \(most recent call first\): + architectures-invalid\.cmake:2 \(enable_language\) + CMakeLists\.txt:3 \(include\)$ diff --git a/Tests/RunCMake/CUDA_architectures/architectures-invalid.cmake b/Tests/RunCMake/CUDA_architectures/architectures-invalid.cmake new file mode 100644 index 0000000..e5c8628 --- /dev/null +++ b/Tests/RunCMake/CUDA_architectures/architectures-invalid.cmake @@ -0,0 +1,2 @@ +set(CMAKE_CUDA_ARCHITECTURES "invalid") +enable_language(CUDA) -- cgit v0.12 From daf372c4d686000fd2c6f380efa7f5ddfd915ceb Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Fri, 24 Dec 2021 21:29:37 +0200 Subject: CUDA: Fix issuing error if default architecture detection fails We require CUDA_ARCHITECTURES to be set for targets (see CMP0104). If it is not set, anything after compiler detection, such as ABI detection, will fail to generate. This means we need to error if CMAKE_CUDA_ARCHITECTURES is not set to a valid value as a result of compiler detection. Currently we fail to issue the error if compiler detection failed and the ID is unset. In such a case we won't define detected_architecture, making the code responsible for the error unreachable. Simplify the detection of architectures used during compiler detection by always detecting all of them, which enables us to simplify the check in the "default to compiler" path if CMAKE_CUDA_ARCHITECTURES is empty. As a result we need to move the error checking and CMAKE_CUDA_ARCHITECTURES=OFF handling fully into the default path thus simplifying the code and unifying the code paths for NVCC and Clang. This also happens to fix: 1. CMAKE_CUDA_ARCHITECTURES=OFF on Clang. 2. A theoretical issue of a compiler defaulting to multiple architectures. I've additionally added printing of the compiler output alongside the error to better reveal possible underlying compiler/system configuration issues. Fixes #23010. 
--- .../dev/cuda-compiler-detection-robustness.rst | 11 ++++ Modules/CMakeDetermineCUDACompiler.cmake | 62 +++++++++++----------- 2 files changed, 41 insertions(+), 32 deletions(-) create mode 100644 Help/release/dev/cuda-compiler-detection-robustness.rst diff --git a/Help/release/dev/cuda-compiler-detection-robustness.rst b/Help/release/dev/cuda-compiler-detection-robustness.rst new file mode 100644 index 0000000..cc49a8d --- /dev/null +++ b/Help/release/dev/cuda-compiler-detection-robustness.rst @@ -0,0 +1,11 @@ +cuda-compiler-detection-robustness +---------------------------------- + +* CUDA compiler detection now issues an error in all cases when it's unable to + compute the default architecture(s) if required (see :policy:`CMP0104`). + +* CUDA compiler detection now correctly handles ``OFF`` for + :variable:`CMAKE_CUDA_ARCHITECTURES` on Clang. + +* CUDA compiler detection now supports the theoretical case of multiple default + architectures. diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index 73b1017..8fe07fe 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -353,18 +353,12 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio") set(_SET_CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT "set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT \"${CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT}\")") elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang") - if(NOT CMAKE_CUDA_ARCHITECTURES) - # Find the architecture that we successfully compiled using and set it as the default. 
- string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") - set(detected_architecture "${CMAKE_MATCH_1}") - else() - string(REGEX MATCHALL "-target-cpu sm_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") + string(REGEX MATCHALL "-target-cpu sm_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") - foreach(cpu ${target_cpus}) - string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${cpu}") - list(APPEND architectures "${CMAKE_MATCH_1}") - endforeach() - endif() + foreach(cpu ${target_cpus}) + string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${cpu}") + list(APPEND architectures_detected "${CMAKE_MATCH_1}") + endforeach() # Find target directory when crosscompiling. if(CMAKE_CROSSCOMPILING) @@ -590,28 +584,25 @@ if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") "Failed to detect CUDA nvcc include information:\n${_nvcc_log}\n\n") endif() - # Parse default CUDA architecture. - cmake_policy(GET CMP0104 _CUDA_CMP0104) - if(NOT CMAKE_CUDA_ARCHITECTURES AND _CUDA_CMP0104 STREQUAL "NEW") - string(REGEX MATCH "arch[ =]compute_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") - set(detected_architecture "${CMAKE_MATCH_1}") - elseif(CMAKE_CUDA_ARCHITECTURES) - string(REGEX MATCHALL "-arch compute_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") - - foreach(cpu ${target_cpus}) - string(REGEX MATCH "-arch compute_([0-9]+)" dont_care "${cpu}") - list(APPEND architectures "${CMAKE_MATCH_1}") - endforeach() - endif() + string(REGEX MATCHALL "-arch compute_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") + + foreach(cpu ${target_cpus}) + string(REGEX MATCH "-arch compute_([0-9]+)" dont_care "${cpu}") + list(APPEND architectures_detected "${CMAKE_MATCH_1}") + endforeach() endif() # If the user didn't set the architectures, then set them to a default. # If the user did, then make sure those architectures worked. 
-if(DEFINED detected_architecture AND "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") - set(CMAKE_CUDA_ARCHITECTURES "${detected_architecture}" CACHE STRING "CUDA architectures") +if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") + cmake_policy(GET CMP0104 _CUDA_CMP0104) - if(NOT CMAKE_CUDA_ARCHITECTURES) - message(FATAL_ERROR "Failed to find a working CUDA architecture.") + if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" OR _CUDA_CMP0104 STREQUAL "NEW") + set(CMAKE_CUDA_ARCHITECTURES "${architectures_detected}" CACHE STRING "CUDA architectures") + + if(NOT CMAKE_CUDA_ARCHITECTURES) + message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") + endif() endif() elseif(architectures AND (architectures_mode STREQUAL "xall" OR architectures_mode STREQUAL "xall-major")) @@ -624,9 +615,9 @@ elseif(architectures AND (architectures_mode STREQUAL "xall" OR "instead.") endif() -elseif(architectures AND architectures_mode STREQUAL "xexplicit") +elseif(architectures_mode STREQUAL "xexplicit") # Sort since order mustn't matter. - list(SORT architectures) + list(SORT architectures_detected) list(SORT tested_architectures) # We don't distinguish real/virtual architectures during testing. @@ -634,12 +625,19 @@ elseif(architectures AND architectures_mode STREQUAL "xexplicit") # Thus we need to remove duplicates before checking if they're equal. list(REMOVE_DUPLICATES tested_architectures) - if(NOT "${architectures}" STREQUAL "${tested_architectures}") + # Print the actual architectures for generic values (all and all-major). + if(NOT DEFINED architectures_explicit) + set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${tested_architectures})") + else() + set(architectures_error "${tested_architectures}") + endif() + + if(NOT "${architectures_detected}" STREQUAL "${tested_architectures}") message(FATAL_ERROR "The CMAKE_CUDA_ARCHITECTURES:\n" " ${CMAKE_CUDA_ARCHITECTURES}\n" "do not all work with this compiler. 
Try:\n" - " ${architectures}\n" + " ${architectures_detected}\n" "instead.") endif() endif() -- cgit v0.12