From c267ed205a2ef2a2396b4d00b593d617b2befdcc Mon Sep 17 00:00:00 2001
From: Brad King <brad.king@kitware.com>
Date: Mon, 25 Apr 2022 15:01:36 -0400
Subject: CUDA: Defer architecture testing to the compiler testing step

Verifying the architectures during compiler identification is redundant,
and requires a lot more up-front information than we should need.
It also causes unsupported architectures to break the compiler id and
version detection, so the resulting output from CMake does not report
the compiler version, which is useful for understanding why the
specified architectures are not supported.

The "detecting compiler ABI info" and "check for working compiler" steps
already pass `CMAKE_CUDA_ARCHITECTURES` into their test projects.
Therefore we can just drop the earlier architecture testing.  Bad
architectures will be reported as a not-working compiler, and the
output will include the compiler's error message.

This reverts the approach from:

* commit 19cc5bc296 (CUDA: Throw error if user-specified architectures
                     don't work, 2020-05-26, v3.18.0-rc1~79^2)
* commit 650c1029a0 (CUDA: Detect non-working user-specified architectures
                     on NVCC, 2020-05-28, v3.18.0-rc1~51^2)
* commit 01428c5560 (CUDA: Fail fast if CMAKE_CUDA_ARCHITECTURES
                     doesn't work during detection,
                     2020-08-29, v3.19.0-rc1~241^2).

Their goal was in part to avoid waiting until the test for working
compiler to detect unsupported architectures.  However, experience has
shown that failing earlier is more trouble than it's worth.

Fixes: #23161
Issue: #20756
---
 Modules/CMakeDetermineCUDACompiler.cmake           | 113 ++++-----------------
 Modules/CMakeDetermineCompilerId.cmake             |   8 +-
 Modules/CUDA/architectures.cmake                   |  60 -----------
 .../architectures-empty-stderr.txt                 |   2 +-
 .../architectures-invalid-stderr.txt               |  11 +-
 5 files changed, 32 insertions(+), 162 deletions(-)

diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index 66020e8..23da8ee 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -248,10 +248,6 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     if(CMAKE_CUDA_COMPILER_ID_OUTPUT MATCHES [=[V([0-9]+\.[0-9]+\.[0-9]+)]=])
       set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION "${CMAKE_MATCH_1}")
     endif()
-
-    # Make the all, all-major, and native architecture information available.
-    # FIXME(#23161): Defer architecture detection until compiler testing.
-    include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
   endif()
 
   set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
@@ -273,76 +269,34 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     endif()
   endif()
 
-  # FIXME(#23161): Defer architecture testing until compiler testing.
-  if(DEFINED CMAKE_CUDA_ARCHITECTURES)
-    if(CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major)$")
-      # For sufficiently new NVCC we can just use the all and all-major flags.
-      # For VS we don't test since we can't figure out the version this early (see #23161).
-      # For others select based on version.
-      if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.5)
-        string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      else()
-        if(CMAKE_CUDA_ARCHITECTURES STREQUAL "all")
-          set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL})
-        elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "all-major")
-          set(architectures_test ${CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR})
-        endif()
-      endif()
-    elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
-      # For sufficiently new NVCC we can just use the 'native' value directly.
-      # For VS we don't test since we can't find nvcc this early (see #23161).
-      if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.6)
-        string(APPEND nvcc_test_flags " -arch=${CMAKE_CUDA_ARCHITECTURES}")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      elseif(CMAKE_GENERATOR MATCHES "Visual Studio")
-        set(architectures_tested "${CMAKE_CUDA_ARCHITECTURES}")
-      else()
-        set(architectures_test ${_CUDA_ARCHITECTURES_NATIVE})
-      endif()
-    elseif(CMAKE_CUDA_ARCHITECTURES OR "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
-      # Explicit architectures.  Test them during detection.
-      set(architectures_explicit TRUE)
-      set(architectures_test ${CMAKE_CUDA_ARCHITECTURES})
-    endif()
-  endif()
-
-  foreach(arch ${architectures_test})
-    # Strip specifiers as PTX vs binary doesn't matter.
-    string(REGEX MATCH "[0-9]+" arch_name "${arch}")
-    string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
-    string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
-    list(APPEND architectures_tested "${arch_name}")
-  endforeach()
-
   # Rest of the code treats an empty value as equivalent to "use the defaults".
   # Error out early to prevent confusing errors as a result of this.
   # Note that this also catches invalid non-numerical values such as "a".
-  if(DEFINED architectures_explicit AND "${architectures_tested}" STREQUAL "")
-    message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be valid if set.")
+  if(DEFINED CMAKE_CUDA_ARCHITECTURES)  # Syntax check only; actual support is verified later by compiler testing.
+    if(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
+      message(FATAL_ERROR "CMAKE_CUDA_ARCHITECTURES must be non-empty if set.")
+    elseif(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^([0-9]+(-real|-virtual)?(;[0-9]+(-real|-virtual)?|;)*|all|all-major|native)$")
+      message(FATAL_ERROR  # Entries like 70-real;70-virtual pass the check above; a false value (e.g. OFF) skips it.
+        "CMAKE_CUDA_ARCHITECTURES:\n"
+        "  ${CMAKE_CUDA_ARCHITECTURES}\n"
+        "is not one of the following:\n"
+        "* a semicolon-separated list of integers\n"
+        "* a special value: all, all-major, native\n"
+        )
+    endif()
   endif()
 
   if(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
-    if(NOT CMAKE_CUDA_ARCHITECTURES)
-      # Clang doesn't automatically select an architecture supported by the SDK.
-      # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
-      foreach(arch "52" "30" "20")
-        list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch}")
-      endforeach()
-    endif()
-
-    # If the user specified CMAKE_CUDA_ARCHITECTURES this will include all the architecture flags.
-    # Otherwise this won't include any architecture flags and we'll fallback to Clang's defaults.
-    list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags}")
+    # Clang doesn't automatically select an architecture supported by the SDK.
+    # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
+    foreach(arch "52" "30" "20")
+      list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch}")
+    endforeach()
   elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
     list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${nvcc_test_flags}")
   endif()
 
   # We perform compiler identification for a second time to extract implicit linking info and host compiler for NVCC.
-  # We also use it to verify that CMAKE_CUDA_ARCHITECTURES and additionally on Clang that CUDA toolkit path works.
-  # The latter could be done during compiler testing in the future to avoid doing this for Clang.
   # We need to unset the compiler ID otherwise CMAKE_DETERMINE_COMPILER_ID() doesn't work.
   set(CMAKE_CUDA_COMPILER_ID)
   set(CMAKE_CUDA_PLATFORM_ID)
@@ -357,12 +311,12 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     get_filename_component(CMAKE_CUDA_COMPILER_TOOLKIT_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}" DIRECTORY)
     set(CMAKE_CUDA_COMPILER_LIBRARY_ROOT "${CMAKE_CUDA_COMPILER_TOOLKIT_ROOT}")
 
-    # We now know the version, so make the architecture variables available.
+    # The compiler comes with the toolkit, so the versions are the same.
     set(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION ${CMAKE_CUDA_COMPILER_VERSION})
-    # FIXME(#23161): Defer architecture detection until compiler testing.
-    include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
   endif()
 
+  include(${CMAKE_ROOT}/Modules/CUDA/architectures.cmake)
+
   _cmake_find_compiler_sysroot(CUDA)
 endif()
 
@@ -647,31 +601,6 @@ if("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
       message(FATAL_ERROR "Failed to detect a default CUDA architecture.\n\nCompiler output:\n${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
     endif()
   endif()
-elseif(CMAKE_CUDA_ARCHITECTURES AND NOT "${architectures_tested}" MATCHES "^(all|all-major|native)$")
-  # Sort since order mustn't matter.
-  list(SORT architectures_detected)
-  list(SORT architectures_tested)
-
-  # We don't distinguish real/virtual architectures during testing.
-  # For "70-real;70-virtual" we detect "70" as working and architectures_tested is "70;70".
-  # Thus we need to remove duplicates before checking if they're equal.
-  list(REMOVE_DUPLICATES architectures_tested)
-
-  # Print the actual architectures for generic values (all and all-major).
-  if(NOT DEFINED architectures_explicit)
-    set(architectures_error "${CMAKE_CUDA_ARCHITECTURES} (${architectures_tested})")
-  else()
-    set(architectures_error "${architectures_tested}")
-  endif()
-
-  if(NOT "${architectures_detected}" STREQUAL "${architectures_tested}")
-    message(FATAL_ERROR
-      "The CMAKE_CUDA_ARCHITECTURES:\n"
-      "  ${architectures_error}\n"
-      "do not all work with this compiler.  Try:\n"
-      "  ${architectures_detected}\n"
-      "instead.")
-  endif()
 endif()
 
 # configure all variables set in this file
@@ -687,9 +616,7 @@ unset(_CUDA_LIBRARY_DIR)
 unset(_CUDA_TARGET_DIR)
 unset(_CUDA_TARGET_NAME)
 
-unset(architectures_explicit)
 unset(architectures_detected)
-unset(architectures_tested)
 
 set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
 set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
diff --git a/Modules/CMakeDetermineCompilerId.cmake b/Modules/CMakeDetermineCompilerId.cmake
index a90fa5d..0e8b2af 100644
--- a/Modules/CMakeDetermineCompilerId.cmake
+++ b/Modules/CMakeDetermineCompilerId.cmake
@@ -495,13 +495,7 @@ Id flags: ${testflags} ${CMAKE_${lang}_COMPILER_ID_FLAGS_ALWAYS}
       if(CMAKE_VS_PLATFORM_NAME STREQUAL x64)
         set(cuda_target "<TargetMachinePlatform>64</TargetMachinePlatform>")
       endif()
-      if(CMAKE_CUDA_ARCHITECTURES AND NOT CMAKE_CUDA_ARCHITECTURES MATCHES "^(all|all-major|native)$")
-        foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
-          string(REGEX MATCH "[0-9]+" arch_name "${arch}")
-          string(APPEND cuda_codegen "compute_${arch_name},sm_${arch_name};")
-        endforeach()
-      endif()
-      set(id_ItemDefinitionGroup_entry "<CudaCompile>${cuda_target}<AdditionalOptions>%(AdditionalOptions)-v</AdditionalOptions><CodeGeneration>${cuda_codegen}</CodeGeneration></CudaCompile>")
+      set(id_ItemDefinitionGroup_entry "<CudaCompile>${cuda_target}<AdditionalOptions>%(AdditionalOptions)-v</AdditionalOptions></CudaCompile>")
       set(id_PostBuildEvent_Command [[echo CMAKE_CUDA_COMPILER=$(CudaToolkitBinDir)\nvcc.exe]])
       if(CMAKE_VS_PLATFORM_TOOLSET_CUDA_CUSTOM_DIR)
         # check for legacy cuda custom toolkit folder structure
diff --git a/Modules/CUDA/architectures.cmake b/Modules/CUDA/architectures.cmake
index 9b1f2b5..fa3a5a1 100644
--- a/Modules/CUDA/architectures.cmake
+++ b/Modules/CUDA/architectures.cmake
@@ -44,63 +44,3 @@ if(CMAKE_CUDA_COMPILER_TOOLKIT_VERSION VERSION_GREATER_EQUAL 11.4
    AND (NOT CMAKE_CUDA_COMPILER_ID STREQUAL "Clang"))
   list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 87)
 endif()
-
-# FIXME(#23161): Detect architectures early since we test them during
-# compiler detection.  We already have code to detect them later during
-# compiler testing, so we should not need to do this here.
-if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
-  set(_CUDA_ARCHS_EXE "${CMAKE_PLATFORM_INFO_DIR}/CMakeDetermineCUDACompilerArchs.bin")
-  execute_process(
-    COMMAND "${_CUDA_NVCC_EXECUTABLE}" -o "${_CUDA_ARCHS_EXE}" --cudart=static "${CMAKE_ROOT}/Modules/CMakeCUDACompilerABI.cu"
-    RESULT_VARIABLE _CUDA_ARCHS_RESULT
-    OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
-    ERROR_VARIABLE  _CUDA_ARCHS_OUTPUT
-    )
-  if(_CUDA_ARCHS_RESULT EQUAL 0)
-    execute_process(
-      COMMAND "${_CUDA_ARCHS_EXE}"
-      RESULT_VARIABLE _CUDA_ARCHS_RESULT
-      OUTPUT_VARIABLE _CUDA_ARCHS_OUTPUT
-      ERROR_VARIABLE  _CUDA_ARCHS_OUTPUT
-      OUTPUT_STRIP_TRAILING_WHITESPACE
-      )
-  endif()
-  if(_CUDA_ARCHS_RESULT EQUAL 0)
-    if("$ENV{CMAKE_CUDA_ARCHITECTURES_NATIVE_CLAMP}")
-      # Undocumented hook used by CMake's CI.
-      # Clamp native architecture to version range supported by this CUDA.
-      list(GET CMAKE_CUDA_ARCHITECTURES_ALL 0  _CUDA_ARCH_MIN)
-      list(GET CMAKE_CUDA_ARCHITECTURES_ALL -1 _CUDA_ARCH_MAX)
-      set(_CUDA_ARCHITECTURES_NATIVE "")
-      foreach(_CUDA_ARCH IN LISTS _CUDA_ARCHS_OUTPUT)
-        if(_CUDA_ARCH LESS _CUDA_ARCH_MIN)
-          set(_CUDA_ARCH "${_CUDA_ARCH_MIN}")
-        endif()
-        if(_CUDA_ARCH GREATER _CUDA_ARCH_MAX)
-          set(_CUDA_ARCH "${_CUDA_ARCH_MAX}")
-        endif()
-        list(APPEND _CUDA_ARCHITECTURES_NATIVE ${_CUDA_ARCH})
-      endforeach()
-      unset(_CUDA_ARCH)
-      unset(_CUDA_ARCH_MIN)
-      unset(_CUDA_ARCH_MAX)
-    else()
-      set(_CUDA_ARCHITECTURES_NATIVE "${_CUDA_ARCHS_OUTPUT}")
-    endif()
-    list(REMOVE_DUPLICATES _CUDA_ARCHITECTURES_NATIVE)
-  else()
-    if (NOT _CUDA_ARCHS_RESULT MATCHES "[0-9]+")
-      set(_CUDA_ARCHS_STATUS " (${_CUDA_ARCHS_RESULT})")
-    else()
-      set(_CUDA_ARCHS_STATUS "")
-    endif()
-    string(REPLACE "\n" "\n  " _CUDA_ARCHS_OUTPUT "  ${_CUDA_ARCHS_OUTPUT}")
-    file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log
-      "Detecting the CUDA native architecture(s) failed with "
-      "the following output:\n${_CUDA_ARCHS_OUTPUT}\n\n")
-    set(_CUDA_ARCHS_OUTPUT "")
-  endif()
-  unset(_CUDA_ARCHS_EXE)
-  unset(_CUDA_ARCHS_RESULT)
-  unset(_CUDA_ARCHS_OUTPUT)
-endif()
diff --git a/Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt b/Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt
index 39640fa..6c42612 100644
--- a/Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt
+++ b/Tests/RunCMake/CUDA_architectures/architectures-empty-stderr.txt
@@ -1,5 +1,5 @@
 ^CMake Error at .*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\):
-  CMAKE_CUDA_ARCHITECTURES must be valid if set\.
+  CMAKE_CUDA_ARCHITECTURES must be non-empty if set\.
 Call Stack \(most recent call first\):
   architectures-empty\.cmake:2 \(enable_language\)
   CMakeLists\.txt:3 \(include\)
diff --git a/Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt b/Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt
index 7608730..48f379c 100644
--- a/Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt
+++ b/Tests/RunCMake/CUDA_architectures/architectures-invalid-stderr.txt
@@ -1,5 +1,14 @@
 ^CMake Error at .*/Modules/CMakeDetermineCUDACompiler\.cmake:[0-9]+ \(message\):
-  CMAKE_CUDA_ARCHITECTURES must be valid if set\.
+  CMAKE_CUDA_ARCHITECTURES:
+
+    invalid
+
+  is not one of the following:
+
+  \* a semicolon-separated list of integers
+
+  \* a special value: all, all-major, native
+
 Call Stack \(most recent call first\):
   architectures-invalid\.cmake:2 \(enable_language\)
   CMakeLists\.txt:3 \(include\)$
-- 
cgit v0.12