summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobert Maynard <rmaynard@nvidia.com>2021-08-17 16:28:23 (GMT)
committerRobert Maynard <rmaynard@nvidia.com>2021-11-01 18:57:58 (GMT)
commit14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4 (patch)
tree2074643d278024f89622b959a7795ee57438c222
parente1acb03cd9b53fe81dd9e1696ac293ae5ba468cc (diff)
downloadCMake-14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4.zip
CMake-14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4.tar.gz
CMake-14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4.tar.bz2
CUDA: Support nvcc 11.5 new -arch=all|all-major flags
-rw-r--r--Help/prop_tgt/CUDA_ARCHITECTURES.rst12
-rw-r--r--Help/release/dev/cuda-new-arch-modes.rst10
-rw-r--r--Modules/CMakeDetermineCUDACompiler.cmake38
-rw-r--r--Source/cmGeneratorTarget.cxx19
-rw-r--r--Tests/CudaOnly/All/CMakeLists.txt44
-rw-r--r--Tests/CudaOnly/All/main.cu3
-rw-r--r--Tests/CudaOnly/CMakeLists.txt2
7 files changed, 117 insertions, 11 deletions
diff --git a/Help/prop_tgt/CUDA_ARCHITECTURES.rst b/Help/prop_tgt/CUDA_ARCHITECTURES.rst
index a3191e8..41e5ae4 100644
--- a/Help/prop_tgt/CUDA_ARCHITECTURES.rst
+++ b/Help/prop_tgt/CUDA_ARCHITECTURES.rst
@@ -20,6 +20,18 @@ variable if it is set when a target is created.
The ``CUDA_ARCHITECTURES`` target property must be set to a non-empty value on targets
that compile CUDA sources, or it is an error. See policy :policy:`CMP0104`.
+.. versionadded:: 3.23
+
+ The ``CUDA_ARCHITECTURES`` property may be set to one of the following special keywords:
+
+ ``all``
+ Requires the ``NVIDIA`` compiler, version 11.5 or newer. Compiles for all
+ supported major and minor real architectures, and the highest major virtual architecture.
+
+ ``all-major``
+ Requires the ``NVIDIA`` compiler, version 11.5 or newer. Compiles for all
+ supported major real architectures, and the highest major virtual architecture.
+
Examples
^^^^^^^^
diff --git a/Help/release/dev/cuda-new-arch-modes.rst b/Help/release/dev/cuda-new-arch-modes.rst
new file mode 100644
index 0000000..549abc3
--- /dev/null
+++ b/Help/release/dev/cuda-new-arch-modes.rst
@@ -0,0 +1,10 @@
+cuda-new-arch-modes
+-------------------
+
+* The :prop_tgt:`CUDA_ARCHITECTURES` target property now supports the
+ ``all`` and ``all-major`` values when the CUDA compiler id is ``NVIDIA``
+ and the compiler version is 11.5 or newer.
+
+* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable now supports the
+ ``all`` and ``all-major`` values when the CUDA compiler id is ``NVIDIA``
+ and the compiler version is 11.5 or newer.
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index d06315e..8479831 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -258,13 +258,22 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
# Append user-specified architectures.
if(CMAKE_CUDA_ARCHITECTURES)
- foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
- # Strip specifiers as PTX vs binary doesn't matter.
- string(REGEX MATCH "[0-9]+" arch_name "${arch}")
- string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
- string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
- list(APPEND tested_architectures "${arch_name}")
- endforeach()
+ if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall")
+ string(APPEND nvcc_test_flags " -arch=all")
+ set(architectures_mode all)
+ elseif("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall-major")
+ string(APPEND nvcc_test_flags " -arch=all-major")
+ set(architectures_mode all-major)
+ else()
+ set(architectures_mode explicit)
+ foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
+ # Strip specifiers as PTX vs binary doesn't matter.
+ string(REGEX MATCH "[0-9]+" arch_name "${arch}")
+ string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}")
+ string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}")
+ list(APPEND tested_architectures "${arch_name}")
+ endforeach()
+ endif()
# If the user has specified architectures we'll want to fail during compiler detection if they don't work.
set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON)
@@ -597,7 +606,18 @@ if(DEFINED detected_architecture AND "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "")
if(NOT CMAKE_CUDA_ARCHITECTURES)
message(FATAL_ERROR "Failed to find a working CUDA architecture.")
endif()
-elseif(architectures)
+elseif(architectures AND ("x${architectures_mode}" STREQUAL "xall" OR
+ "x${architectures_mode}" STREQUAL "xall-major"))
+ if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+ message(FATAL_ERROR
+ "The CMAKE_CUDA_ARCHITECTURES:\n"
+ " ${CMAKE_CUDA_ARCHITECTURES}\n"
+ "is not supported with the ${CMAKE_CUDA_COMPILER_ID} compiler. Try:\n"
+ " ${architectures}\n"
+ "instead.")
+ endif()
+ # For NVIDIA the keyword was passed to nvcc as -arch=<keyword>; there is no explicit list to compare.
+elseif(architectures AND "x${architectures_mode}" STREQUAL "xexplicit")
# Sort since order mustn't matter.
list(SORT architectures)
list(SORT tested_architectures)
@@ -630,5 +650,7 @@ unset(_CUDA_LIBRARY_DIR)
unset(_CUDA_TARGET_DIR)
unset(_CUDA_TARGET_NAME)
+unset(architectures_mode)
+
set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX")
set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX")
diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx
index fc02a47..8cc524a 100644
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -3317,6 +3317,22 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
return;
}
+ std::string const& compiler =
+ this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
+
+ // Check for special modes: `all`, `all-major`.
+ if (property == "all") {
+ if (compiler == "NVIDIA") {
+ flags += " -arch=all";
+ return;
+ }
+ } else if (property == "all-major") {
+ if (compiler == "NVIDIA") {
+ flags += " -arch=all-major";
+ return;
+ }
+ }
+
struct CudaArchitecture
{
std::string name;
@@ -3358,9 +3374,6 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
}
}
- std::string const& compiler =
- this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID");
-
if (compiler == "NVIDIA") {
for (CudaArchitecture& architecture : architectures) {
flags +=
diff --git a/Tests/CudaOnly/All/CMakeLists.txt b/Tests/CudaOnly/All/CMakeLists.txt
new file mode 100644
index 0000000..fe29bb0
--- /dev/null
+++ b/Tests/CudaOnly/All/CMakeLists.txt
@@ -0,0 +1,44 @@
+cmake_minimum_required(VERSION 3.20)
+project(CudaOnlyAll CUDA)
+# The special-mode checks below are enabled only for the NVIDIA compiler, version 11.5.0 or newer.
+if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
+ CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.5.0)
+ # Shared compile option, plus a helper that requires at least two "-arch compute_NN" matches in a log.
+ set(compile_options -Wno-deprecated-gpu-targets)
+ function(verify_output flag output_var)
+ string(REGEX MATCHALL "-arch compute_([0-9]+)" target_archs "${${output_var}}")
+ list(LENGTH target_archs count)
+ if(count LESS 2)
+ message(FATAL_ERROR "${flag} failed to map to multiple architectures")
+ endif()
+ endfunction()
+endif()
+# verify_output is defined only when the compiler check above passed.
+if(COMMAND verify_output)
+ set(try_compile_flags -v ${compile_options})
+ # Compile main.cu with CMAKE_CUDA_ARCHITECTURES=all and scan the captured output.
+ set(CMAKE_CUDA_ARCHITECTURES all)
+ try_compile(all_archs_compiles
+ ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_archs_compiles
+ ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
+ COMPILE_DEFINITIONS ${try_compile_flags}
+ OUTPUT_VARIABLE output
+ )
+ verify_output(all output)
+ # Repeat the same check with CMAKE_CUDA_ARCHITECTURES=all-major.
+ set(CMAKE_CUDA_ARCHITECTURES all-major)
+ try_compile(all_major_archs_compiles
+ ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_major_archs_compiles
+ ${CMAKE_CURRENT_SOURCE_DIR}/main.cu
+ COMPILE_DEFINITIONS ${try_compile_flags}
+ OUTPUT_VARIABLE output
+ )
+ verify_output(all-major output)
+ # Create the test executable only when both try_compile checks succeeded.
+ if(all_archs_compiles AND all_major_archs_compiles)
+ add_executable(CudaOnlyAll main.cu)
+ target_compile_options(CudaOnlyAll PRIVATE ${compile_options})
+ endif()
+else()
+ add_executable(CudaOnlyAll main.cu)
+endif()
diff --git a/Tests/CudaOnly/All/main.cu b/Tests/CudaOnly/All/main.cu
new file mode 100644
index 0000000..5047a34
--- /dev/null
+++ b/Tests/CudaOnly/All/main.cu
@@ -0,0 +1,3 @@
+int main()
+{
+}
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index 65dfebb..cacfb76 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -4,8 +4,10 @@ macro (add_cuda_test_macro name)
PROPERTY LABELS "CUDA")
endmacro ()
+add_cuda_test_macro(CudaOnly.All CudaOnlyAll)
add_cuda_test_macro(CudaOnly.Architecture Architecture)
add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags)
+
add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard)
add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX)
add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)