diff options
author | Robert Maynard <rmaynard@nvidia.com> | 2021-08-17 16:28:23 (GMT) |
---|---|---|
committer | Robert Maynard <rmaynard@nvidia.com> | 2021-11-01 18:57:58 (GMT) |
commit | 14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4 (patch) | |
tree | 2074643d278024f89622b959a7795ee57438c222 | |
parent | e1acb03cd9b53fe81dd9e1696ac293ae5ba468cc (diff) | |
download | CMake-14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4.zip CMake-14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4.tar.gz CMake-14d8a2768d8b8c2ba0f341b4bd59a875aaf6c2f4.tar.bz2 |
CUDA: Support nvcc 11.5 new -arch=all|all-major flags
-rw-r--r-- | Help/prop_tgt/CUDA_ARCHITECTURES.rst | 12 | ||||
-rw-r--r-- | Help/release/dev/cuda-new-arch-modes.rst | 10 | ||||
-rw-r--r-- | Modules/CMakeDetermineCUDACompiler.cmake | 38 | ||||
-rw-r--r-- | Source/cmGeneratorTarget.cxx | 19 | ||||
-rw-r--r-- | Tests/CudaOnly/All/CMakeLists.txt | 44 | ||||
-rw-r--r-- | Tests/CudaOnly/All/main.cu | 3 | ||||
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 2 |
7 files changed, 117 insertions, 11 deletions
diff --git a/Help/prop_tgt/CUDA_ARCHITECTURES.rst b/Help/prop_tgt/CUDA_ARCHITECTURES.rst index a3191e8..41e5ae4 100644 --- a/Help/prop_tgt/CUDA_ARCHITECTURES.rst +++ b/Help/prop_tgt/CUDA_ARCHITECTURES.rst @@ -20,6 +20,18 @@ variable if it is set when a target is created. The ``CUDA_ARCHITECTURES`` target property must be set to a non-empty value on targets that compile CUDA sources, or it is an error. See policy :policy:`CMP0104`. +.. versionadded:: 3.23 + + The ``CUDA_ARCHITECTURES`` property may be set to the following special keywords: + + ``all`` + Requires NVIDIA 11.5+. Will compile for all supported major and minor real + architectures, and the highest major virtual architecture. + + ``all-major`` + Requires NVIDIA 11.5+. Will compile for all supported major real + architectures, and the highest major virtual architecture. + Examples ^^^^^^^^ diff --git a/Help/release/dev/cuda-new-arch-modes.rst b/Help/release/dev/cuda-new-arch-modes.rst new file mode 100644 index 0000000..549abc3 --- /dev/null +++ b/Help/release/dev/cuda-new-arch-modes.rst @@ -0,0 +1,10 @@ +cuda-new-arch-modes +------------------- + +* The :prop_tgt:`CUDA_ARCHITECTURES` target property now supports the + ``all`` and ``all-major`` values when the CUDA compiler id is ``NVIDIA``, + and version is 11.5+. + +* The :variable:`CMAKE_CUDA_ARCHITECTURES` variable now supports the + ``all`` and ``all-major`` values when the ``CUDA`` compiler id is ``NVIDIA``, + and version is 11.5+. diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index d06315e..8479831 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -258,13 +258,22 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) # Append user-specified architectures. if(CMAKE_CUDA_ARCHITECTURES) - foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) - # Strip specifiers as PTX vs binary doesn't matter. 
- string(REGEX MATCH "[0-9]+" arch_name "${arch}") - string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}") - string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}") - list(APPEND tested_architectures "${arch_name}") - endforeach() + if("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall") + string(APPEND nvcc_test_flags " -arch=all") + set(architectures_mode all) + elseif("x${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "xall-major") + string(APPEND nvcc_test_flags " -arch=all-major") + set(architectures_mode all-major) + else() + set(architectures_mode explicit) + foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) + # Strip specifiers as PTX vs binary doesn't matter. + string(REGEX MATCH "[0-9]+" arch_name "${arch}") + string(APPEND clang_test_flags " --cuda-gpu-arch=sm_${arch_name}") + string(APPEND nvcc_test_flags " -gencode=arch=compute_${arch_name},code=sm_${arch_name}") + list(APPEND tested_architectures "${arch_name}") + endforeach() + endif() # If the user has specified architectures we'll want to fail during compiler detection if they don't work. set(CMAKE_CUDA_COMPILER_ID_REQUIRE_SUCCESS ON) @@ -597,7 +606,18 @@ if(DEFINED detected_architecture AND "${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "") if(NOT CMAKE_CUDA_ARCHITECTURES) message(FATAL_ERROR "Failed to find a working CUDA architecture.") endif() -elseif(architectures) +elseif(architectures AND (architectures_mode STREQUAL "xall" OR + architectures_mode STREQUAL "xall-major")) + if(NOT CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + message(FATAL_ERROR + "The CMAKE_CUDA_ARCHITECTURES:\n" + " ${CMAKE_CUDA_ARCHITECTURES}\n" + "is not supported with the ${CMAKE_CUDA_COMPILER_ID} compiler. Try:\n" + " ${architectures}\n" + "instead.") + endif() + +elseif(architectures AND architectures_mode STREQUAL "xexplicit") # Sort since order mustn't matter. 
list(SORT architectures) list(SORT tested_architectures) @@ -630,5 +650,7 @@ unset(_CUDA_LIBRARY_DIR) unset(_CUDA_TARGET_DIR) unset(_CUDA_TARGET_NAME) +unset(architectures_mode) + set(CMAKE_CUDA_COMPILER_ENV_VAR "CUDACXX") set(CMAKE_CUDA_HOST_COMPILER_ENV_VAR "CUDAHOSTCXX") diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx index fc02a47..8cc524a 100644 --- a/Source/cmGeneratorTarget.cxx +++ b/Source/cmGeneratorTarget.cxx @@ -3317,6 +3317,22 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const return; } + std::string const& compiler = + this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID"); + + // Check for special modes: `all`, `all-major`. + if (property == "all") { + if (compiler == "NVIDIA") { + flags += " -arch=all"; + return; + } + } else if (property == "all-major") { + if (compiler == "NVIDIA") { + flags += " -arch=all-major"; + return; + } + } + struct CudaArchitecture { std::string name; @@ -3358,9 +3374,6 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const } } - std::string const& compiler = - this->Makefile->GetSafeDefinition("CMAKE_CUDA_COMPILER_ID"); - if (compiler == "NVIDIA") { for (CudaArchitecture& architecture : architectures) { flags += diff --git a/Tests/CudaOnly/All/CMakeLists.txt b/Tests/CudaOnly/All/CMakeLists.txt new file mode 100644 index 0000000..fe29bb0 --- /dev/null +++ b/Tests/CudaOnly/All/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.20) +project(CudaOnlyAll CUDA) + +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND + CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.5.0) + + set(compile_options -Wno-deprecated-gpu-targets) + function(verify_output flag output_var) + string(REGEX MATCHALL "-arch compute_([0-9]+)" target_archs "${${output_var}}") + list(LENGTH target_archs count) + if(count LESS 2) + message(FATAL_ERROR "${flag} failed to map to multiple architectures") + endif() + endfunction() +endif() + +if(COMMAND verify_output) + 
set(try_compile_flags -v ${compile_options}) + + set(CMAKE_CUDA_ARCHITECTURES all) + try_compile(all_archs_compiles + ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_archs_compiles + ${CMAKE_CURRENT_SOURCE_DIR}/main.cu + COMPILE_DEFINITIONS ${try_compile_flags} + OUTPUT_VARIABLE output + ) + verify_output(all output) + + set(CMAKE_CUDA_ARCHITECTURES all-major) + try_compile(all_major_archs_compiles + ${CMAKE_CURRENT_BINARY_DIR}/try_compile/all_major_archs_compiles + ${CMAKE_CURRENT_SOURCE_DIR}/main.cu + COMPILE_DEFINITIONS ${try_compile_flags} + OUTPUT_VARIABLE output + ) + verify_output(all-major output) + + if(all_archs_compiles AND all_major_archs_compiles) + add_executable(CudaOnlyAll main.cu) + target_compile_options(CudaOnlyAll PRIVATE ${compile_options}) + endif() +else() + add_executable(CudaOnlyAll main.cu) +endif() diff --git a/Tests/CudaOnly/All/main.cu b/Tests/CudaOnly/All/main.cu new file mode 100644 index 0000000..5047a34 --- /dev/null +++ b/Tests/CudaOnly/All/main.cu @@ -0,0 +1,3 @@ +int main() +{ +} diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index 65dfebb..cacfb76 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -4,8 +4,10 @@ macro (add_cuda_test_macro name) PROPERTY LABELS "CUDA") endmacro () +add_cuda_test_macro(CudaOnly.All CudaOnlyAll) add_cuda_test_macro(CudaOnly.Architecture Architecture) add_cuda_test_macro(CudaOnly.CompileFlags CudaOnlyCompileFlags) + add_cuda_test_macro(CudaOnly.EnableStandard CudaOnlyEnableStandard) add_cuda_test_macro(CudaOnly.ExportPTX CudaOnlyExportPTX) add_cuda_test_macro(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit) |