diff options
24 files changed, 444 insertions, 299 deletions
diff --git a/Help/prop_tgt/CUDA_STANDARD.rst b/Help/prop_tgt/CUDA_STANDARD.rst index a3a2f56..6d6774e 100644 --- a/Help/prop_tgt/CUDA_STANDARD.rst +++ b/Help/prop_tgt/CUDA_STANDARD.rst @@ -7,7 +7,7 @@ This property specifies the CUDA/C++ standard whose features are requested to build this target. For some compilers, this results in adding a flag such as ``-std=gnu++11`` to the compile line. -Supported values are ``98``, ``11``, ``14``. +Supported values are ``98``, ``03``, ``11``, ``14``, ``17``, ``20``. If the value requested does not result in a compile flag being added for the compiler in use, a previous standard flag will be added instead. This diff --git a/Help/release/dev/cuda-clang.rst b/Help/release/dev/cuda-clang.rst new file mode 100644 index 0000000..fa5cd5a --- /dev/null +++ b/Help/release/dev/cuda-clang.rst @@ -0,0 +1,4 @@ +cuda-clang +---------- + +* The ``CUDA`` language now supports Clang as a compiler. diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake index e8b60b6..f9f7574 100644 --- a/Modules/CMakeCUDAInformation.cmake +++ b/Modules/CMakeCUDAInformation.cmake @@ -8,6 +8,19 @@ else() endif() set(CMAKE_INCLUDE_FLAG_CUDA "-I") +# Set implicit links early so compiler-specific modules can use them. +set(__IMPLICT_LINKS ) +foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES}) + string(APPEND __IMPLICT_LINKS " -L\"${dir}\"") +endforeach() +foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES}) + if(${lib} MATCHES "/") + string(APPEND __IMPLICT_LINKS " \"${lib}\"") + else() + string(APPEND __IMPLICT_LINKS " -l${lib}") + endif() +endforeach() + # Load compiler-specific information. if(CMAKE_CUDA_COMPILER_ID) include(Compiler/${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL) @@ -97,22 +110,10 @@ include(CMakeCommonLanguageInclude) # CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION # CMAKE_CUDA_LINK_EXECUTABLE -if(CMAKE_CUDA_HOST_COMPILER) +if(CMAKE_CUDA_HOST_COMPILER AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") string(APPEND _CMAKE_CUDA_EXTRA_FLAGS " -ccbin=<CMAKE_CUDA_HOST_COMPILER>") endif() -set(__IMPLICT_LINKS ) -foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES}) - string(APPEND __IMPLICT_LINKS " -L\"${dir}\"") -endforeach() -foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES}) - if(${lib} MATCHES "/") - string(APPEND __IMPLICT_LINKS " \"${lib}\"") - else() - string(APPEND __IMPLICT_LINKS " -l${lib}") - endif() -endforeach() - # create a shared library if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY) set(CMAKE_CUDA_CREATE_SHARED_LIBRARY diff --git a/Modules/CMakeCompilerIdDetection.cmake b/Modules/CMakeCompilerIdDetection.cmake index bb573b7..acd15df 100644 --- a/Modules/CMakeCompilerIdDetection.cmake +++ b/Modules/CMakeCompilerIdDetection.cmake @@ -89,9 +89,8 @@ function(compiler_id_detection outvar lang) ) endif() - #Currently the only CUDA compilers are NVIDIA if(lang STREQUAL CUDA) - set(ordered_compilers NVIDIA) + set(ordered_compilers NVIDIA Clang) endif() if(CID_ID_DEFINE) diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake index ead4125..af36688 100644 --- a/Modules/CMakeDetermineCUDACompiler.cmake +++ b/Modules/CMakeDetermineCUDACompiler.cmake @@ -2,7 +2,7 @@ # file Copyright.txt or https://cmake.org/licensing for details. include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake) -include(${CMAKE_ROOT}/Modules//CMakeParseImplicitLinkInfo.cmake) +include(${CMAKE_ROOT}/Modules/CMakeParseImplicitLinkInfo.cmake) if( NOT ( ("${CMAKE_GENERATOR}" MATCHES "Make") OR ("${CMAKE_GENERATOR}" MATCHES "Ninja") OR @@ -57,16 +57,39 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN) file(READ ${CMAKE_ROOT}/Modules/CMakePlatformId.h.in CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT) - list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVIDIA) - set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \(R\) Cuda compiler driver") + list(APPEND CMAKE_CUDA_COMPILER_ID_VENDORS NVIDIA Clang) + set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \\(R\\) Cuda compiler driver") + set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_Clang "(clang version)") set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]") set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2) + set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v") - set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS -v --keep --keep-dir tmp) + # nvcc + set(nvcc_test_flags "--keep --keep-dir tmp") if(CMAKE_CUDA_HOST_COMPILER) - list(APPEND CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}") + string(APPEND nvcc_test_flags " -ccbin=${CMAKE_CUDA_HOST_COMPILER}") endif() + list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST ${nvcc_test_flags}) + + # Clang + if(CMAKE_CROSSCOMPILING) + # Need to pass the host target and include directories if we're crosscompiling. + set(clang_test_flags "--sysroot=\"${CMAKE_SYSROOT}\" --target=${CMAKE_CUDA_COMPILER_TARGET}") + else() + set(clang_test_flags) + endif() + + # Clang doesn't automatically select an architecture supported by the SDK. + # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups). + foreach(arch ${CMAKE_CUDA_ARCHITECTURES} "20" "30" "52") + # Strip specifiers. + string(REGEX MATCH "[0-9]+" arch_name "${arch}") + list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch_name}") + endforeach() + + # Finally also try the default. + list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags}") include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake) CMAKE_DETERMINE_COMPILER_ID(CUDA CUDAFLAGS CMakeCUDACompilerId.cu) @@ -89,6 +112,33 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio") set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "") set(CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "") set(CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") +elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang") + # Parse default CUDA architecture. + if(NOT CMAKE_CUDA_ARCHITECTURES) + string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") + set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_MATCH_1}" CACHE STRING "CUDA architectures") + + if(NOT CMAKE_CUDA_ARCHITECTURES) + message(FATAL_ERROR "Failed to find default CUDA architecture.") + endif() + endif() + + # Parsing implicit host linker info is as simple as for regular Clang. + CMAKE_PARSE_IMPLICIT_LINK_INFO("${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}" + CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES + CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES + CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES + log + "${CMAKE_CUDA_IMPLICIT_OBJECT_REGEX}") + + # Get SDK directory. + string(REGEX MATCH "Found CUDA installation: (.+), version" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") + set(__cuda_directory "${CMAKE_MATCH_1}") + + # Clang doesn't add the SDK library directory to the implicit link path. Do it ourselves, so stuff works. + include(Internal/CUDAToolkit) + set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${CUDAToolkit_INCLUDE_DIR}") + list(APPEND CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "${CUDAToolkit_LIBRARY_DIR}") elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") set(_nvcc_log "") string(REPLACE "\r" "" _nvcc_output_orig "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}") diff --git a/Modules/CMakeTestCUDACompiler.cmake b/Modules/CMakeTestCUDACompiler.cmake index 05811a8..b3b62bd 100644 --- a/Modules/CMakeTestCUDACompiler.cmake +++ b/Modules/CMakeTestCUDACompiler.cmake @@ -74,20 +74,27 @@ else() # - cudart_static # - cudadevrt # + # Additionally on Linux: + # - rt + # - pthread + # - dl + # # These are controlled by CMAKE_CUDA_RUNTIME_LIBRARY - list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt) - list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt) + list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl) + list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl) - # Remove the CUDA Toolkit include directories from the set of - # implicit system include directories. - # This resolves the issue that NVCC doesn't specify these - # includes as SYSTEM includes when compiling device code, and sometimes - # they contain headers that generate warnings, so let users mark them - # as SYSTEM explicitly - if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) - list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES - ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} - ) + if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + # Remove the CUDA Toolkit include directories from the set of + # implicit system include directories. + # This resolves the issue that NVCC doesn't specify these + # includes as SYSTEM includes when compiling device code, and sometimes + # they contain headers that generate warnings, so let users mark them + # as SYSTEM explicitly + if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES) + list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + ) + endif() endif() # Re-configure to save learned information. diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake new file mode 100644 index 0000000..a970985 --- /dev/null +++ b/Modules/Compiler/Clang-CUDA.cmake @@ -0,0 +1,25 @@ +include(Compiler/Clang) +__compiler_clang(CUDA) + +# C++03 isn't supported for CXX, but is for CUDA, so we need to set these manually. +# Do this before __compiler_clang_cxx_standards() since that adds the feature. +set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "-std=c++03") +set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "-std=gnu++03") +__compiler_clang_cxx_standards(CUDA) + +set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE) +set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda") +set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S") + +# RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default. +set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}") +set(CMAKE_CUDA_CREATE_SHARED_LIBRARY "<CMAKE_CUDA_COMPILER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>${__IMPLICT_LINKS}") + +set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT "STATIC") +set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static") +set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart") +set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "") + +if(UNIX) + list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl") +endif() diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake index 4b09e6f..eb8d55c 100644 --- a/Modules/Compiler/NVIDIA-CUDA.cmake +++ b/Modules/Compiler/NVIDIA-CUDA.cmake @@ -64,6 +64,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static") set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart") set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "") +if(UNIX) + list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl") +endif() + if("x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC") set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "") set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "") diff --git a/Modules/FindCUDAToolkit.cmake b/Modules/FindCUDAToolkit.cmake index 8d20ff9..b28892e 100644 --- a/Modules/FindCUDAToolkit.cmake +++ b/Modules/FindCUDAToolkit.cmake @@ -473,168 +473,8 @@ Result variables # ############################################################################### -# For NVCC we can easily deduce the SDK binary directory from the compiler path. -if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") - get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY) - set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "") - mark_as_advanced(CUDAToolkit_BIN_DIR) - unset(cuda_dir) -endif() - -# Try language- or user-provided path first. -if(CUDAToolkit_BIN_DIR) - find_program(CUDAToolkit_NVCC_EXECUTABLE - NAMES nvcc nvcc.exe - PATHS ${CUDAToolkit_BIN_DIR} - NO_DEFAULT_PATH - ) -endif() - -# Search using CUDAToolkit_ROOT -find_program(CUDAToolkit_NVCC_EXECUTABLE - NAMES nvcc nvcc.exe - PATHS ENV CUDA_PATH - PATH_SUFFIXES bin -) - -# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error. -if (NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT})) - # Declare error messages now, print later depending on find_package args. - set(fail_base "Could not find nvcc executable in path specified by") - set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}") - set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}") - - if (CUDAToolkit_FIND_REQUIRED) - if (DEFINED CUDAToolkit_ROOT) - message(FATAL_ERROR ${cuda_root_fail}) - elseif (DEFINED ENV{CUDAToolkit_ROOT}) - message(FATAL_ERROR ${env_cuda_root_fail}) - endif() - else() - if (NOT CUDAToolkit_FIND_QUIETLY) - if (DEFINED CUDAToolkit_ROOT) - message(STATUS ${cuda_root_fail}) - elseif (DEFINED ENV{CUDAToolkit_ROOT}) - message(STATUS ${env_cuda_root_fail}) - endif() - endif() - set(CUDAToolkit_FOUND FALSE) - unset(fail_base) - unset(cuda_root_fail) - unset(env_cuda_root_fail) - return() - endif() -endif() - -# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults. -# -# - Linux: /usr/local/cuda-X.Y -# - macOS: /Developer/NVIDIA/CUDA-X.Y -# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y -# -# We will also search the default symlink location /usr/local/cuda first since -# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked -# directory is the desired location. -if (NOT CUDAToolkit_NVCC_EXECUTABLE) - if (UNIX) - if (NOT APPLE) - set(platform_base "/usr/local/cuda-") - else() - set(platform_base "/Developer/NVIDIA/CUDA-") - endif() - else() - set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v") - endif() - - # Build out a descending list of possible cuda installations, e.g. - file(GLOB possible_paths "${platform_base}*") - # Iterate the glob results and create a descending list. - set(possible_versions) - foreach (p ${possible_paths}) - # Extract version number from end of string - string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p}) - if (IS_DIRECTORY ${p} AND p_version) - list(APPEND possible_versions ${p_version}) - endif() - endforeach() - - # Cannot use list(SORT) because that is alphabetical, we need numerical. - # NOTE: this is not an efficient sorting strategy. But even if a user had - # every possible version of CUDA installed, this wouldn't create any - # significant overhead. - set(versions) - foreach (v ${possible_versions}) - list(LENGTH versions num_versions) - # First version, nothing to compare with so just append. - if (num_versions EQUAL 0) - list(APPEND versions ${v}) - else() - # Loop through list. Insert at an index when comparison is - # VERSION_GREATER since we want a descending list. Duplicates will not - # happen since this came from a glob list of directories. - set(i 0) - set(early_terminate FALSE) - while (i LESS num_versions) - list(GET versions ${i} curr) - if (v VERSION_GREATER curr) - list(INSERT versions ${i} ${v}) - set(early_terminate TRUE) - break() - endif() - math(EXPR i "${i} + 1") - endwhile() - # If it did not get inserted, place it at the end. - if (NOT early_terminate) - list(APPEND versions ${v}) - endif() - endif() - endforeach() - - # With a descending list of versions, populate possible paths to search. - set(search_paths) - foreach (v ${versions}) - list(APPEND search_paths "${platform_base}${v}") - endforeach() - - # Force the global default /usr/local/cuda to the front on Unix. - if (UNIX) - list(INSERT search_paths 0 "/usr/local/cuda") - endif() - - # Now search for nvcc again using the platform default search paths. - find_program(CUDAToolkit_NVCC_EXECUTABLE - NAMES nvcc nvcc.exe - PATHS ${search_paths} - PATH_SUFFIXES bin - ) - - # We are done with these variables now, cleanup for caller. - unset(platform_base) - unset(possible_paths) - unset(possible_versions) - unset(versions) - unset(i) - unset(early_terminate) - unset(search_paths) - - if (NOT CUDAToolkit_NVCC_EXECUTABLE) - if (CUDAToolkit_FIND_REQUIRED) - message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.") - elseif(NOT CUDAToolkit_FIND_QUIETLY) - message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.") - endif() - - set(CUDAToolkit_FOUND FALSE) - return() - endif() -endif() - -if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE) - get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY) - set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE) - mark_as_advanced(CUDAToolkit_BIN_DIR) - unset(cuda_dir) -endif() +# Include shared CUDA toolkit location code. +include(Internal/CUDAToolkit) if(CUDAToolkit_NVCC_EXECUTABLE AND CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER) @@ -658,72 +498,22 @@ else() unset(NVCC_OUT) endif() - -get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) - -# Handle cross compilation -if(CMAKE_CROSSCOMPILING) - if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") - # Support for NVPACK - set (CUDAToolkit_TARGET_NAME "armv7-linux-androideabi") - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") - # Support for arm cross compilation - set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf") - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") - # Support for aarch64 cross compilation - if (ANDROID_ARCH_NAME STREQUAL "arm64") - set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi") - else() - set(CUDAToolkit_TARGET_NAME "aarch64-linux") - endif (ANDROID_ARCH_NAME STREQUAL "arm64") - elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - set(CUDAToolkit_TARGET_NAME "x86_64-linux") - endif() - - if (EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") - set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") - # add known CUDA target root path to the set of directories we search for programs, libraries and headers - list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}") - - # Mark that we need to pop the root search path changes after we have - # found all cuda libraries so that searches for our cross-compilation - # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or - # PATh - set(_CUDAToolkit_Pop_ROOT_PATH True) - endif() -else() - # Not cross compiling - set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}") - # Now that we have the real ROOT_DIR, find components inside it. - list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) - - # Mark that we need to pop the prefix path changes after we have - # found the cudart library. - set(_CUDAToolkit_Pop_Prefix True) +if(NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS "Unable to find cudart library.") endif() - -# Find the include/ directory -find_path(CUDAToolkit_INCLUDE_DIR - NAMES cuda_runtime.h -) - -# And find the CUDA Runtime Library libcudart +# Find the CUDA Runtime Library libcudart find_library(CUDA_CUDART NAMES cudart PATH_SUFFIXES lib64 lib/x64 ) -if (NOT CUDA_CUDART) +if(NOT CUDA_CUDART) find_library(CUDA_CUDART NAMES cudart PATH_SUFFIXES lib64/stubs lib/x64/stubs ) endif() -if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY) - message(STATUS "Unable to find cudart library.") -endif() - unset(CUDAToolkit_ROOT_DIR) if(_CUDAToolkit_Pop_Prefix) list(REMOVE_AT CMAKE_PREFIX_PATH -1) @@ -749,8 +539,8 @@ mark_as_advanced(CUDA_CUDART #----------------------------------------------------------------------------- # Construct result variables if(CUDAToolkit_FOUND) - set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR}) - get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE) + set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR}) + get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE) endif() #----------------------------------------------------------------------------- diff --git a/Modules/Internal/CUDAToolkit.cmake b/Modules/Internal/CUDAToolkit.cmake new file mode 100644 index 0000000..ab1e568 --- /dev/null +++ b/Modules/Internal/CUDAToolkit.cmake @@ -0,0 +1,225 @@ +# Distributed under the OSI-approved BSD 3-Clause License. See accompanying +# file Copyright.txt or https://cmake.org/licensing for details. + +# This file is for sharing code for finding basic CUDA toolkit information between +# CMakeDetermineCUDACompiler.cmake and FindCUDAToolkit.cmake. + +# For NVCC we can easily deduce the SDK binary directory from the compiler path. +if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY) + set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "") + mark_as_advanced(CUDAToolkit_BIN_DIR) + unset(cuda_dir) +endif() + +# Try language- or user-provided path first. +if(CUDAToolkit_BIN_DIR) + find_program(CUDAToolkit_NVCC_EXECUTABLE + NAMES nvcc nvcc.exe + PATHS ${CUDAToolkit_BIN_DIR} + NO_DEFAULT_PATH + ) +endif() + +# Search using CUDAToolkit_ROOT +find_program(CUDAToolkit_NVCC_EXECUTABLE + NAMES nvcc nvcc.exe + PATHS ENV CUDA_PATH + PATH_SUFFIXES bin +) + +# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error. +if(NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT})) + # Declare error messages now, print later depending on find_package args. + set(fail_base "Could not find nvcc executable in path specified by") + set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}") + set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}") + + if(CUDAToolkit_FIND_REQUIRED) + if(DEFINED CUDAToolkit_ROOT) + message(FATAL_ERROR ${cuda_root_fail}) + elseif(DEFINED ENV{CUDAToolkit_ROOT}) + message(FATAL_ERROR ${env_cuda_root_fail}) + endif() + else() + if(NOT CUDAToolkit_FIND_QUIETLY) + if(DEFINED CUDAToolkit_ROOT) + message(STATUS ${cuda_root_fail}) + elseif(DEFINED ENV{CUDAToolkit_ROOT}) + message(STATUS ${env_cuda_root_fail}) + endif() + endif() + set(CUDAToolkit_FOUND FALSE) + unset(fail_base) + unset(cuda_root_fail) + unset(env_cuda_root_fail) + return() + endif() +endif() + +# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults. +# +# - Linux: /usr/local/cuda-X.Y +# - macOS: /Developer/NVIDIA/CUDA-X.Y +# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y +# +# We will also search the default symlink location /usr/local/cuda first since +# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked +# directory is the desired location. +if(NOT CUDAToolkit_NVCC_EXECUTABLE) + if(UNIX) + if(NOT APPLE) + set(platform_base "/usr/local/cuda-") + else() + set(platform_base "/Developer/NVIDIA/CUDA-") + endif() + else() + set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v") + endif() + + # Build out a descending list of possible cuda installations, e.g. + file(GLOB possible_paths "${platform_base}*") + # Iterate the glob results and create a descending list. + set(possible_versions) + foreach (p ${possible_paths}) + # Extract version number from end of string + string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p}) + if(IS_DIRECTORY ${p} AND p_version) + list(APPEND possible_versions ${p_version}) + endif() + endforeach() + + # Cannot use list(SORT) because that is alphabetical, we need numerical. + # NOTE: this is not an efficient sorting strategy. But even if a user had + # every possible version of CUDA installed, this wouldn't create any + # significant overhead. + set(versions) + foreach (v ${possible_versions}) + list(LENGTH versions num_versions) + # First version, nothing to compare with so just append. + if(num_versions EQUAL 0) + list(APPEND versions ${v}) + else() + # Loop through list. Insert at an index when comparison is + # VERSION_GREATER since we want a descending list. Duplicates will not + # happen since this came from a glob list of directories. + set(i 0) + set(early_terminate FALSE) + while (i LESS num_versions) + list(GET versions ${i} curr) + if(v VERSION_GREATER curr) + list(INSERT versions ${i} ${v}) + set(early_terminate TRUE) + break() + endif() + math(EXPR i "${i} + 1") + endwhile() + # If it did not get inserted, place it at the end. + if(NOT early_terminate) + list(APPEND versions ${v}) + endif() + endif() + endforeach() + + # With a descending list of versions, populate possible paths to search. + set(search_paths) + foreach (v ${versions}) + list(APPEND search_paths "${platform_base}${v}") + endforeach() + + # Force the global default /usr/local/cuda to the front on Unix. + if(UNIX) + list(INSERT search_paths 0 "/usr/local/cuda") + endif() + + # Now search for nvcc again using the platform default search paths. + find_program(CUDAToolkit_NVCC_EXECUTABLE + NAMES nvcc nvcc.exe + PATHS ${search_paths} + PATH_SUFFIXES bin + ) + + # We are done with these variables now, cleanup for caller. + unset(platform_base) + unset(possible_paths) + unset(possible_versions) + unset(versions) + unset(i) + unset(early_terminate) + unset(search_paths) + + if(NOT CUDAToolkit_NVCC_EXECUTABLE) + if(CUDAToolkit_FIND_REQUIRED) + message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.") + elseif(NOT CUDAToolkit_FIND_QUIETLY) + message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.") + endif() + + set(CUDAToolkit_FOUND FALSE) + return() + endif() +endif() + +if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE) + get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY) + set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE) + mark_as_advanced(CUDAToolkit_BIN_DIR) + unset(cuda_dir) +endif() + +get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE) + +# Handle cross compilation +if(CMAKE_CROSSCOMPILING) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a") + # Support for NVPACK + set(CUDAToolkit_TARGET_NAME "armv7-linux-androideabi") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + # Support for arm cross compilation + set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + # Support for aarch64 cross compilation + if(ANDROID_ARCH_NAME STREQUAL "arm64") + set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi") + else() + set(CUDAToolkit_TARGET_NAME "aarch64-linux") + endif(ANDROID_ARCH_NAME STREQUAL "arm64") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set(CUDAToolkit_TARGET_NAME "x86_64-linux") + endif() + + if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}") + # add known CUDA target root path to the set of directories we search for programs, libraries and headers + list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}") + + # Mark that we need to pop the root search path changes after we have + # found all cuda libraries so that searches for our cross-compilation + # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or + # PATh + set(_CUDAToolkit_Pop_ROOT_PATH True) + endif() +else() + # Not cross compiling + set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}") + # Now that we have the real ROOT_DIR, find components inside it. + list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR}) + + # Mark that we need to pop the prefix path changes after we have + # found the cudart library. + set(_CUDAToolkit_Pop_Prefix True) +endif() + +# Find the include/ directory +find_path(CUDAToolkit_INCLUDE_DIR + NAMES cuda_runtime.h +) + +# Find a tentative CUDAToolkit_LIBRARY_DIR. FindCUDAToolkit overrides it by searching for the CUDA runtime, +# but we can't do that here, as CMakeDetermineCUDACompiler wants to use it before the variables necessary +# for find_library() have been initialized. +if(EXISTS "${CUDAToolkit_TARGET_DIR}/lib64") + set(CUDAToolkit_LIBRARY_DIR "${CUDAToolkit_TARGET_DIR}/lib64") +elseif(EXISTS "${CUDAToolkit_TARGET_DIR}/lib") + set(CUDAToolkit_LIBRARY_DIR "${CUDAToolkit_TARGET_DIR}/lib") +endif() diff --git a/Modules/Platform/Windows-NVIDIA-CUDA.cmake b/Modules/Platform/Windows-NVIDIA-CUDA.cmake index f425cf8..f887594 100644 --- a/Modules/Platform/Windows-NVIDIA-CUDA.cmake +++ b/Modules/Platform/Windows-NVIDIA-CUDA.cmake @@ -74,6 +74,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static") set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart") set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE "") +if(UNIX) + list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl") +endif() + string(APPEND CMAKE_CUDA_FLAGS_INIT " ${PLATFORM_DEFINES_CUDA} -D_WINDOWS -Xcompiler=\"${_W3}${_FLAGS_CXX}\"") string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=\"${_MDd}-Zi -Ob0 -Od ${_RTC1}\"") string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=\"${_MD}-O2 -Ob2\" -DNDEBUG") diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx index fb0512e..335f7a4 100644 --- a/Source/cmGeneratorTarget.cxx +++ b/Source/cmGeneratorTarget.cxx @@ -3180,6 +3180,20 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const flags += "]"; } + } else if (compiler == "Clang") { + for (CudaArchitecture& architecture : architectures) { + flags += " --cuda-gpu-arch=sm_" + architecture.name; + + if (!architecture.real) { + Makefile->IssueMessage( + MessageType::WARNING, + "Clang doesn't support disabling CUDA real code generation."); + } + + if (!architecture.virtual_) { + flags += " --no-cuda-include-ptx=sm_" + architecture.name; + } + } } } diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt index 58b9b03..83f2f2d 100644 --- a/Tests/Cuda/CMakeLists.txt +++ b/Tests/Cuda/CMakeLists.txt @@ -1,5 +1,4 @@ -ADD_TEST_MACRO(Cuda.Complex CudaComplex) ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures) ADD_TEST_MACRO(Cuda.CXXStandardSetTwice CXXStandardSetTwice) ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary) @@ -12,10 +11,16 @@ ADD_TEST_MACRO(Cuda.NotEnabled CudaNotEnabled) ADD_TEST_MACRO(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly) ADD_TEST_MACRO(Cuda.Toolkit Toolkit) ADD_TEST_MACRO(Cuda.IncludePathNoToolkit IncludePathNoToolkit) -ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries) -ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags) ADD_TEST_MACRO(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit) +# Separable compilation is currently only supported on NVCC. Disable tests +# using it for other compilers. +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + ADD_TEST_MACRO(Cuda.Complex CudaComplex) + ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries) + ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags) +endif() + # The CUDA only ships the shared version of the toolkit libraries # on windows if(NOT WIN32) diff --git a/Tests/Cuda/ProperLinkFlags/CMakeLists.txt b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt index b6e0e39..d38da6d 100644 --- a/Tests/Cuda/ProperLinkFlags/CMakeLists.txt +++ b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt @@ -9,11 +9,17 @@ project (ProperLinkFlags CUDA CXX) #Specify a set of valid CUDA flags and an invalid set of CXX flags ( for CUDA ) #to make sure we don't use the CXX flags when linking CUDA executables -string(APPEND CMAKE_CUDA_FLAGS " -arch=sm_35 --use_fast_math") +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + string(APPEND CMAKE_CUDA_FLAGS "--use_fast_math") +elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang") + string(APPEND CMAKE_CUDA_FLAGS "-ffast-math") +endif() + set(CMAKE_CXX_FLAGS "-Wall") set(CMAKE_CXX_STANDARD 11) set(CMAKE_CUDA_STANDARD 11) +set(CMAKE_CUDA_ARCHITECTURES 35) add_executable(ProperLinkFlags file1.cu main.cxx) set_target_properties( ProperLinkFlags diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index d74e810..ee7374e 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -1,17 +1,35 @@ ADD_TEST_MACRO(CudaOnly.Architecture Architecture) -ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) ADD_TEST_MACRO(CudaOnly.CompileFlags CudaOnlyCompileFlags) ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard) ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX) -ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) -ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) -ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) ADD_TEST_MACRO(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit) ADD_TEST_MACRO(CudaOnly.Standard98 CudaOnlyStandard98) ADD_TEST_MACRO(CudaOnly.Toolkit CudaOnlyToolkit) ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs) +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + # Separable compilation is currently only supported on NVCC. Disable tests + # using it for other compilers. + ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine) + ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) + ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) + + add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND + ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION> + --build-and-test + "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/" + "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/" + ${build_generator_args} + --build-project DontResolveDeviceSymbols + --build-options ${build_options} + --test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION> + ) + + # Only NVCC defines __CUDACC_DEBUG__ when compiling in debug mode. + ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) +endif() + # The CUDA only ships the shared version of the toolkit libraries # on windows if(NOT WIN32) @@ -22,17 +40,6 @@ if(MSVC) ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB) endif() -add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND - ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION> - --build-and-test - "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/" - "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/" - ${build_generator_args} - --build-project DontResolveDeviceSymbols - --build-options ${build_options} - --test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION> - ) - add_test(NAME CudaOnly.RuntimeControls COMMAND ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION> --build-and-test diff --git a/Tests/CudaOnly/CompileFlags/CMakeLists.txt b/Tests/CudaOnly/CompileFlags/CMakeLists.txt index cbce7d6..5e8a8e4 100644 --- a/Tests/CudaOnly/CompileFlags/CMakeLists.txt +++ b/Tests/CudaOnly/CompileFlags/CMakeLists.txt @@ -1,16 +1,15 @@ cmake_minimum_required(VERSION 3.17) -cmake_policy(SET CMP0104 OLD) project(CompileFlags CUDA) -# Clear defaults. -set(CMAKE_CUDA_ARCHITECTURES) - add_executable(CudaOnlyCompileFlags main.cu) # Try passing CUDA architecture flags explicitly. if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") target_compile_options(CudaOnlyCompileFlags PRIVATE -gencode arch=compute_50,code=compute_50 - --compiler-options=-DHOST_DEFINE ) +else() + set_property(TARGET CudaOnlyCompileFlags PROPERTY CUDA_ARCHITECTURES 50-real) endif() + +target_compile_options(CudaOnlyCompileFlags PRIVATE -DALWAYS_DEFINE) diff --git a/Tests/CudaOnly/CompileFlags/main.cu b/Tests/CudaOnly/CompileFlags/main.cu index 573d230..999c056 100644 --- a/Tests/CudaOnly/CompileFlags/main.cu +++ b/Tests/CudaOnly/CompileFlags/main.cu @@ -4,11 +4,8 @@ # endif #endif -// Check HOST_DEFINE only for nvcc -#ifndef __CUDA__ -# ifndef HOST_DEFINE -# error "HOST_DEFINE not defined!" -# endif +#ifndef ALWAYS_DEFINE +# error "ALWAYS_DEFINE not defined!" #endif int main() diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt index ff6e77c..ee5f54d 100644 --- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt +++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt @@ -34,16 +34,15 @@ static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>"; # need to also pass the --name option set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h) -get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY) +find_package(CUDAToolkit REQUIRED) find_program(bin_to_c NAMES bin2c - PATHS ${cuda_compiler_bin} + PATHS ${CUDAToolkit_BIN_DIR} ) if(NOT bin_to_c) message(FATAL_ERROR "bin2c not found:\n" - " CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n" - " cuda_compiler_bin='${cuda_compiler_bin}'\n" + " CUDAToolkit_BIN_DIR='${CUDAToolkit_BIN_DIR}'\n" ) endif() diff --git a/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt index fbef15f..325723a 100644 --- a/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt +++ b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt @@ -2,18 +2,19 @@ cmake_minimum_required(VERSION 3.7) project (GPUDebugFlag CUDA) -#Goal for this example: -#verify that -G enables gpu debug flags -string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_30,code=compute_30") -string(APPEND CMAKE_CUDA_FLAGS " -G") set(CMAKE_CUDA_STANDARD 11) +set(CMAKE_CUDA_ARCHITECTURES 30) + +# Goal for this example: +# Verify that enabling device debug works. +string(APPEND CMAKE_CUDA_FLAGS "-G") add_executable(CudaOnlyGPUDebugFlag main.cu) +#CUDA's __CUDACC_DEBUG__ define was added in NVCC 9.0 +#so if we are below 9.0.0 we will manually add the define so that the test +#passes if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.0.0) - #CUDA's __CUDACC_DEBUG__ define was added in 9.0 - #so if we are below 9.0.0 we will manually add the define so that the test - #passes target_compile_definitions(CudaOnlyGPUDebugFlag PRIVATE "__CUDACC_DEBUG__") endif() diff --git a/Tests/CudaOnly/WithDefs/CMakeLists.txt b/Tests/CudaOnly/WithDefs/CMakeLists.txt index add8131..0ed81d8 100644 --- a/Tests/CudaOnly/WithDefs/CMakeLists.txt +++ b/Tests/CudaOnly/WithDefs/CMakeLists.txt @@ -18,7 +18,7 @@ target_compile_options(CudaOnlyWithDefs PRIVATE -DFLAG_COMPILE_LANG_$<COMPILE_LANGUAGE> -DFLAG_LANG_IS_CUDA=$<COMPILE_LANGUAGE:CUDA> - --compiler-options=-DHOST_DEFINE + $<$<CUDA_COMPILER_ID:NVIDIA>:--compiler-options=-DHOST_DEFINE> # Host-only defines are possible only on NVCC. ) target_compile_definitions(CudaOnlyWithDefs diff --git a/Tests/CudaOnly/WithDefs/main.notcu b/Tests/CudaOnly/WithDefs/main.notcu index a5f4ed6..9119eba 100644 --- a/Tests/CudaOnly/WithDefs/main.notcu +++ b/Tests/CudaOnly/WithDefs/main.notcu @@ -7,8 +7,10 @@ # error "INC_CUDA not defined!" #endif -#ifndef HOST_DEFINE -# error "HOST_DEFINE not defined!" +#ifdef __NVCC__ +# ifndef HOST_DEFINE +# error "HOST_DEFINE not defined!" +# endif #endif #ifndef PACKED_DEFINE diff --git a/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake b/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake index 841d0a8..2b4a8f5 100644 --- a/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake +++ b/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake @@ -1 +1,2 @@ include(CMP0104-Common.cmake) +set_property(TARGET cuda PROPERTY CUDA_ARCHITECTURES) diff --git a/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake b/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake index 6472f46..449db4a 100644 --- a/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake +++ b/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake @@ -285,7 +285,8 @@ run_cmake_command(NoUnusedVariables ${CMAKE_COMMAND} ${CMAKE_CURRENT_LIST_DIR} "-DCMAKE_DEFAULT_CONFIGS=all" ) -if(CMake_TEST_CUDA) +# CudaSimple uses separable compilation, which is currently only supported on NVCC. +if(CMake_TEST_CUDA AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") set(RunCMake_TEST_BINARY_DIR ${RunCMake_BINARY_DIR}/CudaSimple-build) run_cmake_configure(CudaSimple) include(${RunCMake_TEST_BINARY_DIR}/target_files.cmake) diff --git a/Tests/RunCMake/target_link_options/RunCMakeTest.cmake b/Tests/RunCMake/target_link_options/RunCMakeTest.cmake index b919f48..2035eb4 100644 --- a/Tests/RunCMake/target_link_options/RunCMakeTest.cmake +++ b/Tests/RunCMake/target_link_options/RunCMakeTest.cmake @@ -50,10 +50,14 @@ if (NOT CMAKE_C_COMPILER_ID STREQUAL "Intel") run_cmake_target(genex_DEVICE_LINK interface LinkOptions_shared_interface --config Release) run_cmake_target(genex_DEVICE_LINK private LinkOptions_private --config Release) if (CMake_TEST_CUDA) - run_cmake_target(genex_DEVICE_LINK CMP0105_UNSET LinkOptions_CMP0105_UNSET --config Release) - run_cmake_target(genex_DEVICE_LINK CMP0105_OLD LinkOptions_CMP0105_OLD --config Release) - run_cmake_target(genex_DEVICE_LINK CMP0105_NEW LinkOptions_CMP0105_NEW --config Release) - run_cmake_target(genex_DEVICE_LINK device LinkOptions_device --config Release) + # Separable compilation is only supported on NVCC. + if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA") + run_cmake_target(genex_DEVICE_LINK CMP0105_UNSET LinkOptions_CMP0105_UNSET --config Release) + run_cmake_target(genex_DEVICE_LINK CMP0105_OLD LinkOptions_CMP0105_OLD --config Release) + run_cmake_target(genex_DEVICE_LINK CMP0105_NEW LinkOptions_CMP0105_NEW --config Release) + run_cmake_target(genex_DEVICE_LINK device LinkOptions_device --config Release) + endif() + run_cmake_target(genex_DEVICE_LINK no_device LinkOptions_no_device --config Release) endif() |