diff options
Diffstat (limited to 'Modules/FindCUDA.cmake')
-rw-r--r-- | Modules/FindCUDA.cmake | 778 |
1 files changed, 489 insertions, 289 deletions
diff --git a/Modules/FindCUDA.cmake b/Modules/FindCUDA.cmake index 2705d32..05f38eb 100644 --- a/Modules/FindCUDA.cmake +++ b/Modules/FindCUDA.cmake @@ -1,292 +1,460 @@ -# - Tools for building CUDA C files: libraries and build dependencies. -# This script locates the NVIDIA CUDA C tools. It should work on linux, windows, -# and mac and should be reasonably up to date with CUDA C releases. -# -# This script makes use of the standard find_package arguments of <VERSION>, -# REQUIRED and QUIET. CUDA_FOUND will report if an acceptable version of CUDA -# was found. -# -# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if the prefix -# cannot be determined by the location of nvcc in the system path and REQUIRED -# is specified to find_package(). To use a different installed version of the -# toolkit set the environment variable CUDA_BIN_PATH before running cmake -# (e.g. CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default /usr/local/cuda) -# or set CUDA_TOOLKIT_ROOT_DIR after configuring. If you change the value of -# CUDA_TOOLKIT_ROOT_DIR, various components that depend on the path will be -# relocated. +#.rst: +# FindCUDA +# -------- +# +# Tools for building CUDA C files: libraries and build dependencies. +# +# This script locates the NVIDIA CUDA C tools. It should work on linux, +# windows, and mac and should be reasonably up to date with CUDA C +# releases. +# +# This script makes use of the standard find_package arguments of +# <VERSION>, REQUIRED and QUIET. CUDA_FOUND will report if an +# acceptable version of CUDA was found. +# +# The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if +# the prefix cannot be determined by the location of nvcc in the system +# path and REQUIRED is specified to find_package(). To use a different +# installed version of the toolkit set the environment variable +# CUDA_BIN_PATH before running cmake (e.g. +# CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default +# /usr/local/cuda) or set CUDA_TOOLKIT_ROOT_DIR after configuring. If +# you change the value of CUDA_TOOLKIT_ROOT_DIR, various components that +# depend on the path will be relocated. # # It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain -# platforms, or to use a cuda runtime not installed in the default location. In -# newer versions of the toolkit the cuda library is included with the graphics -# driver- be sure that the driver version matches what is needed by the cuda -# runtime version. -# -# The following variables affect the behavior of the macros in the script (in -# alphebetical order). Note that any of these flags can be changed multiple -# times in the same directory before calling CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX or CUDA_WRAP_SRCS. -# -# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) -# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. -# Note that making this different from the host code when generating object -# or C files from CUDA code just won't work, because size_t gets defined by -# nvcc in the generated source. If you compile to PTX and then load the -# file yourself, you can mix bit sizes between device and host. -# -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) -# -- Set to ON if you want the custom build rule to be attached to the source -# file in Visual Studio. Turn OFF if you add the same cuda file to multiple -# targets. -# -# This allows the user to build the target from the CUDA file; however, bad -# things can happen if the CUDA source file is added to multiple targets. -# When performing parallel builds it is possible for the custom build -# command to be run more than once and in parallel causing cryptic build -# errors. VS runs the rules for every source file in the target, and a -# source can have only one rule no matter how many projects it is added to. -# When the rule is run from multiple targets race conditions can occur on -# the generated file. Eventually everything will get built, but if the user -# is unaware of this behavior, there may be confusion. It would be nice if -# this script could detect the reuse of source files across multiple targets -# and turn the option off for the user, but no good solution could be found. -# -# CUDA_BUILD_CUBIN (Default OFF) -# -- Set to ON to enable and extra compilation pass with the -cubin option in -# Device mode. The output is parsed and register, shared memory usage is -# printed during build. -# -# CUDA_BUILD_EMULATION (Default OFF for device mode) -# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files -# when CUDA_BUILD_EMULATION is TRUE. -# -# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) -# -- Set to the path you wish to have the generated files placed. If it is -# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. -# Intermediate files will always be placed in -# CMAKE_CURRENT_BINARY_DIR/CMakeFiles. -# -# CUDA_HOST_COMPILATION_CPP (Default ON) -# -- Set to OFF for C compilation of host code. -# -# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS) -# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or -# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or -# CUDA_NVCC_FLAGS_<CONFIG> variables. For Visual Studio targets -# $(VCInstallDir)/bin is a special value that expands out to the path when -# the command is run from withing VS. -# -# CUDA_NVCC_FLAGS -# CUDA_NVCC_FLAGS_<CONFIG> -# -- Additional NVCC command line arguments. NOTE: multiple arguments must be -# semi-colon delimited (e.g. --compiler-options;-Wall) -# -# CUDA_PROPAGATE_HOST_FLAGS (Default ON) -# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration -# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the -# host compiler through nvcc's -Xcompiler flag. This helps make the -# generated host code match the rest of the system better. Sometimes -# certain flags give nvcc problems, and this will help you turn the flag -# propagation off. This does not affect the flags supplied directly to nvcc -# via CUDA_NVCC_FLAGS or through the OPTION flags specified through -# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for -# shared library compilation are not affected by this flag. -# -# CUDA_SEPARABLE_COMPILATION (Default OFF) -# -- If set this will enable separable compilation for all CUDA runtime object -# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY -# (e.g. calling CUDA_WRAP_SRCS directly), -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called. -# -# CUDA_VERBOSE_BUILD (Default OFF) -# -- Set to ON to see all the commands used when building the CUDA file. When -# using a Makefile generator the value defaults to VERBOSE (run make -# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will -# always print the output. +# platforms, or to use a cuda runtime not installed in the default +# location. In newer versions of the toolkit the cuda library is +# included with the graphics driver- be sure that the driver version +# matches what is needed by the cuda runtime version. +# +# The following variables affect the behavior of the macros in the +# script (in alphebetical order). Note that any of these flags can be +# changed multiple times in the same directory before calling +# CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX +# or CUDA_WRAP_SRCS. +# +# :: +# +# CUDA_64_BIT_DEVICE_CODE (Default matches host bit size) +# -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code. +# Note that making this different from the host code when generating object +# or C files from CUDA code just won't work, because size_t gets defined by +# nvcc in the generated source. If you compile to PTX and then load the +# file yourself, you can mix bit sizes between device and host. +# +# +# +# :: +# +# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON) +# -- Set to ON if you want the custom build rule to be attached to the source +# file in Visual Studio. Turn OFF if you add the same cuda file to multiple +# targets. +# +# +# +# :: +# +# This allows the user to build the target from the CUDA file; however, bad +# things can happen if the CUDA source file is added to multiple targets. +# When performing parallel builds it is possible for the custom build +# command to be run more than once and in parallel causing cryptic build +# errors. VS runs the rules for every source file in the target, and a +# source can have only one rule no matter how many projects it is added to. +# When the rule is run from multiple targets race conditions can occur on +# the generated file. Eventually everything will get built, but if the user +# is unaware of this behavior, there may be confusion. It would be nice if +# this script could detect the reuse of source files across multiple targets +# and turn the option off for the user, but no good solution could be found. +# +# +# +# :: +# +# CUDA_BUILD_CUBIN (Default OFF) +# -- Set to ON to enable and extra compilation pass with the -cubin option in +# Device mode. The output is parsed and register, shared memory usage is +# printed during build. +# +# +# +# :: +# +# CUDA_BUILD_EMULATION (Default OFF for device mode) +# -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files +# when CUDA_BUILD_EMULATION is TRUE. +# +# +# +# :: +# +# CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR) +# -- Set to the path you wish to have the generated files placed. If it is +# blank output files will be placed in CMAKE_CURRENT_BINARY_DIR. +# Intermediate files will always be placed in +# CMAKE_CURRENT_BINARY_DIR/CMakeFiles. +# +# +# +# :: +# +# CUDA_HOST_COMPILATION_CPP (Default ON) +# -- Set to OFF for C compilation of host code. +# +# +# +# :: +# +# CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS) +# -- Set the host compiler to be used by nvcc. Ignored if -ccbin or +# --compiler-bindir is already present in the CUDA_NVCC_FLAGS or +# CUDA_NVCC_FLAGS_<CONFIG> variables. For Visual Studio targets +# $(VCInstallDir)/bin is a special value that expands out to the path when +# the command is run from withing VS. +# +# +# +# :: +# +# CUDA_NVCC_FLAGS +# CUDA_NVCC_FLAGS_<CONFIG> +# -- Additional NVCC command line arguments. NOTE: multiple arguments must be +# semi-colon delimited (e.g. --compiler-options;-Wall) +# +# +# +# :: +# +# CUDA_PROPAGATE_HOST_FLAGS (Default ON) +# -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration +# dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the +# host compiler through nvcc's -Xcompiler flag. This helps make the +# generated host code match the rest of the system better. Sometimes +# certain flags give nvcc problems, and this will help you turn the flag +# propagation off. This does not affect the flags supplied directly to nvcc +# via CUDA_NVCC_FLAGS or through the OPTION flags specified through +# CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS. Flags used for +# shared library compilation are not affected by this flag. +# +# +# +# :: +# +# CUDA_SEPARABLE_COMPILATION (Default OFF) +# -- If set this will enable separable compilation for all CUDA runtime object +# files. If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY +# (e.g. calling CUDA_WRAP_SRCS directly), +# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and +# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called. +# +# +# +# :: +# +# CUDA_VERBOSE_BUILD (Default OFF) +# -- Set to ON to see all the commands used when building the CUDA file. When +# using a Makefile generator the value defaults to VERBOSE (run make +# VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will +# always print the output. +# +# # # The script creates the following macros (in alphebetical order): # -# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) -# -- Adds the cufft library to the target (can be any target). Handles whether -# you are in emulation mode or not. -# -# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) -# -- Adds the cublas library to the target (can be any target). Handles -# whether you are in emulation mode or not. -# -# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... -# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Creates an executable "cuda_target" which is made up of the files -# specified. All of the non CUDA C files are compiled using the standard -# build rules specified by CMAKE and the cuda files are compiled to object -# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is -# added automatically to include_directories(). Some standard CMake target -# calls can be used on the target after calling this macro -# (e.g. set_target_properties and target_link_libraries), but setting -# properties that adjust compilation flags will not affect code compiled by -# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. -# -# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... -# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) -# -- Same as CUDA_ADD_EXECUTABLE except that a library is created. -# -# CUDA_BUILD_CLEAN_TARGET() -# -- Creates a convience target that deletes all the dependency files -# generated. You should make clean after running this target to ensure the -# dependency files get regenerated. -# -# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] -# [OPTIONS ...] ) -# -- Returns a list of generated files from the input source files to be used -# with ADD_LIBRARY or ADD_EXECUTABLE. -# -# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) -# -- Returns a list of PTX files generated from the input source files. -# -# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var -# cuda_target -# object_files ) -# -- Compute the name of the intermediate link file used for separable -# compilation. This file name is typically passed into -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. output_file_var is produced -# based on cuda_target the list of objects files that need separable -# compilation as specified by object_files. If the object_files list is -# empty, then output_file_var will be empty. This function is called -# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that -# this is a function and not a macro. -# -# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) -# -- Sets the directories that should be passed to nvcc -# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu -# files. -# -# -# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target -# nvcc_flags object_files) -# -# -- Generates the link object required by separable compilation from the given -# object files. This is called automatically for CUDA_ADD_EXECUTABLE and -# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS -# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the -# nvcc_flags passed in are the same as the flags passed in via the OPTIONS -# argument. The only nvcc flag added automatically is the bitness flag as -# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function -# instead of a macro. -# -# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... -# [STATIC | SHARED | MODULE] [OPTIONS ...] ) -# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE, -# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this -# function under the hood. -# -# Given the list of files (file0 file1 ... fileN) this macro generates -# custom commands that generate either PTX or linkable objects (use "PTX" or -# "OBJ" for the format argument to switch). Files that don't end with .cu -# or have the HEADER_FILE_ONLY property are ignored. -# -# The arguments passed in after OPTIONS are extra command line options to -# give to nvcc. You can also specify per configuration options by -# specifying the name of the configuration followed by the options. General -# options must preceed configuration specific options. Not all -# configurations need to be specified, only the ones provided will be used. -# -# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" -# DEBUG -g -# RELEASE --use_fast_math -# RELWITHDEBINFO --use_fast_math;-g -# MINSIZEREL --use_fast_math -# -# For certain configurations (namely VS generating object files with -# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will -# be produced for the given cuda file. This is because when you add the -# cuda file to Visual Studio it knows that this file produces an object file -# and will link in the resulting object file automatically. -# -# This script will also generate a separate cmake script that is used at -# build time to invoke nvcc. This is for several reasons. -# -# 1. nvcc can return negative numbers as return values which confuses -# Visual Studio into thinking that the command succeeded. The script now -# checks the error codes and produces errors when there was a problem. -# -# 2. nvcc has been known to not delete incomplete results when it -# encounters problems. This confuses build systems into thinking the -# target was generated when in fact an unusable file exists. The script -# now deletes the output files if there was an error. -# -# 3. By putting all the options that affect the build into a file and then -# make the build rule dependent on the file, the output files will be -# regenerated when the options change. -# -# This script also looks at optional arguments STATIC, SHARED, or MODULE to -# determine when to target the object compilation for a shared library. -# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in -# CUDA_ADD_LIBRARY. On some systems special flags are added for building -# objects intended for shared libraries. A preprocessor macro, -# <target_name>_EXPORTS is defined when a shared library compilation is -# detected. -# -# Flags passed into add_definitions with -D or /D are passed along to nvcc. +# :: +# +# CUDA_ADD_CUFFT_TO_TARGET( cuda_target ) +# -- Adds the cufft library to the target (can be any target). Handles whether +# you are in emulation mode or not. +# +# +# +# :: +# +# CUDA_ADD_CUBLAS_TO_TARGET( cuda_target ) +# -- Adds the cublas library to the target (can be any target). Handles +# whether you are in emulation mode or not. +# +# +# +# :: +# +# CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ... +# [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) +# -- Creates an executable "cuda_target" which is made up of the files +# specified. All of the non CUDA C files are compiled using the standard +# build rules specified by CMAKE and the cuda files are compiled to object +# files using nvcc and the host compiler. In addition CUDA_INCLUDE_DIRS is +# added automatically to include_directories(). Some standard CMake target +# calls can be used on the target after calling this macro +# (e.g. set_target_properties and target_link_libraries), but setting +# properties that adjust compilation flags will not affect code compiled by +# nvcc. Such flags should be modified before calling CUDA_ADD_EXECUTABLE, +# CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS. +# +# +# +# :: +# +# CUDA_ADD_LIBRARY( cuda_target file0 file1 ... +# [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] ) +# -- Same as CUDA_ADD_EXECUTABLE except that a library is created. +# +# +# +# :: +# +# CUDA_BUILD_CLEAN_TARGET() +# -- Creates a convience target that deletes all the dependency files +# generated. You should make clean after running this target to ensure the +# dependency files get regenerated. +# +# +# +# :: +# +# CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE] +# [OPTIONS ...] ) +# -- Returns a list of generated files from the input source files to be used +# with ADD_LIBRARY or ADD_EXECUTABLE. +# +# +# +# :: +# +# CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] ) +# -- Returns a list of PTX files generated from the input source files. +# +# +# +# :: +# +# CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var +# cuda_target +# object_files ) +# -- Compute the name of the intermediate link file used for separable +# compilation. This file name is typically passed into +# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS. output_file_var is produced +# based on cuda_target the list of objects files that need separable +# compilation as specified by object_files. If the object_files list is +# empty, then output_file_var will be empty. This function is called +# automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE. Note that +# this is a function and not a macro. +# +# +# +# :: +# +# CUDA_INCLUDE_DIRECTORIES( path0 path1 ... ) +# -- Sets the directories that should be passed to nvcc +# (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu +# files. +# +# +# +# +# +# :: +# +# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target +# nvcc_flags object_files) +# +# +# +# :: +# +# -- Generates the link object required by separable compilation from the given +# object files. This is called automatically for CUDA_ADD_EXECUTABLE and +# CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS +# directly. When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the +# nvcc_flags passed in are the same as the flags passed in via the OPTIONS +# argument. The only nvcc flag added automatically is the bitness flag as +# specified by CUDA_64_BIT_DEVICE_CODE. Note that this is a function +# instead of a macro. +# +# +# +# :: +# +# CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ... +# [STATIC | SHARED | MODULE] [OPTIONS ...] ) +# -- This is where all the magic happens. CUDA_ADD_EXECUTABLE, +# CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this +# function under the hood. +# +# +# +# :: +# +# Given the list of files (file0 file1 ... fileN) this macro generates +# custom commands that generate either PTX or linkable objects (use "PTX" or +# "OBJ" for the format argument to switch). Files that don't end with .cu +# or have the HEADER_FILE_ONLY property are ignored. +# +# +# +# :: +# +# The arguments passed in after OPTIONS are extra command line options to +# give to nvcc. You can also specify per configuration options by +# specifying the name of the configuration followed by the options. General +# options must preceed configuration specific options. Not all +# configurations need to be specified, only the ones provided will be used. +# +# +# +# :: +# +# OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag" +# DEBUG -g +# RELEASE --use_fast_math +# RELWITHDEBINFO --use_fast_math;-g +# MINSIZEREL --use_fast_math +# +# +# +# :: +# +# For certain configurations (namely VS generating object files with +# CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will +# be produced for the given cuda file. This is because when you add the +# cuda file to Visual Studio it knows that this file produces an object file +# and will link in the resulting object file automatically. +# +# +# +# :: +# +# This script will also generate a separate cmake script that is used at +# build time to invoke nvcc. This is for several reasons. +# +# +# +# :: +# +# 1. nvcc can return negative numbers as return values which confuses +# Visual Studio into thinking that the command succeeded. The script now +# checks the error codes and produces errors when there was a problem. +# +# +# +# :: +# +# 2. nvcc has been known to not delete incomplete results when it +# encounters problems. This confuses build systems into thinking the +# target was generated when in fact an unusable file exists. The script +# now deletes the output files if there was an error. +# +# +# +# :: +# +# 3. By putting all the options that affect the build into a file and then +# make the build rule dependent on the file, the output files will be +# regenerated when the options change. +# +# +# +# :: +# +# This script also looks at optional arguments STATIC, SHARED, or MODULE to +# determine when to target the object compilation for a shared library. +# BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in +# CUDA_ADD_LIBRARY. On some systems special flags are added for building +# objects intended for shared libraries. A preprocessor macro, +# <target_name>_EXPORTS is defined when a shared library compilation is +# detected. +# +# +# +# :: +# +# Flags passed into add_definitions with -D or /D are passed along to nvcc. +# +# # # The script defines the following variables: # -# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. -# CUDA_VERSION_MINOR -- The minor version. -# CUDA_VERSION -# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR -# -# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). -# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the -# SDK. This script will not directly support finding -# specific libraries or headers, as that isn't -# supported by NVIDIA. If you want to change -# libraries when the path changes see the -# FindCUDA.cmake script for an example of how to clear -# these variables. There are also examples of how to -# use the CUDA_SDK_ROOT_DIR to locate headers or -# libraries, if you so choose (at your own risk). -# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically -# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. -# CUDA_LIBRARIES -- Cuda RT library. -# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT -# implementation (alternative to: -# CUDA_ADD_CUFFT_TO_TARGET macro) -# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS -# implementation (alterative to: -# CUDA_ADD_CUBLAS_TO_TARGET macro). -# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library. -# Only available for CUDA version 4.0+. -# CUDA_curand_LIBRARY -- CUDA Random Number Generation library. -# Only available for CUDA version 3.2+. -# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library. -# Only available for CUDA version 3.2+. -# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives library. -# Only available for CUDA version 4.0+. -# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives library (core). -# Only available for CUDA version 5.5+. -# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives library (image processing). -# Only available for CUDA version 5.5+. -# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives library (signal processing). -# Only available for CUDA version 5.5+. -# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library. -# Only available for CUDA version 3.2+. -# Windows only. -# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library. -# Only available for CUDA version 3.2+. -# Windows only. -# -# -# James Bigler, NVIDIA Corp (nvidia.com - jbigler) -# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html -# -# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. -# -# Copyright (c) 2007-2009 -# Scientific Computing and Imaging Institute, University of Utah -# -# This code is licensed under the MIT License. See the FindCUDA.cmake script -# for the text of the license. +# :: +# +# CUDA_VERSION_MAJOR -- The major version of cuda as reported by nvcc. +# CUDA_VERSION_MINOR -- The minor version. +# CUDA_VERSION +# CUDA_VERSION_STRING -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR +# +# +# +# :: +# +# CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set). +# CUDA_SDK_ROOT_DIR -- Path to the CUDA SDK. Use this to find files in the +# SDK. This script will not directly support finding +# specific libraries or headers, as that isn't +# supported by NVIDIA. If you want to change +# libraries when the path changes see the +# FindCUDA.cmake script for an example of how to clear +# these variables. There are also examples of how to +# use the CUDA_SDK_ROOT_DIR to locate headers or +# libraries, if you so choose (at your own risk). +# CUDA_INCLUDE_DIRS -- Include directory for cuda headers. Added automatically +# for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY. +# CUDA_LIBRARIES -- Cuda RT library. +# CUDA_CUFFT_LIBRARIES -- Device or emulation library for the Cuda FFT +# implementation (alternative to: +# CUDA_ADD_CUFFT_TO_TARGET macro) +# CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS +# implementation (alterative to: +# CUDA_ADD_CUBLAS_TO_TARGET macro). +# CUDA_cupti_LIBRARY -- CUDA Profiling Tools Interface library. +# Only available for CUDA version 4.0+. +# CUDA_curand_LIBRARY -- CUDA Random Number Generation library. +# Only available for CUDA version 3.2+. +# CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library. +# Only available for CUDA version 3.2+. +# CUDA_npp_LIBRARY -- NVIDIA Performance Primitives library. +# Only available for CUDA version 4.0+. +# CUDA_nppc_LIBRARY -- NVIDIA Performance Primitives library (core). +# Only available for CUDA version 5.5+. +# CUDA_nppi_LIBRARY -- NVIDIA Performance Primitives library (image processing). +# Only available for CUDA version 5.5+. +# CUDA_npps_LIBRARY -- NVIDIA Performance Primitives library (signal processing). +# Only available for CUDA version 5.5+. +# CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library. +# Only available for CUDA version 3.2+. +# Windows only. +# CUDA_nvcuvid_LIBRARY -- CUDA Video Decoder library. +# Only available for CUDA version 3.2+. +# Windows only. +# +# +# +# +# +# :: +# +# James Bigler, NVIDIA Corp (nvidia.com - jbigler) +# Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html +# +# +# +# :: +# +# Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. +# +# +# +# :: +# +# Copyright (c) 2007-2009 +# Scientific Computing and Imaging Institute, University of Utah +# +# +# +# :: +# +# This code is licensed under the MIT License. See the FindCUDA.cmake script +# for the text of the license. # The MIT License # @@ -481,19 +649,15 @@ endforeach() ############################################################################### ############################################################################### -# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, -# if they have then clear the cache variables, so that will be detected again. -if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") - unset(CUDA_NVCC_EXECUTABLE CACHE) +macro(cuda_unset_include_and_libraries) unset(CUDA_TOOLKIT_INCLUDE CACHE) unset(CUDA_CUDART_LIBRARY CACHE) + unset(CUDA_CUDA_LIBRARY CACHE) # Make sure you run this before you unset CUDA_VERSION. if(CUDA_VERSION VERSION_EQUAL "3.0") # This only existed in the 3.0 version of the CUDA toolkit unset(CUDA_CUDARTEMU_LIBRARY CACHE) endif() - unset(CUDA_VERSION CACHE) - unset(CUDA_CUDA_LIBRARY CACHE) unset(CUDA_cupti_LIBRARY CACHE) unset(CUDA_cublas_LIBRARY CACHE) unset(CUDA_cublasemu_LIBRARY CACHE) @@ -507,6 +671,19 @@ if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") unset(CUDA_npps_LIBRARY CACHE) unset(CUDA_nvcuvenc_LIBRARY CACHE) unset(CUDA_nvcuvid_LIBRARY CACHE) +endmacro() + +# Check to see if the CUDA_TOOLKIT_ROOT_DIR and CUDA_SDK_ROOT_DIR have changed, +# if they have then clear the cache variables, so that will be detected again. +if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}") + unset(CUDA_TOOLKIT_TARGET_DIR CACHE) + unset(CUDA_NVCC_EXECUTABLE CACHE) + unset(CUDA_VERSION CACHE) + cuda_unset_include_and_libraries() +endif() + +if(NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}") + cuda_unset_include_and_libraries() endif() if(NOT "${CUDA_SDK_ROOT_DIR}" STREQUAL "${CUDA_SDK_ROOT_DIR_INTERNAL}") @@ -581,10 +758,27 @@ endif() # Always set this convenience variable set(CUDA_VERSION_STRING "${CUDA_VERSION}") +# Support for arm cross compilation with CUDA 5.5 +if(CUDA_VERSION VERSION_GREATER "5.0" AND CMAKE_CROSSCOMPILING AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" AND EXISTS "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabihf") + set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT_DIR}/targets/armv7-linux-gnueabihf" CACHE PATH "Toolkit target location.") +else() + set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT_DIR}" CACHE PATH "Toolkit target location.") +endif() +mark_as_advanced(CUDA_TOOLKIT_TARGET_DIR) + +# Target CPU architecture +if(CUDA_VERSION VERSION_GREATER "5.0" AND CMAKE_CROSSCOMPILING AND ${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") + set(_cuda_target_cpu_arch_initial "ARM") +else() + set(_cuda_target_cpu_arch_initial "") +endif() +set(CUDA_TARGET_CPU_ARCH ${_cuda_target_cpu_arch_initial} CACHE STRING "Specify the name of the class of CPU architecture for which the input files must be compiled.") +mark_as_advanced(CUDA_TARGET_CPU_ARCH) + # CUDA_TOOLKIT_INCLUDE find_path(CUDA_TOOLKIT_INCLUDE device_functions.h # Header included in toolkit - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" + PATHS "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}" ENV CUDA_PATH ENV CUDA_INC_PATH PATH_SUFFIXES include @@ -608,7 +802,7 @@ macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext ) # (lib/Win32) and the old path (lib). find_library(${_var} NAMES ${_names} - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" + PATHS "${CUDA_TOOLKIT_TARGET_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}" ENV CUDA_PATH ENV CUDA_LIB_PATH PATH_SUFFIXES ${_cuda_64bit_lib_dir} "${_path_ext}lib/Win32" "${_path_ext}lib" "${_path_ext}libWin32" @@ -799,6 +993,8 @@ set(CUDA_FOUND TRUE) set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE) +set(CUDA_TOOLKIT_TARGET_DIR_INTERNAL "${CUDA_TOOLKIT_TARGET_DIR}" CACHE INTERNAL + "This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was set successfully." FORCE) set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE) @@ -1023,6 +1219,10 @@ macro(CUDA_WRAP_SRCS cuda_target format generated_files) set(nvcc_flags ${nvcc_flags} -m32) endif() + if(CUDA_TARGET_CPU_ARCH) + set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH}") + endif() + # This needs to be passed in at this stage, because VS needs to fill out the # value of VCInstallDir from within VS. Note that CCBIN is only used if # -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches |