diff options
-rw-r--r-- | Source/cmLinkLineDeviceComputer.cxx | 54 | ||||
-rw-r--r-- | Source/cmNinjaNormalTargetGenerator.cxx | 2 | ||||
-rw-r--r-- | Tests/CudaOnly/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt | 15 | ||||
-rw-r--r-- | Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu | 77 |
5 files changed, 118 insertions, 31 deletions
diff --git a/Source/cmLinkLineDeviceComputer.cxx b/Source/cmLinkLineDeviceComputer.cxx index 3beeae3..557fa41 100644 --- a/Source/cmLinkLineDeviceComputer.cxx +++ b/Source/cmLinkLineDeviceComputer.cxx @@ -3,9 +3,9 @@ #include "cmLinkLineDeviceComputer.h" -#include <set> #include <sstream> +#include "cmAlgorithms.h" #include "cmComputeLinkInformation.h" #include "cmGeneratorTarget.h" #include "cmGlobalNinjaGenerator.h" @@ -32,38 +32,32 @@ std::string cmLinkLineDeviceComputer::ComputeLinkLibraries( ItemVector const& items = cli.GetItems(); std::string config = cli.GetConfig(); for (auto const& item : items) { - if (!item.Target) { - continue; - } - - bool skippable = false; - switch (item.Target->GetType()) { - case cmStateEnums::SHARED_LIBRARY: - case cmStateEnums::MODULE_LIBRARY: - case cmStateEnums::INTERFACE_LIBRARY: - skippable = true; - break; - case cmStateEnums::STATIC_LIBRARY: - // If a static library is resolving its device linking, it should - // be removed for other device linking - skippable = - item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); - break; - default: - break; - } - - if (skippable) { - continue; - } - - std::set<std::string> langs; - item.Target->GetLanguages(langs, config); - if (langs.count("CUDA") == 0) { - continue; + if (item.Target) { + bool skip = false; + switch (item.Target->GetType()) { + case cmStateEnums::MODULE_LIBRARY: + case cmStateEnums::INTERFACE_LIBRARY: + skip = true; + break; + case cmStateEnums::STATIC_LIBRARY: + skip = item.Target->GetPropertyAsBool("CUDA_RESOLVE_DEVICE_SYMBOLS"); + break; + default: + break; + } + if (skip) { + continue; + } } if (item.IsPath) { + // nvcc understands absolute paths to libraries ending in '.a' should + // be passed to nvlink. Other extensions like '.so' or '.dylib' are + // rejected by the nvcc front-end even though nvlink knows to ignore + // them. Bypass the front-end via '-Xnvlink'. + if (!cmHasLiteralSuffix(item.Value, ".a")) { + fout << "-Xnvlink "; + } fout << this->ConvertToOutputFormat( this->ConvertToLinkReference(item.Value)); } else { diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index f1fb2d2..52e3677 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -187,7 +187,7 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkRule(bool useResponseFile) std::string responseFlag; if (!useResponseFile) { vars.Objects = "$in"; - vars.LinkLibraries = "$LINK_LIBRARIES"; + vars.LinkLibraries = "$LINK_PATH $LINK_LIBRARIES"; } else { std::string cmakeVarLang = "CMAKE_"; cmakeVarLang += this->TargetLinkLanguage; diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index 5ad6e6b..565baca 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -2,6 +2,7 @@ ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard) ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX) ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag) +ADD_TEST_MACRO(CudaOnly.LinkSystemDeviceLibraries CudaOnlyLinkSystemDeviceLibraries) ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols) ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs) diff --git a/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt b/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt new file mode 100644 index 0000000..62be1e6 --- /dev/null +++ b/Tests/CudaOnly/LinkSystemDeviceLibraries/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.8) +project(CudaOnlyLinkSystemDeviceLibraries CUDA) + +string(APPEND CMAKE_CUDA_FLAGS " -gencode arch=compute_35,code=compute_35 -gencode arch=compute_35,code=sm_35") +set(CMAKE_CUDA_STANDARD 11) + +add_executable(CudaOnlyLinkSystemDeviceLibraries main.cu) +set_target_properties( CudaOnlyLinkSystemDeviceLibraries + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) +target_link_libraries( CudaOnlyLinkSystemDeviceLibraries PRIVATE cublas_device) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dyllib) at runtime. + set_property(TARGET CudaOnlyLinkSystemDeviceLibraries PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() diff --git a/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu b/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu new file mode 100644 index 0000000..7eecec1 --- /dev/null +++ b/Tests/CudaOnly/LinkSystemDeviceLibraries/main.cu @@ -0,0 +1,77 @@ + +#include <cublas_v2.h> +#include <cuda_runtime.h> +#include <iostream> + +__global__ void deviceCublasSgemm(int n, float alpha, float beta, + const float* d_A, const float* d_B, + float* d_C) +{ + cublasHandle_t cnpHandle; + cublasStatus_t status = cublasCreate(&cnpHandle); + + if (status != CUBLAS_STATUS_SUCCESS) { + return; + } + + // Call function defined in the cublas_device system static library. + // This way we can verify that we properly pass system libraries to the + // device link line + status = cublasSgemm(cnpHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, n, n, &alpha, + d_A, n, d_B, n, &beta, d_C, n); + + cublasDestroy(cnpHandle); +} + +int choose_cuda_device() +{ + int nDevices = 0; + cudaError_t err = cudaGetDeviceCount(&nDevices); + if (err != cudaSuccess) { + std::cerr << "Failed to retrieve the number of CUDA enabled devices" + << std::endl; + return 1; + } + for (int i = 0; i < nDevices; ++i) { + cudaDeviceProp prop; + cudaError_t err = cudaGetDeviceProperties(&prop, i); + if (err != cudaSuccess) { + std::cerr << "Could not retrieve properties from CUDA device " << i + << std::endl; + return 1; + } + + if (prop.major > 3 || (prop.major == 3 && prop.minor >= 5)) { + err = cudaSetDevice(i); + if (err != cudaSuccess) { + std::cout << "Could not select CUDA device " << i << std::endl; + } else { + return 0; + } + } + } + + std::cout << "Could not find a CUDA enabled card supporting compute >=3.5" + << std::endl; + return 1; +} + +int main(int argc, char** argv) +{ + int ret = choose_cuda_device(); + if (ret) { + return 0; + } + + // initial values that will make sure that the cublasSgemm won't actually + // do any work + int n = 0; + float alpha = 1; + float beta = 1; + float* d_A = nullptr; + float* d_B = nullptr; + float* d_C = nullptr; + deviceCublasSgemm<<<1, 1>>>(n, alpha, beta, d_A, d_B, d_C); + + return 0; +} |