From 574b492b47316dbb860535e870a74cdf60b7e446 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 8 Dec 2021 09:19:31 -0500 Subject: CUDA: Visual Studio Generator propagates definitions for PTX files From CUDA 9.0 to CUDA 11.4 the CUDA Visual Studio integration omitted user defines from PTX generation. With CUDA 11.5 this has been resolved, so we backport the fix to allow for consistent behavior when using CMake. --- Source/cmVisualStudio10TargetGenerator.cxx | 17 +++++++++++++++++ Tests/CudaOnly/ExportPTX/CMakeLists.txt | 1 + Tests/CudaOnly/ExportPTX/kernelA.cu | 4 ++++ Tests/CudaOnly/ExportPTX/kernelB.cu | 3 +++ 4 files changed, 25 insertions(+) diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index e9ff758..1c59450 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -3228,6 +3228,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable()); Options& cudaOptions = *pOptions; + auto cudaVersion = this->GlobalGenerator->GetPlatformToolsetCudaString(); + // Get compile flags for CUDA in this directory. 
std::string flags; this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA", @@ -3263,7 +3265,22 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( // to not have the source file extension at all cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx"); notPtx = false; + + if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL, + cudaVersion, "9.0") && + cmSystemTools::VersionCompare(cmSystemTools::OP_LESS, cudaVersion, + "11.5")) { + // The DriverApi flag before 11.5 ( verified back to 9.0 ) which controls + // PTX compilation doesn't propagate user defines causing + // target_compile_definitions to behave differently for VS + + // PTX compared to other generators so we patch the rules + // to normalize behavior + cudaOptions.AddFlag("DriverApiCommandLineTemplate", + "%(BaseCommandLineTemplate) [CompileOut] [FastMath] " + "[Defines] \"%(FullPath)\""); + } } + if (notPtx && cmSystemTools::VersionCompareGreaterEq( "8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) { diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt index f1667af..e97274d 100644 --- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt +++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt @@ -11,6 +11,7 @@ list(SUBLIST CMAKE_CUDA_ARCHITECTURES 0 1 CMAKE_CUDA_ARCHITECTURES) string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual") add_library(CudaPTX OBJECT kernelA.cu kernelB.cu) +target_compile_definitions(CudaPTX PRIVATE "CUDA_PTX_COMPILATION") set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON) #Test ObjectFiles with file(GENERATE) diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu index fbe0d26..8967298 100644 --- a/Tests/CudaOnly/ExportPTX/kernelA.cu +++ b/Tests/CudaOnly/ExportPTX/kernelA.cu @@ -1,4 +1,8 @@ +#ifndef CUDA_PTX_COMPILATION +# error "CUDA_PTX_COMPILATION define not provided" +#endif + __global__ void kernelA(float* r, float* x, float* y, float* z, int size) { for 
(int i = threadIdx.x; i < size; i += blockDim.x) { diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu index 11872e4..be4613a 100644 --- a/Tests/CudaOnly/ExportPTX/kernelB.cu +++ b/Tests/CudaOnly/ExportPTX/kernelB.cu @@ -1,4 +1,7 @@ +#ifndef CUDA_PTX_COMPILATION +# error "CUDA_PTX_COMPILATION define not provided" +#endif __global__ void kernelB(float* r, float* x, float* y, float* z, int size) { -- cgit v0.12