diff options
-rw-r--r-- | Source/cmVisualStudio10TargetGenerator.cxx | 17 | ||||
-rw-r--r-- | Tests/CudaOnly/ExportPTX/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Tests/CudaOnly/ExportPTX/kernelA.cu | 4 | ||||
-rw-r--r-- | Tests/CudaOnly/ExportPTX/kernelB.cu | 3 |
4 files changed, 25 insertions, 0 deletions
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index 5bec7d3..4d0773b 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -3245,6 +3245,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable()); Options& cudaOptions = *pOptions; + auto cudaVersion = this->GlobalGenerator->GetPlatformToolsetCudaString(); + // Get compile flags for CUDA in this directory. std::string flags; this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA", @@ -3280,7 +3282,22 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( // to not have the source file extension at all cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx"); notPtx = false; + + if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL, + cudaVersion, "9.0") && + cmSystemTools::VersionCompare(cmSystemTools::OP_LESS, cudaVersion, + "11.5")) { + // The DriverApi flag before 11.5 ( verified back to 9.0 ) which controls + // PTX compilation doesn't propagate user defines causing + // target_compile_definitions to behave differently for VS + + // PTX compared to other generators so we patch the rules + // to normalize behavior + cudaOptions.AddFlag("DriverApiCommandLineTemplate", + "%(BaseCommandLineTemplate) [CompileOut] [FastMath] " + "[Defines] \"%(FullPath)\""); + } } + if (notPtx && cmSystemTools::VersionCompareGreaterEq( "8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) { diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt index f1667af..e97274d 100644 --- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt +++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt @@ -11,6 +11,7 @@ list(SUBLIST CMAKE_CUDA_ARCHITECTURES 0 1 CMAKE_CUDA_ARCHITECTURES) string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual") add_library(CudaPTX OBJECT kernelA.cu kernelB.cu) +target_compile_definitions(CudaPTX PRIVATE "CUDA_PTX_COMPILATION") set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON) #Test ObjectFiles with file(GENERATE) diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu index fbe0d26..8967298 100644 --- a/Tests/CudaOnly/ExportPTX/kernelA.cu +++ b/Tests/CudaOnly/ExportPTX/kernelA.cu @@ -1,4 +1,8 @@ +#ifndef CUDA_PTX_COMPILATION +# error "CUDA_PTX_COMPILATION define not provided" +#endif + __global__ void kernelA(float* r, float* x, float* y, float* z, int size) { for (int i = threadIdx.x; i < size; i += blockDim.x) { diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu index 11872e4..be4613a 100644 --- a/Tests/CudaOnly/ExportPTX/kernelB.cu +++ b/Tests/CudaOnly/ExportPTX/kernelB.cu @@ -1,4 +1,7 @@ +#ifndef CUDA_PTX_COMPILATION +# error "CUDA_PTX_COMPILATION define not provided" +#endif __global__ void kernelB(float* r, float* x, float* y, float* z, int size) { |