4 files changed, 25 insertions, 0 deletions
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index 5bec7d3..4d0773b 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -3245,6 +3245,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
     this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable());
   Options& cudaOptions = *pOptions;
 
+  auto cudaVersion = this->GlobalGenerator->GetPlatformToolsetCudaString();
+
   // Get compile flags for CUDA in this directory.
   std::string flags;
   this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
@@ -3280,7 +3282,22 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
     // to not have the source file extension at all
     cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx");
     notPtx = false;
+
+    if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL,
+                                      cudaVersion, "9.0") &&
+        cmSystemTools::VersionCompare(cmSystemTools::OP_LESS, cudaVersion,
+                                      "11.5")) {
+      // The DriverApi flag in CUDA versions before 11.5 (verified back to
+      // 9.0), which controls PTX compilation, does not propagate user
+      // defines. This makes target_compile_definitions behave differently
+      // for VS + PTX than for other generators, so we patch the rules to
+      // normalize the behavior.
+      cudaOptions.AddFlag("DriverApiCommandLineTemplate",
+                          "%(BaseCommandLineTemplate) [CompileOut] [FastMath] "
+                          "[Defines] \"%(FullPath)\"");
+    }
   }
+
   if (notPtx &&
       cmSystemTools::VersionCompareGreaterEq(
         "8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) {
diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
index f1667af..e97274d 100644
--- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt
+++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
@@ -11,6 +11,7 @@ list(SUBLIST CMAKE_CUDA_ARCHITECTURES 0 1 CMAKE_CUDA_ARCHITECTURES)
 string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual")
 
 add_library(CudaPTX OBJECT kernelA.cu kernelB.cu)
+target_compile_definitions(CudaPTX PRIVATE "CUDA_PTX_COMPILATION")
 set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
 
 #Test ObjectFiles with file(GENERATE)
diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu
index fbe0d26..8967298 100644
--- a/Tests/CudaOnly/ExportPTX/kernelA.cu
+++ b/Tests/CudaOnly/ExportPTX/kernelA.cu
@@ -1,4 +1,8 @@
 
+#ifndef CUDA_PTX_COMPILATION
+#  error "CUDA_PTX_COMPILATION define not provided"
+#endif
+
 __global__ void kernelA(float* r, float* x, float* y, float* z, int size)
 {
   for (int i = threadIdx.x; i < size; i += blockDim.x) {
diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu
index 11872e4..be4613a 100644
--- a/Tests/CudaOnly/ExportPTX/kernelB.cu
+++ b/Tests/CudaOnly/ExportPTX/kernelB.cu
@@ -1,4 +1,7 @@
 
+#ifndef CUDA_PTX_COMPILATION
+#  error "CUDA_PTX_COMPILATION define not provided"
+#endif
 
 __global__ void kernelB(float* r, float* x, float* y, float* z, int size)
 {