summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Source/cmVisualStudio10TargetGenerator.cxx | 17
-rw-r--r-- Tests/CudaOnly/ExportPTX/CMakeLists.txt | 1
-rw-r--r-- Tests/CudaOnly/ExportPTX/kernelA.cu | 4
-rw-r--r-- Tests/CudaOnly/ExportPTX/kernelB.cu | 3
4 files changed, 25 insertions, 0 deletions
diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx
index 5bec7d3..4d0773b 100644
--- a/Source/cmVisualStudio10TargetGenerator.cxx
+++ b/Source/cmVisualStudio10TargetGenerator.cxx
@@ -3245,6 +3245,8 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
this->LocalGenerator, Options::CudaCompiler, gg->GetCudaFlagTable());
Options& cudaOptions = *pOptions;
+ auto cudaVersion = this->GlobalGenerator->GetPlatformToolsetCudaString();
+
// Get compile flags for CUDA in this directory.
std::string flags;
this->LocalGenerator->AddLanguageFlags(flags, this->GeneratorTarget, "CUDA",
@@ -3280,7 +3282,22 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions(
// to not have the source file extension at all
cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx");
notPtx = false;
+
+ if (cmSystemTools::VersionCompare(cmSystemTools::OP_GREATER_EQUAL,
+ cudaVersion, "9.0") &&
+ cmSystemTools::VersionCompare(cmSystemTools::OP_LESS, cudaVersion,
+ "11.5")) {
+ // Before CUDA 11.5 (verified back to 9.0), the DriverApi command line
+ // template that controls PTX compilation does not propagate user
+ // defines, so target_compile_definitions behaves differently for
+ // VS + PTX than for other generators. Patch the template to add
+ // [Defines] and normalize the behavior.
+ cudaOptions.AddFlag("DriverApiCommandLineTemplate",
+ "%(BaseCommandLineTemplate) [CompileOut] [FastMath] "
+ "[Defines] \"%(FullPath)\"");
+ }
}
+
if (notPtx &&
cmSystemTools::VersionCompareGreaterEq(
"8.0", this->GlobalGenerator->GetPlatformToolsetCudaString())) {
diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
index f1667af..e97274d 100644
--- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt
+++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
@@ -11,6 +11,7 @@ list(SUBLIST CMAKE_CUDA_ARCHITECTURES 0 1 CMAKE_CUDA_ARCHITECTURES)
string(APPEND CMAKE_CUDA_ARCHITECTURES "-virtual")
add_library(CudaPTX OBJECT kernelA.cu kernelB.cu)
+target_compile_definitions(CudaPTX PRIVATE "CUDA_PTX_COMPILATION")
set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON)
#Test ObjectFiles with file(GENERATE)
diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu
index fbe0d26..8967298 100644
--- a/Tests/CudaOnly/ExportPTX/kernelA.cu
+++ b/Tests/CudaOnly/ExportPTX/kernelA.cu
@@ -1,4 +1,8 @@
+#ifndef CUDA_PTX_COMPILATION
+# error "CUDA_PTX_COMPILATION define not provided"
+#endif
+
__global__ void kernelA(float* r, float* x, float* y, float* z, int size)
{
for (int i = threadIdx.x; i < size; i += blockDim.x) {
diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu
index 11872e4..be4613a 100644
--- a/Tests/CudaOnly/ExportPTX/kernelB.cu
+++ b/Tests/CudaOnly/ExportPTX/kernelB.cu
@@ -1,4 +1,7 @@
+#ifndef CUDA_PTX_COMPILATION
+# error "CUDA_PTX_COMPILATION define not provided"
+#endif
__global__ void kernelB(float* r, float* x, float* y, float* z, int size)
{