From 23691d789e198c228e5c2bb7016b3b1194fd0635 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 23 Mar 2017 09:31:45 -0400 Subject: CUDA: Allow sources to be compiled to .ptx files When the target property `CUDA_PTX_COMPILATION` is enabled CUDA OBJECT libraries will generate ptx files instead of object files. --- Help/manual/cmake-properties.7.rst | 1 + Help/prop_tgt/CUDA_PTX_COMPILATION.rst | 12 ++++ Help/release/dev/enable_ptx_compilation.rst | 6 ++ Source/cmGeneratorTarget.cxx | 12 ++++ Source/cmGeneratorTarget.h | 1 + Source/cmLocalGenerator.cxx | 10 +++- Source/cmLocalGenerator.h | 3 +- Source/cmLocalNinjaGenerator.cxx | 7 ++- Source/cmLocalUnixMakefileGenerator3.cxx | 7 ++- Source/cmLocalVisualStudioGenerator.cxx | 18 +++++- Source/cmMakefileTargetGenerator.cxx | 3 + Source/cmNinjaTargetGenerator.cxx | 3 + Source/cmTarget.cxx | 8 +++ Source/cmVisualStudio10TargetGenerator.cxx | 6 ++ Tests/CudaOnly/CMakeLists.txt | 1 + Tests/CudaOnly/ExportPTX/CMakeLists.txt | 82 ++++++++++++++++++++++++++++ Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake | 19 +++++++ Tests/CudaOnly/ExportPTX/kernelA.cu | 7 +++ Tests/CudaOnly/ExportPTX/kernelB.cu | 8 +++ Tests/CudaOnly/ExportPTX/main.cu | 28 ++++++++++ 20 files changed, 231 insertions(+), 11 deletions(-) create mode 100644 Help/prop_tgt/CUDA_PTX_COMPILATION.rst create mode 100644 Help/release/dev/enable_ptx_compilation.rst create mode 100644 Tests/CudaOnly/ExportPTX/CMakeLists.txt create mode 100644 Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake create mode 100644 Tests/CudaOnly/ExportPTX/kernelA.cu create mode 100644 Tests/CudaOnly/ExportPTX/kernelB.cu create mode 100644 Tests/CudaOnly/ExportPTX/main.cu diff --git a/Help/manual/cmake-properties.7.rst b/Help/manual/cmake-properties.7.rst index 31b2389..344bc09 100644 --- a/Help/manual/cmake-properties.7.rst +++ b/Help/manual/cmake-properties.7.rst @@ -152,6 +152,7 @@ Properties on Targets /prop_tgt/CONFIG_OUTPUT_NAME /prop_tgt/CONFIG_POSTFIX /prop_tgt/CROSSCOMPILING_EMULATOR + /prop_tgt/CUDA_PTX_COMPILATION /prop_tgt/CUDA_SEPARABLE_COMPILATION /prop_tgt/CUDA_EXTENSIONS /prop_tgt/CUDA_STANDARD diff --git a/Help/prop_tgt/CUDA_PTX_COMPILATION.rst b/Help/prop_tgt/CUDA_PTX_COMPILATION.rst new file mode 100644 index 0000000..0ee372b --- /dev/null +++ b/Help/prop_tgt/CUDA_PTX_COMPILATION.rst @@ -0,0 +1,12 @@ +CUDA_PTX_COMPILATION +-------------------- + +Compile CUDA sources to ``.ptx`` files instead of ``.obj`` files +within :ref:`Object Libraries`. + +For example: + +.. code-block:: cmake + + add_library(myptx OBJECT a.cu b.cu) + set_property(TARGET myptx PROPERTY CUDA_PTX_COMPILATION ON) diff --git a/Help/release/dev/enable_ptx_compilation.rst b/Help/release/dev/enable_ptx_compilation.rst new file mode 100644 index 0000000..965247f --- /dev/null +++ b/Help/release/dev/enable_ptx_compilation.rst @@ -0,0 +1,6 @@ +enable_ptx_compilation +---------------------- + +* The :prop_tgt:`CUDA_PTX_COMPILATION` target property was added to + :ref:`Object Libraries` to support compiling to ``.ptx`` files + instead of host object files. diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx index 2799505..5161f09 100644 --- a/Source/cmGeneratorTarget.cxx +++ b/Source/cmGeneratorTarget.cxx @@ -528,6 +528,18 @@ const std::string& cmGeneratorTarget::GetObjectName(cmSourceFile const* file) return this->Objects[file]; } +const char* cmGeneratorTarget::GetCustomObjectExtension() const +{ + static std::string extension; + const bool has_ptx_extension = + this->GetPropertyAsBool("CUDA_PTX_COMPILATION"); + if (has_ptx_extension) { + extension = ".ptx"; + return extension.c_str(); + } + return CM_NULLPTR; +} + void cmGeneratorTarget::AddExplicitObjectName(cmSourceFile const* sf) { this->ExplicitObjectName.insert(sf); diff --git a/Source/cmGeneratorTarget.h b/Source/cmGeneratorTarget.h index 5b903de..fd3f479 100644 --- a/Source/cmGeneratorTarget.h +++ b/Source/cmGeneratorTarget.h @@ -124,6 +124,7 @@ public: void GetObjectSources(std::vector&, const std::string& config) const; const std::string& GetObjectName(cmSourceFile const* file); + const char* GetCustomObjectExtension() const; bool HasExplicitObjectName(cmSourceFile const* file) const; void AddExplicitObjectName(cmSourceFile const* sf); diff --git a/Source/cmLocalGenerator.cxx b/Source/cmLocalGenerator.cxx index 93db192..424ab6f 100644 --- a/Source/cmLocalGenerator.cxx +++ b/Source/cmLocalGenerator.cxx @@ -2162,7 +2162,7 @@ bool cmLocalGenerator::IsNMake() const std::string cmLocalGenerator::GetObjectFileNameWithoutTarget( const cmSourceFile& source, std::string const& dir_max, - bool* hasSourceExtension) + bool* hasSourceExtension, char const* customOutputExtension) { // Construct the object file name using the full path to the source // file which is its only unique identification. @@ -2223,7 +2223,7 @@ std::string cmLocalGenerator::GetObjectFileNameWithoutTarget( } // Remove the source extension if it is to be replaced. - if (replaceExt) { + if (replaceExt || customOutputExtension) { keptSourceExtension = false; std::string::size_type dot_pos = objectName.rfind('.'); if (dot_pos != std::string::npos) { @@ -2232,7 +2232,11 @@ std::string cmLocalGenerator::GetObjectFileNameWithoutTarget( } // Store the new extension. - objectName += this->GlobalGenerator->GetLanguageOutputExtension(source); + if (customOutputExtension) { + objectName += customOutputExtension; + } else { + objectName += this->GlobalGenerator->GetLanguageOutputExtension(source); + } } if (hasSourceExtension) { *hasSourceExtension = keptSourceExtension; diff --git a/Source/cmLocalGenerator.h b/Source/cmLocalGenerator.h index 3047257..1459a05 100644 --- a/Source/cmLocalGenerator.h +++ b/Source/cmLocalGenerator.h @@ -273,7 +273,8 @@ public: // Compute object file names. std::string GetObjectFileNameWithoutTarget( const cmSourceFile& source, std::string const& dir_max, - bool* hasSourceExtension = CM_NULLPTR); + bool* hasSourceExtension = CM_NULLPTR, + char const* customOutputExtension = CM_NULLPTR); /** Fill out the static linker flags for the given target. */ void GetStaticLibraryFlags(std::string& flags, std::string const& config, diff --git a/Source/cmLocalNinjaGenerator.cxx b/Source/cmLocalNinjaGenerator.cxx index c2d9d57..00f5e4b 100644 --- a/Source/cmLocalNinjaGenerator.cxx +++ b/Source/cmLocalNinjaGenerator.cxx @@ -249,12 +249,15 @@ void cmLocalNinjaGenerator::ComputeObjectFilenames( std::map& mapping, cmGeneratorTarget const* gt) { + // Determine if these object files should use a custom extension + char const* custom_ext = gt->GetCustomObjectExtension(); for (std::map::iterator si = mapping.begin(); si != mapping.end(); ++si) { cmSourceFile const* sf = si->first; - si->second = - this->GetObjectFileNameWithoutTarget(*sf, gt->ObjectDirectory); + bool keptSourceExtension; + si->second = this->GetObjectFileNameWithoutTarget( + *sf, gt->ObjectDirectory, &keptSourceExtension, custom_ext); } } diff --git a/Source/cmLocalUnixMakefileGenerator3.cxx b/Source/cmLocalUnixMakefileGenerator3.cxx index 3313127..5f52786 100644 --- a/Source/cmLocalUnixMakefileGenerator3.cxx +++ b/Source/cmLocalUnixMakefileGenerator3.cxx @@ -159,12 +159,15 @@ void cmLocalUnixMakefileGenerator3::ComputeObjectFilenames( std::map& mapping, cmGeneratorTarget const* gt) { + // Determine if these object files should use a custom extension + char const* custom_ext = gt->GetCustomObjectExtension(); for (std::map::iterator si = mapping.begin(); si != mapping.end(); ++si) { cmSourceFile const* sf = si->first; - si->second = - this->GetObjectFileNameWithoutTarget(*sf, gt->ObjectDirectory); + bool keptSourceExtension; + si->second = this->GetObjectFileNameWithoutTarget( + *sf, gt->ObjectDirectory, &keptSourceExtension, custom_ext); } } diff --git a/Source/cmLocalVisualStudioGenerator.cxx b/Source/cmLocalVisualStudioGenerator.cxx index e20fe50..60235d6 100644 --- a/Source/cmLocalVisualStudioGenerator.cxx +++ b/Source/cmLocalVisualStudioGenerator.cxx @@ -32,6 +32,7 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames( std::map& mapping, cmGeneratorTarget const* gt) { + char const* custom_ext = gt->GetCustomObjectExtension(); std::string dir_max = this->ComputeLongestObjectDirectory(gt); // Count the number of object files with each name. Note that @@ -44,7 +45,12 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames( cmSourceFile const* sf = si->first; std::string objectNameLower = cmSystemTools::LowerCase( cmSystemTools::GetFilenameWithoutLastExtension(sf->GetFullPath())); - objectNameLower += this->GlobalGenerator->GetLanguageOutputExtension(*sf); + if (custom_ext) { + objectNameLower += custom_ext; + } else { + objectNameLower += + this->GlobalGenerator->GetLanguageOutputExtension(*sf); + } counts[objectNameLower] += 1; } @@ -57,10 +63,16 @@ void cmLocalVisualStudioGenerator::ComputeObjectFilenames( cmSourceFile const* sf = si->first; std::string objectName = cmSystemTools::GetFilenameWithoutLastExtension(sf->GetFullPath()); - objectName += this->GlobalGenerator->GetLanguageOutputExtension(*sf); + if (custom_ext) { + objectName += custom_ext; + } else { + objectName += this->GlobalGenerator->GetLanguageOutputExtension(*sf); + } if (counts[cmSystemTools::LowerCase(objectName)] > 1) { const_cast(gt)->AddExplicitObjectName(sf); - objectName = this->GetObjectFileNameWithoutTarget(*sf, dir_max); + bool keptSourceExtension; + objectName = this->GetObjectFileNameWithoutTarget( + *sf, dir_max, &keptSourceExtension, custom_ext); } si->second = objectName; } diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index ed38024..92796ba 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -593,6 +593,9 @@ void cmMakefileTargetGenerator::WriteObjectBuildFile( if (this->GeneratorTarget->GetPropertyAsBool( "CUDA_SEPARABLE_COMPILATION")) { cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION"); + } else if (this->GeneratorTarget->GetPropertyAsBool( + "CUDA_PTX_COMPILATION")) { + cmdVar = std::string("CMAKE_CUDA_COMPILE_PTX_COMPILATION"); } else { cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION"); } diff --git a/Source/cmNinjaTargetGenerator.cxx b/Source/cmNinjaTargetGenerator.cxx index b008158..7c417a4 100644 --- a/Source/cmNinjaTargetGenerator.cxx +++ b/Source/cmNinjaTargetGenerator.cxx @@ -589,6 +589,9 @@ void cmNinjaTargetGenerator::WriteCompileRule(const std::string& lang) if (this->GeneratorTarget->GetPropertyAsBool( "CUDA_SEPARABLE_COMPILATION")) { cmdVar = std::string("CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION"); + } else if (this->GeneratorTarget->GetPropertyAsBool( + "CUDA_PTX_COMPILATION")) { + cmdVar = std::string("CMAKE_CUDA_COMPILE_PTX_COMPILATION"); } else { cmdVar = std::string("CMAKE_CUDA_COMPILE_WHOLE_COMPILATION"); } diff --git a/Source/cmTarget.cxx b/Source/cmTarget.cxx index 0b1bb06..a1e5a47 100644 --- a/Source/cmTarget.cxx +++ b/Source/cmTarget.cxx @@ -940,6 +940,14 @@ void cmTarget::SetProperty(const std::string& prop, const char* value) } else if (cmHasLiteralPrefix(prop, "IMPORTED_LIBNAME") && !this->CheckImportedLibName(prop, value ? value : "")) { /* error was reported by check method */ + } else if (prop == "CUDA_PTX_COMPILATION" && + this->GetType() != cmStateEnums::OBJECT_LIBRARY) { + std::ostringstream e; + e << "CUDA_PTX_COMPILATION property can only be applied to OBJECT " + "targets (\"" + << this->Name << "\")\n"; + this->Makefile->IssueMessage(cmake::FATAL_ERROR, e.str()); + return; } else { this->Properties.SetProperty(prop, value); } diff --git a/Source/cmVisualStudio10TargetGenerator.cxx b/Source/cmVisualStudio10TargetGenerator.cxx index f0f04a8..0aadcbf 100644 --- a/Source/cmVisualStudio10TargetGenerator.cxx +++ b/Source/cmVisualStudio10TargetGenerator.cxx @@ -2467,6 +2467,12 @@ bool cmVisualStudio10TargetGenerator::ComputeCudaOptions( if (this->GeneratorTarget->GetPropertyAsBool("CUDA_SEPARABLE_COMPILATION")) { cudaOptions.AddFlag("GenerateRelocatableDeviceCode", "true"); + } else if (this->GeneratorTarget->GetPropertyAsBool( + "CUDA_PTX_COMPILATION")) { + cudaOptions.AddFlag("NvccCompilation", "ptx"); + // We drop the %(Extension) component as CMake expects all PTX files + // to not have the source file extension at all + cudaOptions.AddFlag("CompileOut", "$(IntDir)%(Filename).ptx"); } // Convert the host compiler options to the toolset's abstractions diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt index 85a2051..a3bd707 100644 --- a/Tests/CudaOnly/CMakeLists.txt +++ b/Tests/CudaOnly/CMakeLists.txt @@ -1,4 +1,5 @@ ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard) +ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX) ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation) ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs) diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt new file mode 100644 index 0000000..10249c6 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt @@ -0,0 +1,82 @@ +cmake_minimum_required(VERSION 3.8) +project (CudaOnlyExportPTX CUDA) + +#Goal for this example: +# How to generate PTX files instead of OBJECT files +# How to reference PTX files for custom commands +# How to install PTX files + +add_library(CudaPTX OBJECT kernelA.cu kernelB.cu) +set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON) + +#Test ObjectFiles with file(GENERATE) +file(GENERATE + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/gen_$/>path_to_objs.h + CONTENT [[ + +#include +#include + +#ifndef path_to_objs +#define path_to_objs + +static std::string ptx_paths = "$"; + +#endif + +]] +) +#We are going to need a wrapper around bin2c for multiple reasons +# 1. bin2c only converts a single file at a time +# 2. bin2c has only standard out support, so we have to manually +# redirect to a cmake buffer +# 3. We want to pack everything into a single output file, so we +# need to also pass the --name option +set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h) + +get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY) +find_program(bin_to_c + NAMES bin2c + PATHS ${cuda_compiler_bin} + ) +if(NOT bin_to_c) + message(FATAL_ERROR + "bin2c not found:\n" + " CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n" + " cuda_compiler_bin='${cuda_compiler_bin}'\n" + ) +endif() + +add_custom_command( + OUTPUT "${output_file}" + COMMAND ${CMAKE_COMMAND} + "-DBIN_TO_C_COMMAND=${bin_to_c}" + "-DOBJECTS=$" + "-DOUTPUT=${output_file}" + -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake + VERBATIM + DEPENDS $ + COMMENT "Converting Object files to a C header" + ) + +add_executable(CudaOnlyExportPTX main.cu ${output_file}) +add_dependencies(CudaOnlyExportPTX CudaPTX) +target_include_directories(CudaOnlyExportPTX PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} ) +target_compile_definitions(CudaOnlyExportPTX PRIVATE + "CONFIG_TYPE=gen_$>") + +if(APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that + # the static cuda runtime can find it at runtime. + target_link_libraries(CudaOnlyExportPTX PRIVATE -Wl,-rpath,/usr/local/cuda/lib) +endif() + +#Verify that we can install object targets properly +install(TARGETS CudaPTX CudaOnlyExportPTX + EXPORT cudaPTX + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + OBJECTS DESTINATION objs + ) +install(EXPORT cudaPTX DESTINATION lib/cudaPTX) diff --git a/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake new file mode 100644 index 0000000..0baf934 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/bin2c_wrapper.cmake @@ -0,0 +1,19 @@ + +set(file_contents) +foreach(obj ${OBJECTS}) + get_filename_component(obj_ext ${obj} EXT) + get_filename_component(obj_name ${obj} NAME_WE) + get_filename_component(obj_dir ${obj} DIRECTORY) + + if(obj_ext MATCHES ".ptx") + set(args --name ${obj_name} ${obj}) + execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args} + WORKING_DIRECTORY ${obj_dir} + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error_var + ) + set(file_contents "${file_contents} \n${output}") + endif() +endforeach() +file(WRITE "${OUTPUT}" "${file_contents}") diff --git a/Tests/CudaOnly/ExportPTX/kernelA.cu b/Tests/CudaOnly/ExportPTX/kernelA.cu new file mode 100644 index 0000000..fbe0d26 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/kernelA.cu @@ -0,0 +1,7 @@ + +__global__ void kernelA(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/ExportPTX/kernelB.cu b/Tests/CudaOnly/ExportPTX/kernelB.cu new file mode 100644 index 0000000..11872e4 --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/kernelB.cu @@ -0,0 +1,8 @@ + + +__global__ void kernelB(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/Tests/CudaOnly/ExportPTX/main.cu b/Tests/CudaOnly/ExportPTX/main.cu new file mode 100644 index 0000000..132377c --- /dev/null +++ b/Tests/CudaOnly/ExportPTX/main.cu @@ -0,0 +1,28 @@ + +#include + +/* + Define GENERATED_HEADER macro to allow c++ files to include headers + generated based on different configuration types. +*/ + +/* clang-format off */ +#define GENERATED_HEADER(x) GENERATED_HEADER0(CONFIG_TYPE/x) +/* clang-format on */ +#define GENERATED_HEADER0(x) GENERATED_HEADER1(x) +#define GENERATED_HEADER1(x) + +#include GENERATED_HEADER(path_to_objs.h) + +#include "embedded_objs.h" + +int main(int argc, char** argv) +{ + (void)argc; + (void)argv; + + unsigned char* ka = kernelA; + unsigned char* kb = kernelB; + + return (ka != NULL && kb != NULL) ? 0 : 1; +} -- cgit v0.12