From 3975678fcc3928f2a7dcd79fe9b9e9ebf3abe2b2 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 27 Jul 2021 23:38:36 +0300 Subject: CUDA/Clang: Simplify --register-link-binaries logic Move the logic for appending cubin afterwards, so the check can simply be empty(). With the Makefile generator the option is now at the front instead of being intermixed with the actual bins. --- Source/cmMakefileTargetGenerator.cxx | 20 ++++++++++---------- Source/cmNinjaNormalTargetGenerator.cxx | 10 +++++----- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Source/cmMakefileTargetGenerator.cxx b/Source/cmMakefileTargetGenerator.cxx index 6324b2e..98c61fe 100644 --- a/Source/cmMakefileTargetGenerator.cxx +++ b/Source/cmMakefileTargetGenerator.cxx @@ -1519,22 +1519,13 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( // Link device code for each architecture. for (const std::string& architectureKind : architectures) { - // Clang always generates real code, so strip the specifier. - const std::string architecture = - architectureKind.substr(0, architectureKind.find('-')); - const std::string cubin = - cmStrCat(objectDir, "sm_", architecture, ".cubin"); - - profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinaryDepends.emplace_back(cubin); - std::string registerFileCmd; // The generated register file contains macros that when expanded // register the device routines. Because the routines are the same for // all architectures the register file will be the same too. Thus // generate it only on the first invocation to reduce overhead. - if (fatbinaryDepends.size() == 1) { + if (fatbinaryDepends.empty()) { std::string const registerFileRel = cmStrCat(relPath, relObjectDir, "cmake_cuda_register.h"); registerFileCmd = @@ -1542,6 +1533,15 @@ void cmMakefileTargetGenerator::WriteDeviceLinkRule( cleanFiles.push_back(registerFileRel); } + // Clang always generates real code, so strip the specifier. + const std::string architecture = + architectureKind.substr(0, architectureKind.find('-')); + const std::string cubin = + cmStrCat(objectDir, "sm_", architecture, ".cubin"); + + profiles += cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinaryDepends.emplace_back(cubin); + std::string command = cmStrCat( this->Makefile->GetRequiredDefinition("CMAKE_CUDA_DEVICE_LINKER"), " -arch=sm_", architecture, registerFileCmd, " -o=$@ ", diff --git a/Source/cmNinjaNormalTargetGenerator.cxx b/Source/cmNinjaNormalTargetGenerator.cxx index 5a4c652..493bd4a 100644 --- a/Source/cmNinjaNormalTargetGenerator.cxx +++ b/Source/cmNinjaNormalTargetGenerator.cxx @@ -753,10 +753,6 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( const std::string cubin = cmStrCat(ninjaOutputDir, "/sm_", architecture, ".cubin"); - fatbinary.Variables["PROFILES"] += - cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); - fatbinary.ExplicitDeps.emplace_back(cubin); - cmNinjaBuild dlink(this->LanguageLinkerCudaDeviceRule(config)); dlink.ExplicitDeps = explicitDeps; dlink.Outputs = { cubin }; @@ -766,11 +762,15 @@ void cmNinjaNormalTargetGenerator::WriteDeviceLinkStatements( // the device routines. Because the routines are the same for all // architectures the register file will be the same too. Thus generate it // only on the first invocation to reduce overhead. - if (fatbinary.ExplicitDeps.size() == 1) { + if (fatbinary.ExplicitDeps.empty()) { dlink.Variables["REGISTER"] = cmStrCat( "--register-link-binaries=", ninjaOutputDir, "/cmake_cuda_register.h"); } + fatbinary.Variables["PROFILES"] += + cmStrCat(" -im=profile=sm_", architecture, ",file=", cubin); + fatbinary.ExplicitDeps.emplace_back(cubin); + this->GetGlobalGenerator()->WriteBuild(this->GetCommonFileStream(), dlink); } -- cgit v0.12