From ed19e8136d59d4ae7fbee20d9a549f4c06237e59 Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Thu, 31 Aug 2017 23:48:02 +0200 Subject: Ninja: Improve performance with deeply-dependent custom targets The commit v3.7.0-rc1~339^2 (Ninja: Fix inter-target order-only dependencies of custom command, 2016-07-20) might cause performance degradations for larger projects. Especially when using custom commands as an input for each compilation rule (e.g. generated headers). For reference in the following I am referring to Source/cmGlobalNinjaGenerator.cxx: -> cmGlobalNinjaGenerator::AppendTargetDependsClosure -> cmGlobalNinjaGenerator::ComputeTargetDependsClosure It turned out that the mentioned commit is doing (indirectly) some redundant work that might impact performance when generating large projects. Imagine the dependency tree of custom targets: A \ C - D - E / B For each target the transitive closure is calculated recursively, but as the TargetDependsClosures are only cached on the top most level, everything downstream has to be recalculated. I.e. A->C->D->E B->C->D->E This ultimately leads to a lot of redundant calls to AppendTargetOutputs. The recursive nature of the algorithm itself is not significant to the problem, but reducing the work to actually to be done work, eliminates the performance problem. This patch changes the way, intermediate results are cached. Rather than caching the closure of targets, we cache the closure of outputs. Such that in the example above at B->C the cache already would kick in. Caching the outputs has one disadvantage that the patch takes care of. In case of such a structure A E \ / \ C - D G / \ / B F the calling order for A would be A->C->D->E->G (at which time G is seen to the recursion) then the recursion returns until it reaches A->C->D->F (at which the seen G would prevent to recurse down to G) But this would poison the cache for F with a wrong value (without G). Hence we use a local result set to ensure the cache is still consistently populated. For a large C++ project with around 25k targets this reduced the CMake configure / generate time from ~40s to ~29s. Signed-off-by: Matthias Maennich --- Source/cmGlobalNinjaGenerator.cxx | 62 ++++++++++++++++++++++++--------------- Source/cmGlobalNinjaGenerator.h | 7 ++--- Source/cmNinjaTypes.h | 2 ++ 3 files changed, 44 insertions(+), 27 deletions(-) diff --git a/Source/cmGlobalNinjaGenerator.cxx b/Source/cmGlobalNinjaGenerator.cxx index eee63c9..8370fe6 100644 --- a/Source/cmGlobalNinjaGenerator.cxx +++ b/Source/cmGlobalNinjaGenerator.cxx @@ -1037,35 +1037,51 @@ void cmGlobalNinjaGenerator::AppendTargetDepends( void cmGlobalNinjaGenerator::AppendTargetDependsClosure( cmGeneratorTarget const* target, cmNinjaDeps& outputs) { - TargetDependsClosureMap::iterator i = - this->TargetDependsClosures.find(target); - if (i == this->TargetDependsClosures.end()) { - TargetDependsClosureMap::value_type e( - target, std::set()); - i = this->TargetDependsClosures.insert(e).first; - this->ComputeTargetDependsClosure(target, i->second); - } - std::set const& targets = i->second; - cmNinjaDeps outs; - for (auto tgt : targets) { - this->AppendTargetOutputs(tgt, outs); - } - std::sort(outs.begin(), outs.end()); + cmNinjaOuts outs; + this->AppendTargetDependsClosure(target, outs, true); + outputs.insert(outputs.end(), outs.begin(), outs.end()); } -void cmGlobalNinjaGenerator::ComputeTargetDependsClosure( - cmGeneratorTarget const* target, std::set& depends) +void cmGlobalNinjaGenerator::AppendTargetDependsClosure( + cmGeneratorTarget const* target, cmNinjaOuts& outputs, bool omit_self) { - cmTargetDependSet const& targetDeps = this->GetTargetDirectDepends(target); - for (auto targetDep : targetDeps) { - if (targetDep->GetType() == cmStateEnums::INTERFACE_LIBRARY) { - continue; - } - if (depends.insert(targetDep).second) { - this->ComputeTargetDependsClosure(targetDep, depends); + + // try to locate the target in the cache + auto find = this->TargetDependsClosures.lower_bound(target); + + if (find == this->TargetDependsClosures.end() || find->first != target) { + // We now calculate the closure outputs by inspecting the dependent + // targets recursively. + // For that we have to distinguish between a local result set that is only + // relevant for filling the cache entries properly isolated and a global + // result set that is relevant for the result of the top level call to + // AppendTargetDependsClosure. + auto const& targetDeps = this->GetTargetDirectDepends(target); + cmNinjaOuts this_outs; // this will be the new cache entry + + for (auto const& dep_target : targetDeps) { + if (dep_target->GetType() == cmStateEnums::INTERFACE_LIBRARY) { + continue; + } + + // Collect the dependent targets for _this_ target + this->AppendTargetDependsClosure(dep_target, this_outs, false); } + find = this->TargetDependsClosures.emplace_hint(find, target, + std::move(this_outs)); + } + + // now fill the outputs of the final result from the newly generated cache + // entry + outputs.insert(find->second.begin(), find->second.end()); + + // finally generate the outputs of the target itself, if applicable + cmNinjaDeps outs; + if (!omit_self) { + this->AppendTargetOutputs(target, outs); } + outputs.insert(outs.begin(), outs.end()); } void cmGlobalNinjaGenerator::AddTargetAlias(const std::string& alias, diff --git a/Source/cmGlobalNinjaGenerator.h b/Source/cmGlobalNinjaGenerator.h index f556ce1..00bf91c 100644 --- a/Source/cmGlobalNinjaGenerator.h +++ b/Source/cmGlobalNinjaGenerator.h @@ -320,6 +320,8 @@ public: cmNinjaTargetDepends depends = DependOnTargetArtifact); void AppendTargetDependsClosure(cmGeneratorTarget const* target, cmNinjaDeps& outputs); + void AppendTargetDependsClosure(cmGeneratorTarget const* target, + cmNinjaOuts& outputs, bool omit_self); void AddDependencyToAll(cmGeneratorTarget* target); void AddDependencyToAll(const std::string& input); @@ -448,10 +450,7 @@ private: typedef std::map TargetAliasMap; TargetAliasMap TargetAliases; - typedef std::map> - TargetDependsClosureMap; - TargetDependsClosureMap TargetDependsClosures; + std::map TargetDependsClosures; std::string NinjaCommand; std::string NinjaVersion; diff --git a/Source/cmNinjaTypes.h b/Source/cmNinjaTypes.h index ec435d9..9e962f1 100644 --- a/Source/cmNinjaTypes.h +++ b/Source/cmNinjaTypes.h @@ -6,6 +6,7 @@ #include "cmConfigure.h" // IWYU pragma: keep #include +#include #include #include @@ -16,6 +17,7 @@ enum cmNinjaTargetDepends }; typedef std::vector cmNinjaDeps; +typedef std::set cmNinjaOuts; typedef std::map cmNinjaVars; #endif // ! cmNinjaTypes_h -- cgit v0.12 From 7374cb857cc509daa7bf6dc44630f51e080bc054 Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Thu, 31 Aug 2017 09:10:46 +0200 Subject: Ninja: Cache ConvertToNinjaPath results to avoid repeat work Calls to this method may dominate generation time in some cases. Measurements for configuring cmake itself show a cache hit rate of ~57% (7753 total calls, 4453 cache hits). For a larger project (that also makes use of custom targets as prerequisite for all compile targets), the measured cache hit ratio is ~96% (2530827 total calls, 2433124 cache hits). For this project the observable cmake runtime could be reduced from 40s to 30s. Signed-off-by: Matthias Maennich --- Source/cmGlobalNinjaGenerator.cxx | 14 ++++++++++---- Source/cmGlobalNinjaGenerator.h | 6 +++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Source/cmGlobalNinjaGenerator.cxx b/Source/cmGlobalNinjaGenerator.cxx index 8370fe6..3e9e995 100644 --- a/Source/cmGlobalNinjaGenerator.cxx +++ b/Source/cmGlobalNinjaGenerator.cxx @@ -861,18 +861,24 @@ static void EnsureTrailingSlash(std::string& path) #endif } -std::string cmGlobalNinjaGenerator::ConvertToNinjaPath( +std::string const& cmGlobalNinjaGenerator::ConvertToNinjaPath( const std::string& path) const { + auto const f = ConvertToNinjaPathCache.find(path); + if (f != ConvertToNinjaPathCache.end()) { + return f->second; + } + cmLocalNinjaGenerator* ng = static_cast(this->LocalGenerators[0]); - std::string convPath = ng->ConvertToRelativePath( - this->LocalGenerators[0]->GetState()->GetBinaryDirectory(), path); + const char* bin_dir = ng->GetState()->GetBinaryDirectory(); + std::string convPath = ng->ConvertToRelativePath(bin_dir, path); convPath = this->NinjaOutputPath(convPath); #ifdef _WIN32 std::replace(convPath.begin(), convPath.end(), '/', '\\'); #endif - return convPath; + return ConvertToNinjaPathCache.emplace(path, std::move(convPath)) + .first->second; } void cmGlobalNinjaGenerator::AddCXXCompileCommand( diff --git a/Source/cmGlobalNinjaGenerator.h b/Source/cmGlobalNinjaGenerator.h index 00bf91c..7f80d08 100644 --- a/Source/cmGlobalNinjaGenerator.h +++ b/Source/cmGlobalNinjaGenerator.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -245,7 +246,7 @@ public: return this->RulesFileStream; } - std::string ConvertToNinjaPath(const std::string& path) const; + std::string const& ConvertToNinjaPath(const std::string& path) const; struct MapToNinjaPathImpl { @@ -452,6 +453,9 @@ private: std::map TargetDependsClosures; + /// the local cache for calls to ConvertToNinjaPath + mutable std::unordered_map ConvertToNinjaPathCache; + std::string NinjaCommand; std::string NinjaVersion; bool NinjaSupportsConsolePool; -- cgit v0.12