From c83167fb6e5d862fe6389e3996f2a293e2e6554c Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Fri, 27 Aug 2021 17:15:27 +0100 Subject: Improve heuristic for unknown cost edges --- src/build.cc | 92 ++++++++++++++++++++++++++++++++++++++++++------------------ src/build.h | 23 +++++++-------- 2 files changed, 77 insertions(+), 38 deletions(-) diff --git a/src/build.cc b/src/build.cc index 99e6efa..6f9cf27 100644 --- a/src/build.cc +++ b/src/build.cc @@ -450,6 +450,67 @@ struct SeenBefore { } }; +// Assign run_time_ms_ for all wanted edges, and returns total time for all edges +// For phony edges, 0 cost. +// For edges with a build history, use the last build time. +// For edges without history, use the 75th percentile time for edges with history. +// Or, if there is no history at all just use 1 +int64_t AssignEdgeRuntime(BuildLog* build_log, + const std::map& want) { + bool missing_durations = false; + std::vector durations; + int64_t total_time = 0; + + for (std::map::const_iterator it = want.begin(), + end = want.end(); + it != end; ++it) { + Edge* edge = it->first; + if (edge->is_phony()) { + continue; + } + BuildLog::LogEntry* entry = + build_log->LookupByOutput(edge->outputs_[0]->path()); + if (!entry) { + missing_durations = true; + edge->run_time_ms_ = -1; // -1 to mark as needing filled in + continue; + } + const int64_t duration = entry->end_time - entry->start_time; + edge->run_time_ms_ = duration; + total_time += duration; + durations.push_back(duration); + } + + if (!missing_durations) { + return total_time; + } + + // Heuristic: for unknown edges, take the 75th percentile time. + // This allows the known-slowest jobs to run first, but isn't so + // small that it is always the lowest priority. Which for slow jobs, + // might bottleneck the build. + int64_t p75_time = 1; + int64_t num_durations = static_cast(durations.size()); + if (num_durations > 0) { + size_t p75_idx = (num_durations - 1) - num_durations / 4; + std::vector::iterator p75_it = durations.begin() + p75_idx; + std::nth_element(durations.begin(), p75_it, durations.end()); + p75_time = *p75_it; + } + + for (std::map::const_iterator it = want.begin(), + end = want.end(); + it != end; ++it) { + Edge* edge = it->first; + if (edge->run_time_ms_ >= 0) { + continue; + } + edge->run_time_ms_ = p75_time; + total_time += p75_time; + } + return total_time; +} + } // namespace void Plan::ComputeCriticalTime(BuildLog* build_log) { @@ -466,33 +527,10 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { targets_.end()); } - // this is total time if building all edges in serial, so this value is big - // enough to ensure higher priority target initial critical time always bigger - // than lower one - int64_t total_time = 0; - // Critical path scheduling. - // 0. Assign costs to all edges, using: - // a) The time the edge needed last time, if available. - // b) A fixed cost if this type of edge hasn't run before (0 for - // phony target, 1 for others) - // TODO: Find a better heuristic for edges without log entries - for (std::map::iterator it = want_.begin(), end = want_.end(); - it != end; ++it) { - Edge* edge = it->first; - if (edge->is_phony()) { - continue; - } - BuildLog::LogEntry* entry = - build_log->LookupByOutput(edge->outputs_[0]->path()); - if (!entry) { - edge->run_time_ms_ = 1; - total_time += 1; - continue; - } - const int64_t duration = entry->end_time - entry->start_time; - edge->run_time_ms_ = duration; - total_time += duration; - } + // total time if building all edges in serial. This value is big + // enough to ensure higher priority target's initial critical time + // is always bigger than lower ones + int64_t total_time = AssignEdgeRuntime(build_log, want_); // Use backflow algorithm to compute critical times for all nodes, starting // from the destination nodes. diff --git a/src/build.h b/src/build.h index 4e36e16..084607b 100644 --- a/src/build.h +++ b/src/build.h @@ -118,17 +118,6 @@ struct Plan { /// by information loaded from a dyndep file. bool DyndepsLoaded(DependencyScan* scan, const Node* node, const DyndepFile& ddf, std::string* err); -private: - bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); - void UnmarkDependents(const Node* node, std::set* dependents); - bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, - std::set* dyndep_walk); - - /// Update plan with knowledge that the given node is up to date. - /// If the node is a dyndep binding on any of its dependents, this - /// loads dynamic dependencies from the node's path. - /// Returns 'false' if loading dyndep info fails and 'true' otherwise. - bool NodeFinished(Node* node, std::string* err); /// Enumerate possible steps we want for an edge. enum Want @@ -143,6 +132,18 @@ private: kWantToFinish }; +private: + bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); + void UnmarkDependents(const Node* node, std::set* dependents); + bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, + std::set* dyndep_walk); + + /// Update plan with knowledge that the given node is up to date. + /// If the node is a dyndep binding on any of its dependents, this + /// loads dynamic dependencies from the node's path. + /// Returns 'false' if loading dyndep info fails and 'true' otherwise. + bool NodeFinished(Node* node, std::string* err); + void EdgeWanted(const Edge* edge); bool EdgeMaybeReady(std::map::iterator want_e, std::string* err); -- cgit v0.12