From 4af9fc5c51040944178b112e6defda8473d40105 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sun, 21 Sep 2014 16:10:54 -0700 Subject: support explicit build order --- src/build.cc | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/build.h | 5 ++ src/graph.h | 12 +++- 3 files changed, 200 insertions(+), 6 deletions(-) diff --git a/src/build.cc b/src/build.cc index cf07846..8f23e96 100644 --- a/src/build.cc +++ b/src/build.cc @@ -89,6 +89,7 @@ void Plan::Reset() { } bool Plan::AddTarget(const Node* target, string* err) { + targets_.push_back(target); return AddSubTarget(target, NULL, err, NULL); } @@ -151,9 +152,20 @@ void Plan::EdgeWanted(const Edge* edge) { Edge* Plan::FindWork() { if (ready_.empty()) return NULL; - EdgeSet::iterator e = ready_.begin(); - Edge* edge = *e; - ready_.erase(e); + set::iterator i = ready_.end(); + for (list::iterator it = priority_list_.begin(), + end = priority_list_.end(); + it != end; ++it) { + i = ready_.find(*it); + if (i != ready_.end()) { + priority_list_.erase(it); + break; + } + } + if (i == ready_.end()) + i = ready_.begin(); + Edge* edge = *i; + ready_.erase(i); return edge; } @@ -424,6 +436,175 @@ void Plan::UnmarkDependents(const Node* node, set* dependents) { } } +namespace { +bool EdgeCom(const Edge* lhs, const Edge* rhs) { + // Note: > to sort in decreasing order. + return lhs->critical_time() > rhs->critical_time(); +} +} // namespace + +void Plan::ComputePriorityList(BuildLog* build_log) { + + //testcase have no build_log + if (!build_log) + return; + + METRIC_RECORD("ComputePriorityList"); + set dedup; + vector deduped; + for (vector::iterator it = targets_.begin(); it != targets_.end(); + ++it) { + pair::iterator, bool> insert_result = dedup.insert(*it); + if (!insert_result.second) + continue; + deduped.push_back(*it); + } + targets_.swap(deduped); + + + vector edges; + map num_out_edges; + for (map::iterator it = want_.begin(), end = want_.end(); + it != end; ++it) { + if (it->second != kWantNothing) + continue; + Edge* e = it->first; + edges.push_back(e); + //printf("in\n"); + set ins; // protect against #308; also sanity + for (size_t nit = 0; nit < e->inputs_.size(); ++nit) { + Node* n = e->inputs_[nit]; + if (ins.count(n) == 0) { + //printf("%s %s\n", (*nit)->path().c_str(), e->rule().name().c_str()); + num_out_edges[n]++; + ins.insert(n); + } + } + //printf("\n"); + } + + if (false) { + for (map::iterator it = num_out_edges.begin(), + end = num_out_edges.end(); + it != end; ++it) { + printf("%s %d\n", it->first->path().c_str(), it->second); + } + } + + + + // this is total time if building all edges in serial, so this value is big + // enough to ensure higher priority target initial critical time always bigger + // than lower one + uint64_t total_time = 0; + // Critical path scheduling. + // 0. Assign costs to all edges, using: + // a) The time the edge needed last time, if available. + // b) The average time this edge type needed, if this edge hasn't run before. + // (not implemented .log entries is not grouped by rule type, and even + // similar rule type may not have same name , for example two compile rule + // with different compile flags) + // c) A fixed cost if this type of edge hasn't run before (0 for phony target, + // 1 for others) + // + for (vector::iterator it = edges.begin(), end = edges.end(); it != end; + total_time += (*it)->run_time_ms_, ++it) { + Edge* edge = *it; + if (edge->is_phony()) + continue; + BuildLog::LogEntry* entry = + build_log->LookupByOutput(edge->outputs_[0]->path()); + if (!entry) { + edge->run_time_ms_ = 1; + continue; + } + int duration = entry->end_time - entry->start_time; // XXX: + 1? + edge->run_time_ms_ = duration; + } + + + // Dump graph to stdout for debugging / prototyping. + if (false) { + for (vector::iterator it = edges.begin(), end = edges.end(); + it != end; ++it) { + Edge* edge = *it; + Node* input = edge->inputs_[0]; + Node* output = edge->outputs_[0]; + printf("%s %s %d\n", input->path().c_str(), output->path().c_str(), + edge->run_time_ms_); + } + } + + // 1. Use backflow algorithm to compute critical times for all nodes, starting + // from the destination nodes. use priority_weight = total_time * N as + // initial critical time to makes forward edgs of higher priority always + // get higher critical time value + // XXX: ignores pools + queue edgesQ; + + // Makes sure that each edge is added to the queue just once. This is needed + // for example if a binary is used to generate 50 source files, and all the + // source file cxx lines are added. Without this, the edge generating that + // binary would be added ot the queue 50 times. + set done; + + for (vector::reverse_iterator it = targets_.rbegin(), + end = targets_.rend(); + it != end; ++it) { + if (Edge* in = (*it)->in_edge()) { + uint64_t priority_weight = (it - targets_.rbegin()) * total_time; + in->set_critical_time( + max(max(in->run_time_ms_, priority_weight), + in->critical_time())); + if (done.count(in) == 0) { + edgesQ.push(in); + done.insert(in); + } + } + } + while (!edgesQ.empty()) { + Edge* e = edgesQ.front(); edgesQ.pop(); + bool all_nodes_ready = true; + uint64_t max_crit = 0; + for (vector::iterator it = e->outputs_.begin(), + end = e->outputs_.end(); + it != end; ++it) { + if (num_out_edges[*it] > 0) { + all_nodes_ready = false; + continue; + } + for (vector::const_iterator eit = (*it)->out_edges().begin(), + eend = (*it)->out_edges().end(); + eit != eend; ++eit) { + max_crit = max((*eit)->critical_time(), max_crit); + } + } + if (!all_nodes_ready) { + // To the back it goes. + // XXX: think about how often this can happen. + edgesQ.push(e); + continue; + } + e->set_critical_time(max(max_crit + e->run_time_ms_, e->critical_time())); + + for (vector::iterator it = e->inputs_.begin(), + end = e->inputs_.end(); + it != end; ++it) { + num_out_edges[*it]--; + if (Edge* in = (*it)->in_edge()) { + if (done.count(in) == 0) { + edgesQ.push(in); + done.insert(in); + } + } + } + } + + // 2. Build priority list in decreasing order of critical times. + sort(edges.begin(), edges.end(), EdgeCom); + priority_list_ = list(edges.begin(), edges.end()); +} + void Plan::Dump() const { printf("pending: %d\n", (int)want_.size()); for (map::const_iterator e = want_.begin(); e != want_.end(); ++e) { @@ -574,6 +755,8 @@ bool Builder::AlreadyUpToDate() const { bool Builder::Build(string* err) { assert(!AlreadyUpToDate()); + plan_.ComputePriorityList(scan_.build_log()); + status_->PlanHasTotalEdges(plan_.command_edge_count()); int pending_commands = 0; int failures_allowed = config_.failures_allowed; diff --git a/src/build.h b/src/build.h index d697dfb..11c87da 100644 --- a/src/build.h +++ b/src/build.h @@ -16,6 +16,7 @@ #define NINJA_BUILD_H_ #include +#include #include #include #include @@ -75,6 +76,7 @@ struct Plan { /// Reset state. Clears want and ready sets. void Reset(); + void ComputePriorityList(BuildLog* build_log); /// Update the build plan to account for modifications made to the graph /// by information loaded from a dyndep file. @@ -122,6 +124,9 @@ private: EdgeSet ready_; Builder* builder_; + /// user provided targets in build order, earlier one have higher priority + vector targets_; + list priority_list_; /// Total number of edges that have commands (not phony). int command_edges_; diff --git a/src/graph.h b/src/graph.h index bb4f10c..47a2e57 100644 --- a/src/graph.h +++ b/src/graph.h @@ -146,9 +146,10 @@ struct Edge { Edge() : rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone), - id_(0), outputs_ready_(false), deps_loaded_(false), - deps_missing_(false), generated_by_dep_loader_(false), - implicit_deps_(0), order_only_deps_(0), implicit_outs_(0) {} + id_(0), run_time_ms_(0), critical_time_(0), outputs_ready_(false), + deps_loaded_(false), deps_missing_(false), + generated_by_dep_loader_(false), implicit_deps_(0), + order_only_deps_(0), implicit_outs_(0) {} /// Return true if all inputs' in-edges are ready. bool AllInputsReady() const; @@ -171,6 +172,9 @@ struct Edge { void Dump(const char* prefix="") const; + uint64_t critical_time() const { return critical_time_; } + void set_critical_time(uint64_t critical_time) { critical_time_ = critical_time; } + const Rule* rule_; Pool* pool_; std::vector inputs_; @@ -179,6 +183,8 @@ struct Edge { BindingEnv* env_; VisitMark mark_; size_t id_; + int run_time_ms_; + uint64_t critical_time_; bool outputs_ready_; bool deps_loaded_; bool deps_missing_; -- cgit v0.12 From 12b5b7cd535f218895433c4ba10f6556bd44ac42 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Wed, 25 Aug 2021 12:11:04 +0100 Subject: Use explicit std:: style and remove debug print statements --- src/build.cc | 82 +++++++++++++++++++++--------------------------------------- src/build.h | 4 +-- 2 files changed, 30 insertions(+), 56 deletions(-) diff --git a/src/build.cc b/src/build.cc index 8f23e96..5b6ad2f 100644 --- a/src/build.cc +++ b/src/build.cc @@ -152,9 +152,9 @@ void Plan::EdgeWanted(const Edge* edge) { Edge* Plan::FindWork() { if (ready_.empty()) return NULL; - set::iterator i = ready_.end(); - for (list::iterator it = priority_list_.begin(), - end = priority_list_.end(); + std::set::iterator i = ready_.end(); + for (std::list::iterator it = priority_list_.begin(), + end = priority_list_.end(); it != end; ++it) { i = ready_.find(*it); if (i != ready_.end()) { @@ -450,11 +450,11 @@ void Plan::ComputePriorityList(BuildLog* build_log) { return; METRIC_RECORD("ComputePriorityList"); - set dedup; - vector deduped; - for (vector::iterator it = targets_.begin(); it != targets_.end(); + std::set dedup; + std::vector deduped; + for (std::vector::iterator it = targets_.begin(); it != targets_.end(); ++it) { - pair::iterator, bool> insert_result = dedup.insert(*it); + std::pair::iterator, bool> insert_result = dedup.insert(*it); if (!insert_result.second) continue; deduped.push_back(*it); @@ -462,37 +462,24 @@ void Plan::ComputePriorityList(BuildLog* build_log) { targets_.swap(deduped); - vector edges; - map num_out_edges; - for (map::iterator it = want_.begin(), end = want_.end(); + std::vector edges; + std::map num_out_edges; + for (std::map::iterator it = want_.begin(), end = want_.end(); it != end; ++it) { if (it->second != kWantNothing) continue; Edge* e = it->first; edges.push_back(e); - //printf("in\n"); - set ins; // protect against #308; also sanity + std::set ins; // protect against #308; also sanity for (size_t nit = 0; nit < e->inputs_.size(); ++nit) { Node* n = e->inputs_[nit]; if (ins.count(n) == 0) { - //printf("%s %s\n", (*nit)->path().c_str(), e->rule().name().c_str()); num_out_edges[n]++; ins.insert(n); } } - //printf("\n"); - } - - if (false) { - for (map::iterator it = num_out_edges.begin(), - end = num_out_edges.end(); - it != end; ++it) { - printf("%s %d\n", it->first->path().c_str(), it->second); - } } - - // this is total time if building all edges in serial, so this value is big // enough to ensure higher priority target initial critical time always bigger // than lower one @@ -507,7 +494,7 @@ void Plan::ComputePriorityList(BuildLog* build_log) { // c) A fixed cost if this type of edge hasn't run before (0 for phony target, // 1 for others) // - for (vector::iterator it = edges.begin(), end = edges.end(); it != end; + for (std::vector::iterator it = edges.begin(), end = edges.end(); it != end; total_time += (*it)->run_time_ms_, ++it) { Edge* edge = *it; if (edge->is_phony()) @@ -518,43 +505,30 @@ void Plan::ComputePriorityList(BuildLog* build_log) { edge->run_time_ms_ = 1; continue; } - int duration = entry->end_time - entry->start_time; // XXX: + 1? + int duration = entry->end_time - entry->start_time; edge->run_time_ms_ = duration; } - - // Dump graph to stdout for debugging / prototyping. - if (false) { - for (vector::iterator it = edges.begin(), end = edges.end(); - it != end; ++it) { - Edge* edge = *it; - Node* input = edge->inputs_[0]; - Node* output = edge->outputs_[0]; - printf("%s %s %d\n", input->path().c_str(), output->path().c_str(), - edge->run_time_ms_); - } - } - // 1. Use backflow algorithm to compute critical times for all nodes, starting // from the destination nodes. use priority_weight = total_time * N as // initial critical time to makes forward edgs of higher priority always // get higher critical time value // XXX: ignores pools - queue edgesQ; + std::queue edgesQ; // Makes sure that each edge is added to the queue just once. This is needed // for example if a binary is used to generate 50 source files, and all the // source file cxx lines are added. Without this, the edge generating that // binary would be added ot the queue 50 times. - set done; + std::set done; - for (vector::reverse_iterator it = targets_.rbegin(), - end = targets_.rend(); + for (std::vector::reverse_iterator it = targets_.rbegin(), + end = targets_.rend(); it != end; ++it) { if (Edge* in = (*it)->in_edge()) { uint64_t priority_weight = (it - targets_.rbegin()) * total_time; in->set_critical_time( - max(max(in->run_time_ms_, priority_weight), + std::max(std::max(in->run_time_ms_, priority_weight), in->critical_time())); if (done.count(in) == 0) { edgesQ.push(in); @@ -566,17 +540,17 @@ void Plan::ComputePriorityList(BuildLog* build_log) { Edge* e = edgesQ.front(); edgesQ.pop(); bool all_nodes_ready = true; uint64_t max_crit = 0; - for (vector::iterator it = e->outputs_.begin(), - end = e->outputs_.end(); + for (std::vector::iterator it = e->outputs_.begin(), + end = e->outputs_.end(); it != end; ++it) { if (num_out_edges[*it] > 0) { all_nodes_ready = false; continue; } - for (vector::const_iterator eit = (*it)->out_edges().begin(), - eend = (*it)->out_edges().end(); + for (std::vector::const_iterator eit = (*it)->out_edges().begin(), + eend = (*it)->out_edges().end(); eit != eend; ++eit) { - max_crit = max((*eit)->critical_time(), max_crit); + max_crit = std::max((*eit)->critical_time(), max_crit); } } if (!all_nodes_ready) { @@ -585,10 +559,10 @@ void Plan::ComputePriorityList(BuildLog* build_log) { edgesQ.push(e); continue; } - e->set_critical_time(max(max_crit + e->run_time_ms_, e->critical_time())); + e->set_critical_time(std::max(max_crit + e->run_time_ms_, e->critical_time())); - for (vector::iterator it = e->inputs_.begin(), - end = e->inputs_.end(); + for (std::vector::iterator it = e->inputs_.begin(), + end = e->inputs_.end(); it != end; ++it) { num_out_edges[*it]--; if (Edge* in = (*it)->in_edge()) { @@ -601,8 +575,8 @@ void Plan::ComputePriorityList(BuildLog* build_log) { } // 2. Build priority list in decreasing order of critical times. - sort(edges.begin(), edges.end(), EdgeCom); - priority_list_ = list(edges.begin(), edges.end()); + std::sort(edges.begin(), edges.end(), EdgeCom); + priority_list_.assign(edges.begin(), edges.end()); } void Plan::Dump() const { diff --git a/src/build.h b/src/build.h index 11c87da..5ee5650 100644 --- a/src/build.h +++ b/src/build.h @@ -125,8 +125,8 @@ private: Builder* builder_; /// user provided targets in build order, earlier one have higher priority - vector targets_; - list priority_list_; + std::vector targets_; + std::list priority_list_; /// Total number of edges that have commands (not phony). int command_edges_; -- cgit v0.12 From 8e232000f07d7a9c50bb42acb50b9647aba7f80d Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Wed, 25 Aug 2021 15:49:03 +0100 Subject: Change priority_list_ into a std::priority_queue of ready edges --- src/build.cc | 82 +++++++++++++++++++++++++++++------------------------------- src/build.h | 57 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 47 deletions(-) diff --git a/src/build.cc b/src/build.cc index 5b6ad2f..76df857 100644 --- a/src/build.cc +++ b/src/build.cc @@ -75,6 +75,16 @@ bool DryRunCommandRunner::WaitForCommand(Result* result) { } // namespace + +bool EdgeQueue::EdgePriorityCompare::operator()(const Edge* e1, const Edge* e2) const { + const uint64_t ct1 = e1->critical_time(); + const uint64_t ct2 = e2->critical_time(); + if (ct1 != ct2) { + return ct1 < ct2; + } + return e1->id_ < e2->id_; +} + Plan::Plan(Builder* builder) : builder_(builder) , command_edges_(0) @@ -152,21 +162,7 @@ void Plan::EdgeWanted(const Edge* edge) { Edge* Plan::FindWork() { if (ready_.empty()) return NULL; - std::set::iterator i = ready_.end(); - for (std::list::iterator it = priority_list_.begin(), - end = priority_list_.end(); - it != end; ++it) { - i = ready_.find(*it); - if (i != ready_.end()) { - priority_list_.erase(it); - break; - } - } - if (i == ready_.end()) - i = ready_.begin(); - Edge* edge = *i; - ready_.erase(i); - return edge; + return ready_.pop(); } void Plan::ScheduleWork(map::iterator want_e) { @@ -184,10 +180,12 @@ void Plan::ScheduleWork(map::iterator want_e) { Pool* pool = edge->pool(); if (pool->ShouldDelayEdge()) { pool->DelayEdge(edge); - pool->RetrieveReadyEdges(&ready_); + EdgeSet new_edges; + pool->RetrieveReadyEdges(&new_edges); + ready_.push(new_edges.begin(), new_edges.end()); } else { pool->EdgeScheduled(*edge); - ready_.insert(edge); + ready_.push(edge); } } @@ -199,7 +197,9 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) { // See if this job frees up any delayed jobs. if (directly_wanted) edge->pool()->EdgeFinished(*edge); - edge->pool()->RetrieveReadyEdges(&ready_); + EdgeSet new_edges; + edge->pool()->RetrieveReadyEdges(&new_edges); + ready_.push(new_edges.begin(), new_edges.end()); // The rest of this function only applies to successful commands. if (result != kEdgeSucceeded) @@ -437,29 +437,32 @@ void Plan::UnmarkDependents(const Node* node, set* dependents) { } namespace { -bool EdgeCom(const Edge* lhs, const Edge* rhs) { - // Note: > to sort in decreasing order. - return lhs->critical_time() > rhs->critical_time(); -} + +struct SeenNodeBefore { + std::set *seen; + + bool operator() (const Node* node) { + // Return true if the node has been seen before + return !seen->insert(node).second; + } +}; + } // namespace -void Plan::ComputePriorityList(BuildLog* build_log) { +void Plan::ComputeCriticalTime(BuildLog* build_log) { //testcase have no build_log if (!build_log) return; METRIC_RECORD("ComputePriorityList"); - std::set dedup; - std::vector deduped; - for (std::vector::iterator it = targets_.begin(); it != targets_.end(); - ++it) { - std::pair::iterator, bool> insert_result = dedup.insert(*it); - if (!insert_result.second) - continue; - deduped.push_back(*it); + // Remove duplicate targets + { + std::set seen; + targets_.erase( + std::remove_if(targets_.begin(), targets_.end(), SeenNodeBefore{&seen}), + targets_.end()); } - targets_.swap(deduped); std::vector edges; @@ -473,9 +476,8 @@ void Plan::ComputePriorityList(BuildLog* build_log) { std::set ins; // protect against #308; also sanity for (size_t nit = 0; nit < e->inputs_.size(); ++nit) { Node* n = e->inputs_[nit]; - if (ins.count(n) == 0) { + if (ins.insert(n).second) { num_out_edges[n]++; - ins.insert(n); } } } @@ -509,7 +511,7 @@ void Plan::ComputePriorityList(BuildLog* build_log) { edge->run_time_ms_ = duration; } - // 1. Use backflow algorithm to compute critical times for all nodes, starting + // Use backflow algorithm to compute critical times for all nodes, starting // from the destination nodes. use priority_weight = total_time * N as // initial critical time to makes forward edgs of higher priority always // get higher critical time value @@ -530,9 +532,8 @@ void Plan::ComputePriorityList(BuildLog* build_log) { in->set_critical_time( std::max(std::max(in->run_time_ms_, priority_weight), in->critical_time())); - if (done.count(in) == 0) { + if (done.insert(in).second) { edgesQ.push(in); - done.insert(in); } } } @@ -566,17 +567,12 @@ void Plan::ComputePriorityList(BuildLog* build_log) { it != end; ++it) { num_out_edges[*it]--; if (Edge* in = (*it)->in_edge()) { - if (done.count(in) == 0) { + if (done.insert(in).second) { edgesQ.push(in); - done.insert(in); } } } } - - // 2. Build priority list in decreasing order of critical times. - std::sort(edges.begin(), edges.end(), EdgeCom); - priority_list_.assign(edges.begin(), edges.end()); } void Plan::Dump() const { diff --git a/src/build.h b/src/build.h index 5ee5650..ec6deea 100644 --- a/src/build.h +++ b/src/build.h @@ -16,7 +16,7 @@ #define NINJA_BUILD_H_ #include -#include +#include #include #include #include @@ -36,6 +36,56 @@ struct Node; struct State; struct Status; + +// Set of ready edges, sorted by priority +class EdgeQueue { + struct EdgePriorityCompare { + bool operator()(const Edge* e1, const Edge* e2) const; + }; + + std::priority_queue, EdgePriorityCompare> queue_; + // Set to ensure no duplicate entries in ready_ + EdgeSet set_; + +public: + + void push(Edge* edge) { + if (set_.insert(edge).second) { + queue_.push(edge); + } + } + + template + void push(It first, It last) { + for (; first != last; ++first) { + push(*first); + } + } + + Edge* pop() { + Edge* ret = queue_.top(); + queue_.pop(); + set_.erase(ret); + return ret; + } + + void clear() { + set_.clear(); + while (!queue_.empty()) { + queue_.pop(); + } + } + + size_t size() const { + return queue_.size(); + } + + bool empty() const { + return queue_.empty(); + } +}; + + /// Plan stores the state of a build plan: what we intend to build, /// which steps we're ready to execute. struct Plan { @@ -76,7 +126,7 @@ struct Plan { /// Reset state. Clears want and ready sets. void Reset(); - void ComputePriorityList(BuildLog* build_log); + void ComputeCriticalTime(BuildLog* build_log); /// Update the build plan to account for modifications made to the graph /// by information loaded from a dyndep file. @@ -121,12 +171,11 @@ private: /// we want for the edge. std::map want_; - EdgeSet ready_; + EdgeQueue ready_; Builder* builder_; /// user provided targets in build order, earlier one have higher priority std::vector targets_; - std::list priority_list_; /// Total number of edges that have commands (not phony). int command_edges_; -- cgit v0.12 From 2fcf403ac54d77d7dc8d582c28aa86a911428da4 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Wed, 25 Aug 2021 16:18:40 +0100 Subject: Fix critical time calculation The existing algorithm doesn't work because it strictly requires that all outputs are visited before updating an edge. So any task downstream from a task with multiple out-edges may get ignored. The fix is to always propagate your critical time to the next input node, and only place it in the queue if you offer a higher critical time. --- src/build.cc | 97 +++++++++++++++++++----------------------------------------- src/graph.h | 8 ++--- 2 files changed, 35 insertions(+), 70 deletions(-) diff --git a/src/build.cc b/src/build.cc index 76df857..a4dfc01 100644 --- a/src/build.cc +++ b/src/build.cc @@ -77,8 +77,8 @@ bool DryRunCommandRunner::WaitForCommand(Result* result) { bool EdgeQueue::EdgePriorityCompare::operator()(const Edge* e1, const Edge* e2) const { - const uint64_t ct1 = e1->critical_time(); - const uint64_t ct2 = e2->critical_time(); + const int64_t ct1 = e1->critical_time(); + const int64_t ct2 = e2->critical_time(); if (ct1 != ct2) { return ct1 < ct2; } @@ -464,28 +464,10 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { targets_.end()); } - - std::vector edges; - std::map num_out_edges; - for (std::map::iterator it = want_.begin(), end = want_.end(); - it != end; ++it) { - if (it->second != kWantNothing) - continue; - Edge* e = it->first; - edges.push_back(e); - std::set ins; // protect against #308; also sanity - for (size_t nit = 0; nit < e->inputs_.size(); ++nit) { - Node* n = e->inputs_[nit]; - if (ins.insert(n).second) { - num_out_edges[n]++; - } - } - } - // this is total time if building all edges in serial, so this value is big // enough to ensure higher priority target initial critical time always bigger // than lower one - uint64_t total_time = 0; + int64_t total_time = 0; // Critical path scheduling. // 0. Assign costs to all edges, using: // a) The time the edge needed last time, if available. @@ -496,11 +478,12 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // c) A fixed cost if this type of edge hasn't run before (0 for phony target, // 1 for others) // - for (std::vector::iterator it = edges.begin(), end = edges.end(); it != end; - total_time += (*it)->run_time_ms_, ++it) { - Edge* edge = *it; - if (edge->is_phony()) + for (std::map::iterator it = want_.begin(), end = want_.end(); + it != end; ++it) { + Edge* edge = it->first; + if (edge->is_phony()) { continue; + } BuildLog::LogEntry* entry = build_log->LookupByOutput(edge->outputs_[0]->path()); if (!entry) { @@ -512,62 +495,44 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { } // Use backflow algorithm to compute critical times for all nodes, starting - // from the destination nodes. use priority_weight = total_time * N as - // initial critical time to makes forward edgs of higher priority always - // get higher critical time value + // from the destination nodes. // XXX: ignores pools - std::queue edgesQ; - - // Makes sure that each edge is added to the queue just once. This is needed - // for example if a binary is used to generate 50 source files, and all the - // source file cxx lines are added. Without this, the edge generating that - // binary would be added ot the queue 50 times. - std::set done; + std::set active_edges; // All edges in edgesQ (for uniqueness) + std::queue edgesQ; // Queue, for breadth-first traversal for (std::vector::reverse_iterator it = targets_.rbegin(), end = targets_.rend(); it != end; ++it) { if (Edge* in = (*it)->in_edge()) { - uint64_t priority_weight = (it - targets_.rbegin()) * total_time; + // Use initial critical time: total_time * N. This means higher + // priority targets always get a higher critical time value + int64_t priority_weight = (it - targets_.rbegin()) * total_time; in->set_critical_time( - std::max(std::max(in->run_time_ms_, priority_weight), - in->critical_time())); - if (done.insert(in).second) { + priority_weight + + std::max(in->run_time_ms_, in->critical_time())); + if (active_edges.insert(in).second) { edgesQ.push(in); } } } + while (!edgesQ.empty()) { - Edge* e = edgesQ.front(); edgesQ.pop(); - bool all_nodes_ready = true; - uint64_t max_crit = 0; - for (std::vector::iterator it = e->outputs_.begin(), - end = e->outputs_.end(); - it != end; ++it) { - if (num_out_edges[*it] > 0) { - all_nodes_ready = false; - continue; - } - for (std::vector::const_iterator eit = (*it)->out_edges().begin(), - eend = (*it)->out_edges().end(); - eit != eend; ++eit) { - max_crit = std::max((*eit)->critical_time(), max_crit); - } - } - if (!all_nodes_ready) { - // To the back it goes. - // XXX: think about how often this can happen. - edgesQ.push(e); - continue; - } - e->set_critical_time(std::max(max_crit + e->run_time_ms_, e->critical_time())); + Edge* e = edgesQ.front(); + edgesQ.pop(); + active_edges.erase(e); for (std::vector::iterator it = e->inputs_.begin(), end = e->inputs_.end(); it != end; ++it) { - num_out_edges[*it]--; - if (Edge* in = (*it)->in_edge()) { - if (done.insert(in).second) { + Edge* in = (*it)->in_edge(); + if (!in) { + continue; + } + // Only process edge if this node offers a higher critical time + const int64_t proposed_time = e->critical_time() + in->run_time_ms_; + if (proposed_time > in->critical_time()) { + in->set_critical_time(proposed_time); + if (active_edges.insert(in).second) { edgesQ.push(in); } } @@ -725,7 +690,7 @@ bool Builder::AlreadyUpToDate() const { bool Builder::Build(string* err) { assert(!AlreadyUpToDate()); - plan_.ComputePriorityList(scan_.build_log()); + plan_.ComputeCriticalTime(scan_.build_log()); status_->PlanHasTotalEdges(plan_.command_edge_count()); int pending_commands = 0; diff --git a/src/graph.h b/src/graph.h index 47a2e57..1bc79fa 100644 --- a/src/graph.h +++ b/src/graph.h @@ -146,7 +146,7 @@ struct Edge { Edge() : rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone), - id_(0), run_time_ms_(0), critical_time_(0), outputs_ready_(false), + id_(0), run_time_ms_(0), critical_time_(-1), outputs_ready_(false), deps_loaded_(false), deps_missing_(false), generated_by_dep_loader_(false), implicit_deps_(0), order_only_deps_(0), implicit_outs_(0) {} @@ -172,8 +172,8 @@ struct Edge { void Dump(const char* prefix="") const; - uint64_t critical_time() const { return critical_time_; } - void set_critical_time(uint64_t critical_time) { critical_time_ = critical_time; } + int64_t critical_time() const { return critical_time_; } + void set_critical_time(int64_t critical_time) { critical_time_ = critical_time; } const Rule* rule_; Pool* pool_; @@ -184,7 +184,7 @@ struct Edge { VisitMark mark_; size_t id_; int run_time_ms_; - uint64_t critical_time_; + int64_t critical_time_; bool outputs_ready_; bool deps_loaded_; bool deps_missing_; -- cgit v0.12 From c5d355cbb766a6bbce517dbd914dc671cea526b3 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Wed, 25 Aug 2021 20:53:28 +0100 Subject: clang-format diff --- src/build.cc | 9 ++++----- src/build.h | 12 +++--------- src/graph.h | 8 +++++--- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/src/build.cc b/src/build.cc index a4dfc01..8b638b7 100644 --- a/src/build.cc +++ b/src/build.cc @@ -450,8 +450,7 @@ struct SeenNodeBefore { } // namespace void Plan::ComputeCriticalTime(BuildLog* build_log) { - - //testcase have no build_log + // testcases have no build_log if (!build_log) return; @@ -459,9 +458,9 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // Remove duplicate targets { std::set seen; - targets_.erase( - std::remove_if(targets_.begin(), targets_.end(), SeenNodeBefore{&seen}), - targets_.end()); + targets_.erase(std::remove_if(targets_.begin(), targets_.end(), + SeenNodeBefore{ &seen }), + targets_.end()); } // this is total time if building all edges in serial, so this value is big diff --git a/src/build.h b/src/build.h index ec6deea..410c2db 100644 --- a/src/build.h +++ b/src/build.h @@ -47,8 +47,7 @@ class EdgeQueue { // Set to ensure no duplicate entries in ready_ EdgeSet set_; -public: - + public: void push(Edge* edge) { if (set_.insert(edge).second) { queue_.push(edge); @@ -76,16 +75,11 @@ public: } } - size_t size() const { - return queue_.size(); - } + size_t size() const { return queue_.size(); } - bool empty() const { - return queue_.empty(); - } + bool empty() const { return queue_.empty(); } }; - /// Plan stores the state of a build plan: what we intend to build, /// which steps we're ready to execute. struct Plan { diff --git a/src/graph.h b/src/graph.h index 1bc79fa..b66ae6b 100644 --- a/src/graph.h +++ b/src/graph.h @@ -148,8 +148,8 @@ struct Edge { : rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone), id_(0), run_time_ms_(0), critical_time_(-1), outputs_ready_(false), deps_loaded_(false), deps_missing_(false), - generated_by_dep_loader_(false), implicit_deps_(0), - order_only_deps_(0), implicit_outs_(0) {} + generated_by_dep_loader_(false), implicit_deps_(0), order_only_deps_(0), + implicit_outs_(0) {} /// Return true if all inputs' in-edges are ready. bool AllInputsReady() const; @@ -173,7 +173,9 @@ struct Edge { void Dump(const char* prefix="") const; int64_t critical_time() const { return critical_time_; } - void set_critical_time(int64_t critical_time) { critical_time_ = critical_time; } + void set_critical_time(int64_t critical_time) { + critical_time_ = critical_time; + } const Rule* rule_; Pool* pool_; -- cgit v0.12 From 63b0a9a6a170793f98aa22da827ee3ac11eb1a50 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Thu, 26 Aug 2021 00:53:54 +0100 Subject: Address review comments --- src/build.cc | 40 ++++++++++++++++++++++------------------ src/build.h | 12 ++---------- src/graph.h | 2 +- 3 files changed, 25 insertions(+), 29 deletions(-) diff --git a/src/build.cc b/src/build.cc index 8b638b7..fd220e5 100644 --- a/src/build.cc +++ b/src/build.cc @@ -438,12 +438,15 @@ void Plan::UnmarkDependents(const Node* node, set* dependents) { namespace { -struct SeenNodeBefore { - std::set *seen; +template +struct SeenBefore { + std::set *seen_; - bool operator() (const Node* node) { - // Return true if the node has been seen before - return !seen->insert(node).second; + SeenBefore(std::set* seen) : seen_(seen) {} + + bool operator() (const T* item) { + // Return true if the item has been seen before + return !seen_->insert(item).second; } }; @@ -458,8 +461,8 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // Remove duplicate targets { std::set seen; - targets_.erase(std::remove_if(targets_.begin(), targets_.end(), - SeenNodeBefore{ &seen }), + SeenBefore seen_before(&seen); + targets_.erase(std::remove_if(targets_.begin(), targets_.end(), seen_before), targets_.end()); } @@ -489,15 +492,16 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { edge->run_time_ms_ = 1; continue; } - int duration = entry->end_time - entry->start_time; - edge->run_time_ms_ = duration; + edge->run_time_ms_ = entry->end_time - entry->start_time; } // Use backflow algorithm to compute critical times for all nodes, starting // from the destination nodes. // XXX: ignores pools - std::set active_edges; // All edges in edgesQ (for uniqueness) - std::queue edgesQ; // Queue, for breadth-first traversal + std::queue breadthFirstEdges; // Queue, for breadth-first traversal + std::set active_edges; // Set of in breadthFirstEdges + SeenBefore seen_edge( + &active_edges); // Test for uniqueness in breadthFirstEdges for (std::vector::reverse_iterator it = targets_.rbegin(), end = targets_.rend(); @@ -509,15 +513,15 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { in->set_critical_time( priority_weight + std::max(in->run_time_ms_, in->critical_time())); - if (active_edges.insert(in).second) { - edgesQ.push(in); + if (!seen_edge(in)) { + breadthFirstEdges.push(in); } } } - while (!edgesQ.empty()) { - Edge* e = edgesQ.front(); - edgesQ.pop(); + while (!breadthFirstEdges.empty()) { + Edge* e = breadthFirstEdges.front(); + breadthFirstEdges.pop(); active_edges.erase(e); for (std::vector::iterator it = e->inputs_.begin(), @@ -531,8 +535,8 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { const int64_t proposed_time = e->critical_time() + in->run_time_ms_; if (proposed_time > in->critical_time()) { in->set_critical_time(proposed_time); - if (active_edges.insert(in).second) { - edgesQ.push(in); + if (!seen_edge(in)) { + breadthFirstEdges.push(in); } } } diff --git a/src/build.h b/src/build.h index 410c2db..4e36e16 100644 --- a/src/build.h +++ b/src/build.h @@ -44,14 +44,10 @@ class EdgeQueue { }; std::priority_queue, EdgePriorityCompare> queue_; - // Set to ensure no duplicate entries in ready_ - EdgeSet set_; public: void push(Edge* edge) { - if (set_.insert(edge).second) { - queue_.push(edge); - } + queue_.push(edge); } template @@ -64,15 +60,11 @@ class EdgeQueue { Edge* pop() { Edge* ret = queue_.top(); queue_.pop(); - set_.erase(ret); return ret; } void clear() { - set_.clear(); - while (!queue_.empty()) { - queue_.pop(); - } + queue_ = std::priority_queue, EdgePriorityCompare>(); } size_t size() const { return queue_.size(); } diff --git a/src/graph.h b/src/graph.h index b66ae6b..7851504 100644 --- a/src/graph.h +++ b/src/graph.h @@ -185,7 +185,7 @@ struct Edge { BindingEnv* env_; VisitMark mark_; size_t id_; - int run_time_ms_; + int64_t run_time_ms_; int64_t critical_time_; bool outputs_ready_; bool deps_loaded_; -- cgit v0.12 From 5b8d19b24b00973976990303524f47750e3e1dc4 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Thu, 26 Aug 2021 00:58:31 +0100 Subject: Fix total_time computation --- src/build.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/build.cc b/src/build.cc index fd220e5..08b0cc6 100644 --- a/src/build.cc +++ b/src/build.cc @@ -492,7 +492,9 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { edge->run_time_ms_ = 1; continue; } - edge->run_time_ms_ = entry->end_time - entry->start_time; + const int64_t duration = entry->end_time - entry->start_time; + edge->run_time_ms_ = duration; + total_time += duration; } // Use backflow algorithm to compute critical times for all nodes, starting -- cgit v0.12 From fe80637327997d0a97e1eee45e3d0929edc75bbd Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Fri, 27 Aug 2021 12:12:45 +0100 Subject: Address review comments --- src/build.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/build.cc b/src/build.cc index 08b0cc6..99e6efa 100644 --- a/src/build.cc +++ b/src/build.cc @@ -440,7 +440,7 @@ namespace { template struct SeenBefore { - std::set *seen_; + std::set* seen_; SeenBefore(std::set* seen) : seen_(seen) {} @@ -473,13 +473,9 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // Critical path scheduling. // 0. Assign costs to all edges, using: // a) The time the edge needed last time, if available. - // b) The average time this edge type needed, if this edge hasn't run before. - // (not implemented .log entries is not grouped by rule type, and even - // similar rule type may not have same name , for example two compile rule - // with different compile flags) - // c) A fixed cost if this type of edge hasn't run before (0 for phony target, - // 1 for others) - // + // b) A fixed cost if this type of edge hasn't run before (0 for + // phony target, 1 for others) + // TODO: Find a better heuristic for edges without log entries for (std::map::iterator it = want_.begin(), end = want_.end(); it != end; ++it) { Edge* edge = it->first; @@ -490,6 +486,7 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { build_log->LookupByOutput(edge->outputs_[0]->path()); if (!entry) { edge->run_time_ms_ = 1; + total_time += 1; continue; } const int64_t duration = entry->end_time - entry->start_time; -- cgit v0.12 From c83167fb6e5d862fe6389e3996f2a293e2e6554c Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Fri, 27 Aug 2021 17:15:27 +0100 Subject: Improve heuristic for unknown cost edges --- src/build.cc | 92 ++++++++++++++++++++++++++++++++++++++++++------------------ src/build.h | 23 +++++++-------- 2 files changed, 77 insertions(+), 38 deletions(-) diff --git a/src/build.cc b/src/build.cc index 99e6efa..6f9cf27 100644 --- a/src/build.cc +++ b/src/build.cc @@ -450,6 +450,67 @@ struct SeenBefore { } }; +// Assign run_time_ms_ for all wanted edges, and returns total time for all edges +// For phony edges, 0 cost. +// For edges with a build history, use the last build time. +// For edges without history, use the 75th percentile time for edges with history. +// Or, if there is no history at all just use 1 +int64_t AssignEdgeRuntime(BuildLog* build_log, + const std::map& want) { + bool missing_durations = false; + std::vector durations; + int64_t total_time = 0; + + for (std::map::const_iterator it = want.begin(), + end = want.end(); + it != end; ++it) { + Edge* edge = it->first; + if (edge->is_phony()) { + continue; + } + BuildLog::LogEntry* entry = + build_log->LookupByOutput(edge->outputs_[0]->path()); + if (!entry) { + missing_durations = true; + edge->run_time_ms_ = -1; // -1 to mark as needing filled in + continue; + } + const int64_t duration = entry->end_time - entry->start_time; + edge->run_time_ms_ = duration; + total_time += duration; + durations.push_back(duration); + } + + if (!missing_durations) { + return total_time; + } + + // Heuristic: for unknown edges, take the 75th percentile time. + // This allows the known-slowest jobs to run first, but isn't so + // small that it is always the lowest priority. Which for slow jobs, + // might bottleneck the build. + int64_t p75_time = 1; + int64_t num_durations = static_cast(durations.size()); + if (num_durations > 0) { + size_t p75_idx = (num_durations - 1) - num_durations / 4; + std::vector::iterator p75_it = durations.begin() + p75_idx; + std::nth_element(durations.begin(), p75_it, durations.end()); + p75_time = *p75_it; + } + + for (std::map::const_iterator it = want.begin(), + end = want.end(); + it != end; ++it) { + Edge* edge = it->first; + if (edge->run_time_ms_ >= 0) { + continue; + } + edge->run_time_ms_ = p75_time; + total_time += p75_time; + } + return total_time; +} + } // namespace void Plan::ComputeCriticalTime(BuildLog* build_log) { @@ -466,33 +527,10 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { targets_.end()); } - // this is total time if building all edges in serial, so this value is big - // enough to ensure higher priority target initial critical time always bigger - // than lower one - int64_t total_time = 0; - // Critical path scheduling. - // 0. Assign costs to all edges, using: - // a) The time the edge needed last time, if available. - // b) A fixed cost if this type of edge hasn't run before (0 for - // phony target, 1 for others) - // TODO: Find a better heuristic for edges without log entries - for (std::map::iterator it = want_.begin(), end = want_.end(); - it != end; ++it) { - Edge* edge = it->first; - if (edge->is_phony()) { - continue; - } - BuildLog::LogEntry* entry = - build_log->LookupByOutput(edge->outputs_[0]->path()); - if (!entry) { - edge->run_time_ms_ = 1; - total_time += 1; - continue; - } - const int64_t duration = entry->end_time - entry->start_time; - edge->run_time_ms_ = duration; - total_time += duration; - } + // total time if building all edges in serial. This value is big + // enough to ensure higher priority target's initial critical time + // is always bigger than lower ones + int64_t total_time = AssignEdgeRuntime(build_log, want_); // Use backflow algorithm to compute critical times for all nodes, starting // from the destination nodes. diff --git a/src/build.h b/src/build.h index 4e36e16..084607b 100644 --- a/src/build.h +++ b/src/build.h @@ -118,17 +118,6 @@ struct Plan { /// by information loaded from a dyndep file. bool DyndepsLoaded(DependencyScan* scan, const Node* node, const DyndepFile& ddf, std::string* err); -private: - bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); - void UnmarkDependents(const Node* node, std::set* dependents); - bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, - std::set* dyndep_walk); - - /// Update plan with knowledge that the given node is up to date. - /// If the node is a dyndep binding on any of its dependents, this - /// loads dynamic dependencies from the node's path. - /// Returns 'false' if loading dyndep info fails and 'true' otherwise. - bool NodeFinished(Node* node, std::string* err); /// Enumerate possible steps we want for an edge. enum Want @@ -143,6 +132,18 @@ private: kWantToFinish }; +private: + bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); + void UnmarkDependents(const Node* node, std::set* dependents); + bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, + std::set* dyndep_walk); + + /// Update plan with knowledge that the given node is up to date. + /// If the node is a dyndep binding on any of its dependents, this + /// loads dynamic dependencies from the node's path. + /// Returns 'false' if loading dyndep info fails and 'true' otherwise. + bool NodeFinished(Node* node, std::string* err); + void EdgeWanted(const Edge* edge); bool EdgeMaybeReady(std::map::iterator want_e, std::string* err); -- cgit v0.12 From 77448b4fb7dc1e8baad0cc75c4d6d04fabc21def Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Fri, 8 Oct 2021 15:11:49 +0100 Subject: Remove redundant include --- src/build.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/build.h b/src/build.h index 084607b..9b49ffb 100644 --- a/src/build.h +++ b/src/build.h @@ -16,7 +16,6 @@ #define NINJA_BUILD_H_ #include -#include #include #include #include -- cgit v0.12 From 24d1f5f0c130338b382c60eb32731d2638b47d03 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Mon, 7 Mar 2022 16:48:04 +0000 Subject: Address review comments 1. Move EdgePriorityQueue to graph.h and inherit from priority_queue 2. Add comment about edge->critical_time() --- src/build.cc | 18 ++++++------------ src/build.h | 36 ------------------------------------ src/graph.h | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 48 deletions(-) diff --git a/src/build.cc b/src/build.cc index 6f9cf27..d747b8a 100644 --- a/src/build.cc +++ b/src/build.cc @@ -76,15 +76,6 @@ bool DryRunCommandRunner::WaitForCommand(Result* result) { } // namespace -bool EdgeQueue::EdgePriorityCompare::operator()(const Edge* e1, const Edge* e2) const { - const int64_t ct1 = e1->critical_time(); - const int64_t ct2 = e2->critical_time(); - if (ct1 != ct2) { - return ct1 < ct2; - } - return e1->id_ < e2->id_; -} - Plan::Plan(Builder* builder) : builder_(builder) , command_edges_(0) @@ -162,7 +153,10 @@ void Plan::EdgeWanted(const Edge* edge) { Edge* Plan::FindWork() { if (ready_.empty()) return NULL; - return ready_.pop(); + + Edge* work = ready_.top(); + ready_.pop(); + return work; } void Plan::ScheduleWork(map::iterator want_e) { @@ -182,7 +176,7 @@ void Plan::ScheduleWork(map::iterator want_e) { pool->DelayEdge(edge); EdgeSet new_edges; pool->RetrieveReadyEdges(&new_edges); - ready_.push(new_edges.begin(), new_edges.end()); + ready_.push_multiple(new_edges.begin(), new_edges.end()); } else { pool->EdgeScheduled(*edge); ready_.push(edge); @@ -199,7 +193,7 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) { edge->pool()->EdgeFinished(*edge); EdgeSet new_edges; edge->pool()->RetrieveReadyEdges(&new_edges); - ready_.push(new_edges.begin(), new_edges.end()); + ready_.push_multiple(new_edges.begin(), new_edges.end()); // The rest of this function only applies to successful commands. if (result != kEdgeSucceeded) diff --git a/src/build.h b/src/build.h index 9b49ffb..652bc40 100644 --- a/src/build.h +++ b/src/build.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -36,41 +35,6 @@ struct State; struct Status; -// Set of ready edges, sorted by priority -class EdgeQueue { - struct EdgePriorityCompare { - bool operator()(const Edge* e1, const Edge* e2) const; - }; - - std::priority_queue, EdgePriorityCompare> queue_; - - public: - void push(Edge* edge) { - queue_.push(edge); - } - - template - void push(It first, It last) { - for (; first != last; ++first) { - push(*first); - } - } - - Edge* pop() { - Edge* ret = queue_.top(); - queue_.pop(); - return ret; - } - - void clear() { - queue_ = std::priority_queue, EdgePriorityCompare>(); - } - - size_t size() const { return queue_.size(); } - - bool empty() const { return queue_.empty(); } -}; - /// Plan stores the state of a build plan: what we intend to build, /// which steps we're ready to execute. struct Plan { diff --git a/src/graph.h b/src/graph.h index 7851504..a8bf0cd 100644 --- a/src/graph.h +++ b/src/graph.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "dyndep.h" #include "eval_env.h" @@ -172,6 +173,10 @@ struct Edge { void Dump(const char* prefix="") const; + // Critical time is the estimated exection time in ms of the edges + // forming the longest time-weighted path to the target output. + // This quantity is used as a priority during build scheduling. + // NOTE: Defaults to -1 as a marker smaller than any valid time int64_t critical_time() const { return critical_time_; } void set_critical_time(int64_t critical_time) { critical_time_ = critical_time; @@ -343,4 +348,32 @@ struct DependencyScan { DyndepLoader dyndep_loader_; }; +// Prioritize edges by largest critical time first +struct EdgePriorityCompare { + bool operator()(const Edge* e1, const Edge* e2) const { + const int64_t ct1 = e1->critical_time(); + const int64_t ct2 = e2->critical_time(); + if (ct1 != ct2) { + return ct1 < ct2; + } + return e1->id_ < e2->id_; + } +}; + +// Set of ready edges, sorted by priority +class EdgeQueue: + public std::priority_queue, EdgePriorityCompare>{ +public: + void clear() { + c.clear(); + } + + template + void push_multiple(It first, It last) { + for (; first != last; ++first) { + push(*first); + } + } +}; + #endif // NINJA_GRAPH_H_ -- cgit v0.12 From 6ee904948f25379d63941c1b23127705d9e0d9b0 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Mon, 7 Mar 2022 16:56:47 +0000 Subject: Remove unnecessary whitespace --- src/build.cc | 1 - src/build.h | 1 - 2 files changed, 2 deletions(-) diff --git a/src/build.cc b/src/build.cc index afe490d..d6be1c0 100644 --- a/src/build.cc +++ b/src/build.cc @@ -75,7 +75,6 @@ bool DryRunCommandRunner::WaitForCommand(Result* result) { } // namespace - Plan::Plan(Builder* builder) : builder_(builder) , command_edges_(0) diff --git a/src/build.h b/src/build.h index 652bc40..e91edbe 100644 --- a/src/build.h +++ b/src/build.h @@ -34,7 +34,6 @@ struct Node; struct State; struct Status; - /// Plan stores the state of a build plan: what we intend to build, /// which steps we're ready to execute. struct Plan { -- cgit v0.12 From 1128a56353cc596a86be4943be88c403663296f3 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Mon, 7 Mar 2022 17:36:38 +0000 Subject: Add simple test for EdgeQueue --- src/graph.h | 2 +- src/graph_test.cc | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/graph.h b/src/graph.h index 6e22f16..0604ea7 100644 --- a/src/graph.h +++ b/src/graph.h @@ -384,7 +384,7 @@ struct EdgePriorityCompare { if (ct1 != ct2) { return ct1 < ct2; } - return e1->id_ < e2->id_; + return e1->id_ > e2->id_; } }; diff --git a/src/graph_test.cc b/src/graph_test.cc index 5314bc5..97726ce 100644 --- a/src/graph_test.cc +++ b/src/graph_test.cc @@ -944,3 +944,56 @@ TEST_F(GraphTest, PhonyDepsMtimes) { EXPECT_EQ(out1->mtime(), out1Mtime1); EXPECT_TRUE(out1->dirty()); } + +// Test that EdgeQueue correctly prioritizes by critical time +TEST_F(GraphTest, EdgeQueuePriority) { + + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, +"rule r\n" +" command = unused\n" +"build out1: r in1\n" +"build out2: r in2\n" +"build out3: r in3\n" +)); + + const int n_edges = 3; + Edge *(edges)[n_edges] = { + GetNode("out1")->in_edge(), + GetNode("out2")->in_edge(), + GetNode("out3")->in_edge(), + }; + + // Output is largest critical time to smallest + for (int i = 0; i < n_edges; ++i) { + edges[i]->set_critical_time(i * 10); + } + + EdgeQueue queue; + for (int i = 0; i < n_edges; ++i) { + queue.push(edges[i]); + } + + EXPECT_EQ(queue.size(), n_edges); + for (int i = 0; i < n_edges; ++i) { + EXPECT_EQ(queue.top(), edges[n_edges - 1 - i]); + queue.pop(); + } + EXPECT_TRUE(queue.empty()); + + // When there is ambiguity, the lowest edge id comes first + for (int i = 0; i < n_edges; ++i) { + edges[i]->set_critical_time(0); + } + + queue.push(edges[1]); + queue.push(edges[2]); + queue.push(edges[0]); + + for (int i = 0; i < n_edges; ++i) { + EXPECT_EQ(queue.top(), edges[i]); + queue.pop(); + } + EXPECT_TRUE(queue.empty()); +} + + -- cgit v0.12 From a8611647de41d52a05ee9e85a767e59d1c923ce6 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Mon, 7 Mar 2022 20:42:53 +0000 Subject: Improve comments and retrieve edges into ready_queue directly --- src/build.cc | 24 ++++++++++-------------- src/build.h | 4 ++-- src/graph.h | 23 ++++++++++++----------- src/graph_test.cc | 2 +- src/state.cc | 4 ++-- src/state.h | 2 +- 6 files changed, 28 insertions(+), 31 deletions(-) diff --git a/src/build.cc b/src/build.cc index d6be1c0..de62a62 100644 --- a/src/build.cc +++ b/src/build.cc @@ -173,9 +173,7 @@ void Plan::ScheduleWork(map::iterator want_e) { Pool* pool = edge->pool(); if (pool->ShouldDelayEdge()) { pool->DelayEdge(edge); - EdgeSet new_edges; - pool->RetrieveReadyEdges(&new_edges); - ready_.push_multiple(new_edges.begin(), new_edges.end()); + pool->RetrieveReadyEdges(&ready_); } else { pool->EdgeScheduled(*edge); ready_.push(edge); @@ -190,9 +188,7 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) { // See if this job frees up any delayed jobs. if (directly_wanted) edge->pool()->EdgeFinished(*edge); - EdgeSet new_edges; - edge->pool()->RetrieveReadyEdges(&new_edges); - ready_.push_multiple(new_edges.begin(), new_edges.end()); + edge->pool()->RetrieveReadyEdges(&ready_); // The rest of this function only applies to successful commands. if (result != kEdgeSucceeded) @@ -541,10 +537,10 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // Use backflow algorithm to compute critical times for all nodes, starting // from the destination nodes. // XXX: ignores pools - std::queue breadthFirstEdges; // Queue, for breadth-first traversal - std::set active_edges; // Set of in breadthFirstEdges + std::queue work_queue; // Queue, for breadth-first traversal + std::set active_edges; // Set of edges in work_queue SeenBefore seen_edge( - &active_edges); // Test for uniqueness in breadthFirstEdges + &active_edges); // Test for uniqueness in work_queue for (std::vector::reverse_iterator it = targets_.rbegin(), end = targets_.rend(); @@ -557,14 +553,14 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { priority_weight + std::max(in->run_time_ms_, in->critical_time())); if (!seen_edge(in)) { - breadthFirstEdges.push(in); + work_queue.push(in); } } } - while (!breadthFirstEdges.empty()) { - Edge* e = breadthFirstEdges.front(); - breadthFirstEdges.pop(); + while (!work_queue.empty()) { + Edge* e = work_queue.front(); + work_queue.pop(); active_edges.erase(e); for (std::vector::iterator it = e->inputs_.begin(), @@ -579,7 +575,7 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { if (proposed_time > in->critical_time()) { in->set_critical_time(proposed_time); if (!seen_edge(in)) { - breadthFirstEdges.push(in); + work_queue.push(in); } } } diff --git a/src/build.h b/src/build.h index e91edbe..0d7c01a 100644 --- a/src/build.h +++ b/src/build.h @@ -22,7 +22,7 @@ #include #include "depfile_parser.h" -#include "graph.h" // XXX needed for DependencyScan; should rearrange. +#include "graph.h" #include "exit_status.h" #include "util.h" // int64_t @@ -120,7 +120,7 @@ private: /// we want for the edge. std::map want_; - EdgeQueue ready_; + EdgePriorityQueue ready_; Builder* builder_; /// user provided targets in build order, earlier one have higher priority diff --git a/src/graph.h b/src/graph.h index 0604ea7..4b45dad 100644 --- a/src/graph.h +++ b/src/graph.h @@ -197,7 +197,7 @@ struct Edge { void Dump(const char* prefix="") const; - // Critical time is the estimated exection time in ms of the edges + // Critical time is the estimated execution time in ms of the edges // forming the longest time-weighted path to the target output. // This quantity is used as a priority during build scheduling. // NOTE: Defaults to -1 as a marker smaller than any valid time @@ -376,7 +376,13 @@ struct DependencyScan { DyndepLoader dyndep_loader_; }; -// Prioritize edges by largest critical time first +// Implements a less comarison for edges by priority, where highest +// priority is defined lexicographically first by largest critical +// time, then lowest ID. +// +// Including ID means that wherever the critical times are the same, +// the edges are executed in ascending ID order which was historically +// how all tasks were scheduled. struct EdgePriorityCompare { bool operator()(const Edge* e1, const Edge* e2) const { const int64_t ct1 = e1->critical_time(); @@ -388,20 +394,15 @@ struct EdgePriorityCompare { } }; -// Set of ready edges, sorted by priority -class EdgeQueue: +// A priority queue holding non-owning Edge pointers. top() will +// return the edge with the largest critical time, and lowest ID if +// more than one edge has the same critical time. +class EdgePriorityQueue: public std::priority_queue, EdgePriorityCompare>{ public: void clear() { c.clear(); } - - template - void push_multiple(It first, It last) { - for (; first != last; ++first) { - push(*first); - } - } }; #endif // NINJA_GRAPH_H_ diff --git a/src/graph_test.cc b/src/graph_test.cc index 97726ce..d657387 100644 --- a/src/graph_test.cc +++ b/src/graph_test.cc @@ -968,7 +968,7 @@ TEST_F(GraphTest, EdgeQueuePriority) { edges[i]->set_critical_time(i * 10); } - EdgeQueue queue; + EdgePriorityQueue queue; for (int i = 0; i < n_edges; ++i) { queue.push(edges[i]); } diff --git a/src/state.cc b/src/state.cc index 556b0d8..e194519 100644 --- a/src/state.cc +++ b/src/state.cc @@ -38,13 +38,13 @@ void Pool::DelayEdge(Edge* edge) { delayed_.insert(edge); } -void Pool::RetrieveReadyEdges(EdgeSet* ready_queue) { +void Pool::RetrieveReadyEdges(EdgePriorityQueue* ready_queue) { DelayedEdges::iterator it = delayed_.begin(); while (it != delayed_.end()) { Edge* edge = *it; if (current_use_ + edge->weight() > depth_) break; - ready_queue->insert(edge); + ready_queue->push(edge); EdgeScheduled(*edge); ++it; } diff --git a/src/state.h b/src/state.h index 878ac6d..2b0fa90 100644 --- a/src/state.h +++ b/src/state.h @@ -62,7 +62,7 @@ struct Pool { void DelayEdge(Edge* edge); /// Pool will add zero or more edges to the ready_queue - void RetrieveReadyEdges(EdgeSet* ready_queue); + void RetrieveReadyEdges(EdgePriorityQueue* ready_queue); /// Dump the Pool and its edges (useful for debugging). void Dump() const; -- cgit v0.12 From 026498fb36b67518501eec2edc66ddcd64f64b1f Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Mon, 7 Mar 2022 21:31:07 +0000 Subject: Add run_time_ms accessors and more comments --- src/build.cc | 28 ++++++++++++++-------------- src/graph.h | 8 ++++++++ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/build.cc b/src/build.cc index de62a62..ec71bf3 100644 --- a/src/build.cc +++ b/src/build.cc @@ -452,7 +452,7 @@ struct SeenBefore { } }; -// Assign run_time_ms_ for all wanted edges, and returns total time for all edges +// Assign run_time_ms for all wanted edges, and returns total time for all edges // For phony edges, 0 cost. // For edges with a build history, use the last build time. // For edges without history, use the 75th percentile time for edges with history. @@ -462,6 +462,7 @@ int64_t AssignEdgeRuntime(BuildLog* build_log, bool missing_durations = false; std::vector durations; int64_t total_time = 0; + const int64_t kUnknownRunTime = -1; // marker value for the two loops below. for (std::map::const_iterator it = want.begin(), end = want.end(); @@ -474,11 +475,11 @@ int64_t AssignEdgeRuntime(BuildLog* build_log, build_log->LookupByOutput(edge->outputs_[0]->path()); if (!entry) { missing_durations = true; - edge->run_time_ms_ = -1; // -1 to mark as needing filled in + edge->set_run_time_ms(kUnknownRunTime); // mark as needing filled in continue; } const int64_t duration = entry->end_time - entry->start_time; - edge->run_time_ms_ = duration; + edge->set_run_time_ms(duration); total_time += duration; durations.push_back(duration); } @@ -504,10 +505,10 @@ int64_t AssignEdgeRuntime(BuildLog* build_log, end = want.end(); it != end; ++it) { Edge* edge = it->first; - if (edge->run_time_ms_ >= 0) { + if (edge->run_time_ms() != kUnknownRunTime) { continue; } - edge->run_time_ms_ = p75_time; + edge->set_run_time_ms(p75_time); total_time += p75_time; } return total_time; @@ -542,16 +543,15 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { SeenBefore seen_edge( &active_edges); // Test for uniqueness in work_queue - for (std::vector::reverse_iterator it = targets_.rbegin(), - end = targets_.rend(); - it != end; ++it) { - if (Edge* in = (*it)->in_edge()) { - // Use initial critical time: total_time * N. This means higher - // priority targets always get a higher critical time value - int64_t priority_weight = (it - targets_.rbegin()) * total_time; + for (size_t i = 0; i < targets_.size(); ++i) { + const Node* target = targets_[i]; + if (Edge* in = target->in_edge()) { + // Add a bias to ensure that targets that appear first in |targets_| have a larger critical time than + // those that follow them. E.g. for 3 targets: [2*total_time, total_time, 0]. + int64_t priority_weight = (targets_.size() - i - 1) * total_time; in->set_critical_time( priority_weight + - std::max(in->run_time_ms_, in->critical_time())); + std::max(in->run_time_ms(), in->critical_time())); if (!seen_edge(in)) { work_queue.push(in); } @@ -571,7 +571,7 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { continue; } // Only process edge if this node offers a higher critical time - const int64_t proposed_time = e->critical_time() + in->run_time_ms_; + const int64_t proposed_time = e->critical_time() + in->run_time_ms(); if (proposed_time > in->critical_time()) { in->set_critical_time(proposed_time); if (!seen_edge(in)) { diff --git a/src/graph.h b/src/graph.h index 4b45dad..728cdc8 100644 --- a/src/graph.h +++ b/src/graph.h @@ -206,6 +206,14 @@ struct Edge { critical_time_ = critical_time; } + // Run time in ms for this edge's command. + // Taken from the build log if present, or estimated otherwise. + // Default initialized to 0. + int64_t run_time_ms() const { return run_time_ms_; } + void set_run_time_ms(int64_t run_time_ms) { + run_time_ms_ = run_time_ms; + } + const Rule* rule_; Pool* pool_; std::vector inputs_; -- cgit v0.12 From 4bd8db1fa8287ec0b828e98a7d2bcdf3d6b1904f Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Tue, 8 Mar 2022 01:40:43 +0000 Subject: Add test and fix priority bug AddTarget cannot add edges to the ready queue before the critical time has been computed. --- src/build.cc | 69 +++++++++++++++++--- src/build.h | 9 ++- src/build_test.cc | 190 ++++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 231 insertions(+), 37 deletions(-) diff --git a/src/build.cc b/src/build.cc index ec71bf3..1f1e153 100644 --- a/src/build.cc +++ b/src/build.cc @@ -124,8 +124,6 @@ bool Plan::AddSubTarget(const Node* node, const Node* dependent, string* err, if (node->dirty() && want == kWantNothing) { want = kWantToStart; EdgeWanted(edge); - if (!dyndep_walk && edge->AllInputsReady()) - ScheduleWork(want_ins.first); } if (dyndep_walk) @@ -514,14 +512,27 @@ int64_t AssignEdgeRuntime(BuildLog* build_log, return total_time; } +int64_t AssignDefaultEdgeRuntime(std::map &want) { + int64_t total_time = 0; + + for (std::map::const_iterator it = want.begin(), + end = want.end(); + it != end; ++it) { + Edge* edge = it->first; + if (edge->is_phony()) { + continue; + } + + edge->set_run_time_ms(1); + ++total_time; + } + return total_time; +} + } // namespace void Plan::ComputeCriticalTime(BuildLog* build_log) { - // testcases have no build_log - if (!build_log) - return; - - METRIC_RECORD("ComputePriorityList"); + METRIC_RECORD("ComputeCriticalTime"); // Remove duplicate targets { std::set seen; @@ -533,7 +544,10 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // total time if building all edges in serial. This value is big // enough to ensure higher priority target's initial critical time // is always bigger than lower ones - int64_t total_time = AssignEdgeRuntime(build_log, want_); + const int64_t total_time = build_log ? + AssignEdgeRuntime(build_log, want_) : + AssignDefaultEdgeRuntime(want_); // Plan tests have no build_log + // Use backflow algorithm to compute critical times for all nodes, starting // from the destination nodes. @@ -582,6 +596,42 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { } } +void Plan::ScheduleInitialEdges() { + // Add ready edges to queue. + assert(ready_.empty()); + std::set pools; + + for (std::map::iterator it = want_.begin(), + end = want_.end(); it != end; ++it) { + Edge* edge = it->first; + Plan::Want want = it->second; + if (!(want == kWantToStart && edge->AllInputsReady())) { + continue; + } + + Pool* pool = edge->pool(); + if (pool->ShouldDelayEdge()) { + pool->DelayEdge(edge); + pools.insert(pool); + } else { + ScheduleWork(it); + } + } + + // Call RetrieveReadyEdges only once at the end so higher priority + // edges are retrieved first, not the ones that happen to be first + // in the want_ map. + for (std::set::iterator it=pools.begin(), + end = pools.end(); it != end; ++it) { + (*it)->RetrieveReadyEdges(&ready_); + } +} + +void Plan::PrepareQueue(BuildLog* build_log) { + ComputeCriticalTime(build_log); + ScheduleInitialEdges(); +} + void Plan::Dump() const { printf("pending: %d\n", (int)want_.size()); for (map::const_iterator e = want_.begin(); e != want_.end(); ++e) { @@ -743,8 +793,7 @@ bool Builder::AlreadyUpToDate() const { bool Builder::Build(string* err) { assert(!AlreadyUpToDate()); - - plan_.ComputeCriticalTime(scan_.build_log()); + plan_.PrepareQueue(scan_.build_log()); status_->PlanHasTotalEdges(plan_.command_edge_count()); int pending_commands = 0; diff --git a/src/build.h b/src/build.h index 0d7c01a..7719d9a 100644 --- a/src/build.h +++ b/src/build.h @@ -74,7 +74,9 @@ struct Plan { /// Reset state. Clears want and ready sets. void Reset(); - void ComputeCriticalTime(BuildLog* build_log); + + // After all targets have been added, prepares the ready queue for find work. + void PrepareQueue(BuildLog* build_log); /// Update the build plan to account for modifications made to the graph /// by information loaded from a dyndep file. @@ -95,11 +97,16 @@ struct Plan { }; private: + void ComputeCriticalTime(BuildLog* build_log); bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); void UnmarkDependents(const Node* node, std::set* dependents); bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, std::set* dyndep_walk); + // Add edges that kWantToStart into the ready queue + // Must be called after ComputeCriticalTime and before FindWork + void ScheduleInitialEdges(); + /// Update plan with knowledge that the given node is up to date. /// If the node is a dyndep binding on any of its dependents, this /// loads dynamic dependencies from the node's path. diff --git a/src/build_test.cc b/src/build_test.cc index 4ef62b2..50a5394 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -50,6 +50,14 @@ struct PlanTest : public StateTestWithBuiltinRules { sort(ret->begin(), ret->end(), CompareEdgesByOutput::cmp); } + void PrepareForTarget(const char* node, BuildLog *log=NULL) { + string err; + EXPECT_TRUE(plan_.AddTarget(GetNode(node), &err)); + ASSERT_EQ("", err); + plan_.PrepareQueue(log); + ASSERT_TRUE(plan_.more_to_do()); + } + void TestPoolWithDepthOne(const char *test_case); }; @@ -59,10 +67,7 @@ TEST_F(PlanTest, Basic) { "build mid: cat in\n")); GetNode("mid")->MarkDirty(); GetNode("out")->MarkDirty(); - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge = plan_.FindWork(); ASSERT_TRUE(edge); @@ -71,6 +76,7 @@ TEST_F(PlanTest, Basic) { ASSERT_FALSE(plan_.FindWork()); + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -95,15 +101,12 @@ TEST_F(PlanTest, DoubleOutputDirect) { GetNode("mid1")->MarkDirty(); GetNode("mid2")->MarkDirty(); GetNode("out")->MarkDirty(); - - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge; edge = plan_.FindWork(); ASSERT_TRUE(edge); // cat in + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -128,14 +131,12 @@ TEST_F(PlanTest, DoubleOutputIndirect) { GetNode("b1")->MarkDirty(); GetNode("b2")->MarkDirty(); GetNode("out")->MarkDirty(); - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge; edge = plan_.FindWork(); ASSERT_TRUE(edge); // cat in + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -169,15 +170,12 @@ TEST_F(PlanTest, DoubleDependent) { GetNode("a1")->MarkDirty(); GetNode("a2")->MarkDirty(); GetNode("out")->MarkDirty(); - - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge; edge = plan_.FindWork(); ASSERT_TRUE(edge); // cat in + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -209,6 +207,7 @@ void PlanTest::TestPoolWithDepthOne(const char* test_case) { ASSERT_EQ("", err); EXPECT_TRUE(plan_.AddTarget(GetNode("out2"), &err)); ASSERT_EQ("", err); + plan_.PrepareQueue(NULL); ASSERT_TRUE(plan_.more_to_do()); Edge* edge = plan_.FindWork(); @@ -284,10 +283,7 @@ TEST_F(PlanTest, PoolsWithDepthTwo) { GetNode("outb" + string(1, '1' + static_cast(i)))->MarkDirty(); } GetNode("allTheThings")->MarkDirty(); - - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("allTheThings"), &err)); - ASSERT_EQ("", err); + PrepareForTarget("allTheThings"); deque edges; FindWorkSorted(&edges, 5); @@ -306,6 +302,7 @@ TEST_F(PlanTest, PoolsWithDepthTwo) { ASSERT_EQ("outb3", edge->outputs_[0]->path()); // finish out1 + string err; plan_.EdgeFinished(edges.front(), Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); edges.pop_front(); @@ -363,10 +360,7 @@ TEST_F(PlanTest, PoolWithRedundantEdges) { GetNode("bar.cpp.obj")->MarkDirty(); GetNode("libfoo.a")->MarkDirty(); GetNode("all")->MarkDirty(); - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("all"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("all"); Edge* edge = NULL; @@ -375,6 +369,7 @@ TEST_F(PlanTest, PoolWithRedundantEdges) { edge = initial_edges[1]; // Foo first ASSERT_EQ("foo.cpp", edge->outputs_[0]->path()); + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -439,6 +434,7 @@ TEST_F(PlanTest, PoolWithFailingEdge) { ASSERT_EQ("", err); EXPECT_TRUE(plan_.AddTarget(GetNode("out2"), &err)); ASSERT_EQ("", err); + plan_.PrepareQueue(NULL); ASSERT_TRUE(plan_.more_to_do()); Edge* edge = plan_.FindWork(); @@ -467,6 +463,148 @@ TEST_F(PlanTest, PoolWithFailingEdge) { ASSERT_EQ(0, edge); } +TEST_F(PlanTest, PriorityWithoutBuildLog) { + // Without a build log, the critical time is equivalent to graph + // depth. Test with the following graph: + // a2 + // | + // a1 b1 + // | | | + // a0 b0 c0 + // \ | / + // out + + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, + "rule r\n" + " command = unused\n" + "build out: r a0 b0 c0\n" + "build a0: r a1\n" + "build a1: r a2\n" + "build b0: r b1\n" + "build c0: r b1\n" + )); + GetNode("a1")->MarkDirty(); + GetNode("a0")->MarkDirty(); + GetNode("b0")->MarkDirty(); + GetNode("c0")->MarkDirty(); + GetNode("out")->MarkDirty(); + BuildLog log; + PrepareForTarget("out", &log); + + EXPECT_EQ(GetNode("out")->in_edge()->critical_time(), 1); + EXPECT_EQ(GetNode("a0")->in_edge()->critical_time(), 2); + EXPECT_EQ(GetNode("b0")->in_edge()->critical_time(), 2); + EXPECT_EQ(GetNode("c0")->in_edge()->critical_time(), 2); + EXPECT_EQ(GetNode("a1")->in_edge()->critical_time(), 3); + + const int n_edges = 5; + const char *expected_order[n_edges] = { + "a1", "a0", "b0", "c0", "out"}; + for (int i = 0; i < n_edges; ++i) { + Edge* edge = plan_.FindWork(); + ASSERT_NE(edge, NULL); + EXPECT_EQ(expected_order[i], edge->outputs_[0]->path()); + + std::string err; + ASSERT_TRUE(plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err)); + EXPECT_EQ(err, ""); + } + + EXPECT_FALSE(plan_.FindWork()); +} + +TEST_F(PlanTest, PriorityWithBuildLog) { + // With a build log, the critical time is longest weighted path. + // Test with the following graph: + // a2 + // | + // a1 b1 + // | | | + // a0 b0 c0 + // \ | / + // out + + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, + "rule r\n" + " command = unused\n" + "build out: r a0 b0 c0\n" + "build a0: r a1\n" + "build a1: r a2\n" + "build b0: r b1\n" + "build c0: r b1\n" + )); + GetNode("a1")->MarkDirty(); + GetNode("a0")->MarkDirty(); + GetNode("b0")->MarkDirty(); + GetNode("c0")->MarkDirty(); + GetNode("out")->MarkDirty(); + + BuildLog log; + log.RecordCommand(GetNode("out")->in_edge(), 0, 100); // time = 100 + log.RecordCommand(GetNode("a0")->in_edge(), 10, 20); // time = 10 + log.RecordCommand(GetNode("a1")->in_edge(), 20, 40); // time = 20 + log.RecordCommand(GetNode("b0")->in_edge(), 10, 30); // time = 20 + log.RecordCommand(GetNode("c0")->in_edge(), 20, 70); // time = 50 + + PrepareForTarget("out", &log); + + EXPECT_EQ(GetNode("out")->in_edge()->critical_time(), 100); + EXPECT_EQ(GetNode("a0")->in_edge()->critical_time(), 110); + EXPECT_EQ(GetNode("b0")->in_edge()->critical_time(), 120); + EXPECT_EQ(GetNode("c0")->in_edge()->critical_time(), 150); + EXPECT_EQ(GetNode("a1")->in_edge()->critical_time(), 130); + + const int n_edges = 5; + const char *expected_order[n_edges] = { + "c0", "a1", "b0", "a0", "out"}; + for (int i = 0; i < n_edges; ++i) { + Edge* edge = plan_.FindWork(); + ASSERT_NE(edge, NULL); + EXPECT_EQ(expected_order[i], edge->outputs_[0]->path()); + + std::string err; + ASSERT_TRUE(plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err)); + EXPECT_EQ(err, ""); + } + EXPECT_FALSE(plan_.FindWork()); +} + +TEST_F(PlanTest, RuntimePartialBuildLog) { + // Test the edge->run_time_ms() estimate when no build log is available + + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, + "rule r\n" + " command = unused\n" + "build out: r a0 b0 c0 d0\n" + "build a0: r a1\n" + "build b0: r b1\n" + "build c0: r c1\n" + "build d0: r d1\n" + )); + GetNode("a0")->MarkDirty(); + GetNode("b0")->MarkDirty(); + GetNode("c0")->MarkDirty(); + GetNode("d0")->MarkDirty(); + GetNode("out")->MarkDirty(); + + BuildLog log; + log.RecordCommand(GetNode("out")->in_edge(), 0, 100); // time = 40 + log.RecordCommand(GetNode("a0")->in_edge(), 10, 20); // time = 10 + log.RecordCommand(GetNode("b0")->in_edge(), 20, 40); // time = 20 + log.RecordCommand(GetNode("c0")->in_edge(), 10, 40); // time = 30 + + PrepareForTarget("out", &log); + + // These edges times are read from the build log + EXPECT_EQ(GetNode("out")->in_edge()->run_time_ms(), 100); + EXPECT_EQ(GetNode("a0")->in_edge()->run_time_ms(), 10); + EXPECT_EQ(GetNode("b0")->in_edge()->run_time_ms(), 20); + EXPECT_EQ(GetNode("c0")->in_edge()->run_time_ms(), 30); + + // The missing data is taken from the 3rd quintile of known data + EXPECT_EQ(GetNode("d0")->in_edge()->run_time_ms(), 30); +} + /// Fake implementation of CommandRunner, useful for tests. struct FakeCommandRunner : public CommandRunner { explicit FakeCommandRunner(VirtualFileSystem* fs) : -- cgit v0.12 From a643af2207f8f090d2a7aefeec954fcd2725c39c Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Tue, 8 Mar 2022 02:44:03 +0000 Subject: Pool: sort equally-weighted edges by priority --- src/graph.h | 13 ++++++++++--- src/state.h | 5 ++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/graph.h b/src/graph.h index 728cdc8..4fc34ca 100644 --- a/src/graph.h +++ b/src/graph.h @@ -384,14 +384,14 @@ struct DependencyScan { DyndepLoader dyndep_loader_; }; -// Implements a less comarison for edges by priority, where highest +// Implements a less comparison for edges by priority, where highest // priority is defined lexicographically first by largest critical // time, then lowest ID. // // Including ID means that wherever the critical times are the same, // the edges are executed in ascending ID order which was historically // how all tasks were scheduled. -struct EdgePriorityCompare { +struct EdgePriorityLess { bool operator()(const Edge* e1, const Edge* e2) const { const int64_t ct1 = e1->critical_time(); const int64_t ct2 = e2->critical_time(); @@ -402,11 +402,18 @@ struct EdgePriorityCompare { } }; +// Reverse of EdgePriorityLess, e.g. to sort by highest priority first +struct EdgePriorityGreater { + bool operator()(const Edge* e1, const Edge* e2) const { + return EdgePriorityLess()(e2, e1); + } +}; + // A priority queue holding non-owning Edge pointers. top() will // return the edge with the largest critical time, and lowest ID if // more than one edge has the same critical time. class EdgePriorityQueue: - public std::priority_queue, EdgePriorityCompare>{ + public std::priority_queue, EdgePriorityLess>{ public: void clear() { c.clear(); diff --git a/src/state.h b/src/state.h index 2b0fa90..05fb50e 100644 --- a/src/state.h +++ b/src/state.h @@ -80,7 +80,10 @@ struct Pool { if (!a) return b; if (!b) return false; int weight_diff = a->weight() - b->weight(); - return ((weight_diff < 0) || (weight_diff == 0 && EdgeCmp()(a, b))); + if (weight_diff != 0) { + return weight_diff < 0; + } + return EdgePriorityGreater()(a, b); } }; -- cgit v0.12 From f2333b706a080389583e5e355fd339a37089fe2c Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Tue, 8 Mar 2022 04:52:53 +0000 Subject: Rename critical_time to critical_time_ms --- src/build.cc | 10 +++++----- src/build_test.cc | 20 ++++++++++---------- src/graph.h | 14 +++++++------- src/graph_test.cc | 4 ++-- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/build.cc b/src/build.cc index 1f1e153..04be16c 100644 --- a/src/build.cc +++ b/src/build.cc @@ -563,9 +563,9 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // Add a bias to ensure that targets that appear first in |targets_| have a larger critical time than // those that follow them. E.g. for 3 targets: [2*total_time, total_time, 0]. int64_t priority_weight = (targets_.size() - i - 1) * total_time; - in->set_critical_time( + in->set_critical_time_ms( priority_weight + - std::max(in->run_time_ms(), in->critical_time())); + std::max(in->run_time_ms(), in->critical_time_ms())); if (!seen_edge(in)) { work_queue.push(in); } @@ -585,9 +585,9 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { continue; } // Only process edge if this node offers a higher critical time - const int64_t proposed_time = e->critical_time() + in->run_time_ms(); - if (proposed_time > in->critical_time()) { - in->set_critical_time(proposed_time); + const int64_t proposed_time = e->critical_time_ms() + in->run_time_ms(); + if (proposed_time > in->critical_time_ms()) { + in->set_critical_time_ms(proposed_time); if (!seen_edge(in)) { work_queue.push(in); } diff --git a/src/build_test.cc b/src/build_test.cc index 50a5394..e8518b4 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -491,11 +491,11 @@ TEST_F(PlanTest, PriorityWithoutBuildLog) { BuildLog log; PrepareForTarget("out", &log); - EXPECT_EQ(GetNode("out")->in_edge()->critical_time(), 1); - EXPECT_EQ(GetNode("a0")->in_edge()->critical_time(), 2); - EXPECT_EQ(GetNode("b0")->in_edge()->critical_time(), 2); - EXPECT_EQ(GetNode("c0")->in_edge()->critical_time(), 2); - EXPECT_EQ(GetNode("a1")->in_edge()->critical_time(), 3); + EXPECT_EQ(GetNode("out")->in_edge()->critical_time_ms(), 1); + EXPECT_EQ(GetNode("a0")->in_edge()->critical_time_ms(), 2); + EXPECT_EQ(GetNode("b0")->in_edge()->critical_time_ms(), 2); + EXPECT_EQ(GetNode("c0")->in_edge()->critical_time_ms(), 2); + EXPECT_EQ(GetNode("a1")->in_edge()->critical_time_ms(), 3); const int n_edges = 5; const char *expected_order[n_edges] = { @@ -548,11 +548,11 @@ TEST_F(PlanTest, PriorityWithBuildLog) { PrepareForTarget("out", &log); - EXPECT_EQ(GetNode("out")->in_edge()->critical_time(), 100); - EXPECT_EQ(GetNode("a0")->in_edge()->critical_time(), 110); - EXPECT_EQ(GetNode("b0")->in_edge()->critical_time(), 120); - EXPECT_EQ(GetNode("c0")->in_edge()->critical_time(), 150); - EXPECT_EQ(GetNode("a1")->in_edge()->critical_time(), 130); + EXPECT_EQ(GetNode("out")->in_edge()->critical_time_ms(), 100); + EXPECT_EQ(GetNode("a0")->in_edge()->critical_time_ms(), 110); + EXPECT_EQ(GetNode("b0")->in_edge()->critical_time_ms(), 120); + EXPECT_EQ(GetNode("c0")->in_edge()->critical_time_ms(), 150); + EXPECT_EQ(GetNode("a1")->in_edge()->critical_time_ms(), 130); const int n_edges = 5; const char *expected_order[n_edges] = { diff --git a/src/graph.h b/src/graph.h index 4fc34ca..0fab807 100644 --- a/src/graph.h +++ b/src/graph.h @@ -171,7 +171,7 @@ struct Edge { Edge() : rule_(NULL), pool_(NULL), dyndep_(NULL), env_(NULL), mark_(VisitNone), - id_(0), run_time_ms_(0), critical_time_(-1), outputs_ready_(false), + id_(0), run_time_ms_(0), critical_time_ms_(-1), outputs_ready_(false), deps_loaded_(false), deps_missing_(false), generated_by_dep_loader_(false), implicit_deps_(0), order_only_deps_(0), implicit_outs_(0) {} @@ -201,9 +201,9 @@ struct Edge { // forming the longest time-weighted path to the target output. // This quantity is used as a priority during build scheduling. // NOTE: Defaults to -1 as a marker smaller than any valid time - int64_t critical_time() const { return critical_time_; } - void set_critical_time(int64_t critical_time) { - critical_time_ = critical_time; + int64_t critical_time_ms() const { return critical_time_ms_; } + void set_critical_time_ms(int64_t critical_time_ms) { + critical_time_ms_ = critical_time_ms; } // Run time in ms for this edge's command. @@ -224,7 +224,7 @@ struct Edge { VisitMark mark_; size_t id_; int64_t run_time_ms_; - int64_t critical_time_; + int64_t critical_time_ms_; bool outputs_ready_; bool deps_loaded_; bool deps_missing_; @@ -393,8 +393,8 @@ struct DependencyScan { // how all tasks were scheduled. struct EdgePriorityLess { bool operator()(const Edge* e1, const Edge* e2) const { - const int64_t ct1 = e1->critical_time(); - const int64_t ct2 = e2->critical_time(); + const int64_t ct1 = e1->critical_time_ms(); + const int64_t ct2 = e2->critical_time_ms(); if (ct1 != ct2) { return ct1 < ct2; } diff --git a/src/graph_test.cc b/src/graph_test.cc index d657387..c7efec3 100644 --- a/src/graph_test.cc +++ b/src/graph_test.cc @@ -965,7 +965,7 @@ TEST_F(GraphTest, EdgeQueuePriority) { // Output is largest critical time to smallest for (int i = 0; i < n_edges; ++i) { - edges[i]->set_critical_time(i * 10); + edges[i]->set_critical_time_ms(i * 10); } EdgePriorityQueue queue; @@ -982,7 +982,7 @@ TEST_F(GraphTest, EdgeQueuePriority) { // When there is ambiguity, the lowest edge id comes first for (int i = 0; i < n_edges; ++i) { - edges[i]->set_critical_time(0); + edges[i]->set_critical_time_ms(0); } queue.push(edges[1]); -- cgit v0.12 From 09d4faa0119a5d45911fd5a5b1703e152b040d9a Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Tue, 8 Mar 2022 05:04:43 +0000 Subject: Clarify the purpose of active_edges in back-propagation --- src/build.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/build.cc b/src/build.cc index 04be16c..9c0dbc2 100644 --- a/src/build.cc +++ b/src/build.cc @@ -553,9 +553,9 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { // from the destination nodes. // XXX: ignores pools std::queue work_queue; // Queue, for breadth-first traversal - std::set active_edges; // Set of edges in work_queue - SeenBefore seen_edge( - &active_edges); // Test for uniqueness in work_queue + // The set of edges currently in work_queue, to avoid duplicates. + std::set active_edges; + SeenBefore seen_edge(&active_edges); for (size_t i = 0; i < targets_.size(); ++i) { const Node* target = targets_[i]; @@ -575,6 +575,8 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { while (!work_queue.empty()) { Edge* e = work_queue.front(); work_queue.pop(); + // If the critical time of any dependent edges is updated, this + // edge may need to be processed again. So re-allow insertion. active_edges.erase(e); for (std::vector::iterator it = e->inputs_.begin(), -- cgit v0.12 From 29fe3ef1faefa554bc9490716071e969e84774db Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Wed, 10 Aug 2022 17:29:50 +0100 Subject: Simplify scheduler to not use build log/execution time --- src/build.cc | 123 +++++++++--------------------------------------------- src/build.h | 6 +-- src/build_test.cc | 108 ++++------------------------------------------- src/graph.h | 45 ++++++++------------ src/graph_test.cc | 4 +- 5 files changed, 51 insertions(+), 235 deletions(-) diff --git a/src/build.cc b/src/build.cc index 9ace8c9..c3dc558 100644 --- a/src/build.cc +++ b/src/build.cc @@ -450,89 +450,16 @@ struct SeenBefore { } }; -// Assign run_time_ms for all wanted edges, and returns total time for all edges -// For phony edges, 0 cost. -// For edges with a build history, use the last build time. -// For edges without history, use the 75th percentile time for edges with history. -// Or, if there is no history at all just use 1 -int64_t AssignEdgeRuntime(BuildLog* build_log, - const std::map& want) { - bool missing_durations = false; - std::vector durations; - int64_t total_time = 0; - const int64_t kUnknownRunTime = -1; // marker value for the two loops below. - - for (std::map::const_iterator it = want.begin(), - end = want.end(); - it != end; ++it) { - Edge* edge = it->first; - if (edge->is_phony()) { - continue; - } - BuildLog::LogEntry* entry = - build_log->LookupByOutput(edge->outputs_[0]->path()); - if (!entry) { - missing_durations = true; - edge->set_run_time_ms(kUnknownRunTime); // mark as needing filled in - continue; - } - const int64_t duration = entry->end_time - entry->start_time; - edge->set_run_time_ms(duration); - total_time += duration; - durations.push_back(duration); - } - - if (!missing_durations) { - return total_time; - } - - // Heuristic: for unknown edges, take the 75th percentile time. - // This allows the known-slowest jobs to run first, but isn't so - // small that it is always the lowest priority. Which for slow jobs, - // might bottleneck the build. - int64_t p75_time = 1; - int64_t num_durations = static_cast(durations.size()); - if (num_durations > 0) { - size_t p75_idx = (num_durations - 1) - num_durations / 4; - std::vector::iterator p75_it = durations.begin() + p75_idx; - std::nth_element(durations.begin(), p75_it, durations.end()); - p75_time = *p75_it; - } - - for (std::map::const_iterator it = want.begin(), - end = want.end(); - it != end; ++it) { - Edge* edge = it->first; - if (edge->run_time_ms() != kUnknownRunTime) { - continue; - } - edge->set_run_time_ms(p75_time); - total_time += p75_time; - } - return total_time; -} - -int64_t AssignDefaultEdgeRuntime(std::map &want) { - int64_t total_time = 0; - - for (std::map::const_iterator it = want.begin(), - end = want.end(); - it != end; ++it) { - Edge* edge = it->first; - if (edge->is_phony()) { - continue; - } - - edge->set_run_time_ms(1); - ++total_time; - } - return total_time; +// Heuristic for edge priority weighting. +// Phony edges are free (0 cost), all other edges are weighted equally. +int64_t EdgeWeightHeuristic(Edge *edge) { + return edge->is_phony() ? 0 : 1; } } // namespace -void Plan::ComputeCriticalTime(BuildLog* build_log) { - METRIC_RECORD("ComputeCriticalTime"); +void Plan::ComputeCriticalPath() { + METRIC_RECORD("ComputeCriticalPath"); // Remove duplicate targets { std::set seen; @@ -541,16 +468,8 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { targets_.end()); } - // total time if building all edges in serial. This value is big - // enough to ensure higher priority target's initial critical time - // is always bigger than lower ones - const int64_t total_time = build_log ? - AssignEdgeRuntime(build_log, want_) : - AssignDefaultEdgeRuntime(want_); // Plan tests have no build_log - - - // Use backflow algorithm to compute critical times for all nodes, starting - // from the destination nodes. + // Use backflow algorithm to compute the critical path for all + // nodes, starting from the destination nodes. // XXX: ignores pools std::queue work_queue; // Queue, for breadth-first traversal // The set of edges currently in work_queue, to avoid duplicates. @@ -560,12 +479,9 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { for (size_t i = 0; i < targets_.size(); ++i) { const Node* target = targets_[i]; if (Edge* in = target->in_edge()) { - // Add a bias to ensure that targets that appear first in |targets_| have a larger critical time than - // those that follow them. E.g. for 3 targets: [2*total_time, total_time, 0]. - int64_t priority_weight = (targets_.size() - i - 1) * total_time; - in->set_critical_time_ms( - priority_weight + - std::max(in->run_time_ms(), in->critical_time_ms())); + int64_t edge_weight = EdgeWeightHeuristic(in); + in->set_critical_path_weight( + std::max(edge_weight, in->critical_path_weight())); if (!seen_edge(in)) { work_queue.push(in); } @@ -575,7 +491,7 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { while (!work_queue.empty()) { Edge* e = work_queue.front(); work_queue.pop(); - // If the critical time of any dependent edges is updated, this + // If the critical path of any dependent edges is updated, this // edge may need to be processed again. So re-allow insertion. active_edges.erase(e); @@ -586,10 +502,11 @@ void Plan::ComputeCriticalTime(BuildLog* build_log) { if (!in) { continue; } - // Only process edge if this node offers a higher critical time - const int64_t proposed_time = e->critical_time_ms() + in->run_time_ms(); - if (proposed_time > in->critical_time_ms()) { - in->set_critical_time_ms(proposed_time); + // Only process edge if this node offers a higher weighted path + const int64_t edge_weight = EdgeWeightHeuristic(in); + const int64_t proposed_weight = e->critical_path_weight() + edge_weight; + if (proposed_weight > in->critical_path_weight()) { + in->set_critical_path_weight(proposed_weight); if (!seen_edge(in)) { work_queue.push(in); } @@ -629,8 +546,8 @@ void Plan::ScheduleInitialEdges() { } } -void Plan::PrepareQueue(BuildLog* build_log) { - ComputeCriticalTime(build_log); +void Plan::PrepareQueue() { + ComputeCriticalPath(); ScheduleInitialEdges(); } @@ -803,7 +720,7 @@ bool Builder::AlreadyUpToDate() const { bool Builder::Build(string* err) { assert(!AlreadyUpToDate()); - plan_.PrepareQueue(scan_.build_log()); + plan_.PrepareQueue(); status_->PlanHasTotalEdges(plan_.command_edge_count()); int pending_commands = 0; diff --git a/src/build.h b/src/build.h index e8b7c39..63d0682 100644 --- a/src/build.h +++ b/src/build.h @@ -76,7 +76,7 @@ struct Plan { void Reset(); // After all targets have been added, prepares the ready queue for find work. - void PrepareQueue(BuildLog* build_log); + void PrepareQueue(); /// Update the build plan to account for modifications made to the graph /// by information loaded from a dyndep file. @@ -97,14 +97,14 @@ struct Plan { }; private: - void ComputeCriticalTime(BuildLog* build_log); + void ComputeCriticalPath(); bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); void UnmarkDependents(const Node* node, std::set* dependents); bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, std::set* dyndep_walk); // Add edges that kWantToStart into the ready queue - // Must be called after ComputeCriticalTime and before FindWork + // Must be called after ComputeCriticalPath and before FindWork void ScheduleInitialEdges(); /// Update plan with knowledge that the given node is up to date. diff --git a/src/build_test.cc b/src/build_test.cc index 4c274e6..176f1a3 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -54,7 +54,7 @@ struct PlanTest : public StateTestWithBuiltinRules { string err; EXPECT_TRUE(plan_.AddTarget(GetNode(node), &err)); ASSERT_EQ("", err); - plan_.PrepareQueue(log); + plan_.PrepareQueue(); ASSERT_TRUE(plan_.more_to_do()); } @@ -207,7 +207,7 @@ void PlanTest::TestPoolWithDepthOne(const char* test_case) { ASSERT_EQ("", err); EXPECT_TRUE(plan_.AddTarget(GetNode("out2"), &err)); ASSERT_EQ("", err); - plan_.PrepareQueue(NULL); + plan_.PrepareQueue(); ASSERT_TRUE(plan_.more_to_do()); Edge* edge = plan_.FindWork(); @@ -434,7 +434,7 @@ TEST_F(PlanTest, PoolWithFailingEdge) { ASSERT_EQ("", err); EXPECT_TRUE(plan_.AddTarget(GetNode("out2"), &err)); ASSERT_EQ("", err); - plan_.PrepareQueue(NULL); + plan_.PrepareQueue(); ASSERT_TRUE(plan_.more_to_do()); Edge* edge = plan_.FindWork(); @@ -491,11 +491,11 @@ TEST_F(PlanTest, PriorityWithoutBuildLog) { BuildLog log; PrepareForTarget("out", &log); - EXPECT_EQ(GetNode("out")->in_edge()->critical_time_ms(), 1); - EXPECT_EQ(GetNode("a0")->in_edge()->critical_time_ms(), 2); - EXPECT_EQ(GetNode("b0")->in_edge()->critical_time_ms(), 2); - EXPECT_EQ(GetNode("c0")->in_edge()->critical_time_ms(), 2); - EXPECT_EQ(GetNode("a1")->in_edge()->critical_time_ms(), 3); + EXPECT_EQ(GetNode("out")->in_edge()->critical_path_weight(), 1); + EXPECT_EQ(GetNode("a0")->in_edge()->critical_path_weight(), 2); + EXPECT_EQ(GetNode("b0")->in_edge()->critical_path_weight(), 2); + EXPECT_EQ(GetNode("c0")->in_edge()->critical_path_weight(), 2); + EXPECT_EQ(GetNode("a1")->in_edge()->critical_path_weight(), 3); const int n_edges = 5; const char *expected_order[n_edges] = { @@ -513,98 +513,6 @@ TEST_F(PlanTest, PriorityWithoutBuildLog) { EXPECT_FALSE(plan_.FindWork()); } -TEST_F(PlanTest, PriorityWithBuildLog) { - // With a build log, the critical time is longest weighted path. - // Test with the following graph: - // a2 - // | - // a1 b1 - // | | | - // a0 b0 c0 - // \ | / - // out - - ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, - "rule r\n" - " command = unused\n" - "build out: r a0 b0 c0\n" - "build a0: r a1\n" - "build a1: r a2\n" - "build b0: r b1\n" - "build c0: r b1\n" - )); - GetNode("a1")->MarkDirty(); - GetNode("a0")->MarkDirty(); - GetNode("b0")->MarkDirty(); - GetNode("c0")->MarkDirty(); - GetNode("out")->MarkDirty(); - - BuildLog log; - log.RecordCommand(GetNode("out")->in_edge(), 0, 100); // time = 100 - log.RecordCommand(GetNode("a0")->in_edge(), 10, 20); // time = 10 - log.RecordCommand(GetNode("a1")->in_edge(), 20, 40); // time = 20 - log.RecordCommand(GetNode("b0")->in_edge(), 10, 30); // time = 20 - log.RecordCommand(GetNode("c0")->in_edge(), 20, 70); // time = 50 - - PrepareForTarget("out", &log); - - EXPECT_EQ(GetNode("out")->in_edge()->critical_time_ms(), 100); - EXPECT_EQ(GetNode("a0")->in_edge()->critical_time_ms(), 110); - EXPECT_EQ(GetNode("b0")->in_edge()->critical_time_ms(), 120); - EXPECT_EQ(GetNode("c0")->in_edge()->critical_time_ms(), 150); - EXPECT_EQ(GetNode("a1")->in_edge()->critical_time_ms(), 130); - - const int n_edges = 5; - const char *expected_order[n_edges] = { - "c0", "a1", "b0", "a0", "out"}; - for (int i = 0; i < n_edges; ++i) { - Edge* edge = plan_.FindWork(); - ASSERT_NE(edge, NULL); - EXPECT_EQ(expected_order[i], edge->outputs_[0]->path()); - - std::string err; - ASSERT_TRUE(plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err)); - EXPECT_EQ(err, ""); - } - EXPECT_FALSE(plan_.FindWork()); -} - -TEST_F(PlanTest, RuntimePartialBuildLog) { - // Test the edge->run_time_ms() estimate when no build log is available - - ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, - "rule r\n" - " command = unused\n" - "build out: r a0 b0 c0 d0\n" - "build a0: r a1\n" - "build b0: r b1\n" - "build c0: r c1\n" - "build d0: r d1\n" - )); - GetNode("a0")->MarkDirty(); - GetNode("b0")->MarkDirty(); - GetNode("c0")->MarkDirty(); - GetNode("d0")->MarkDirty(); - GetNode("out")->MarkDirty(); - - BuildLog log; - log.RecordCommand(GetNode("out")->in_edge(), 0, 100); // time = 40 - log.RecordCommand(GetNode("a0")->in_edge(), 10, 20); // time = 10 - log.RecordCommand(GetNode("b0")->in_edge(), 20, 40); // time = 20 - log.RecordCommand(GetNode("c0")->in_edge(), 10, 40); // time = 30 - - PrepareForTarget("out", &log); - - // These edges times are read from the build log - EXPECT_EQ(GetNode("out")->in_edge()->run_time_ms(), 100); - EXPECT_EQ(GetNode("a0")->in_edge()->run_time_ms(), 10); - EXPECT_EQ(GetNode("b0")->in_edge()->run_time_ms(), 20); - EXPECT_EQ(GetNode("c0")->in_edge()->run_time_ms(), 30); - - // The missing data is taken from the 3rd quintile of known data - EXPECT_EQ(GetNode("d0")->in_edge()->run_time_ms(), 30); -} - /// Fake implementation of CommandRunner, useful for tests. struct FakeCommandRunner : public CommandRunner { explicit FakeCommandRunner(VirtualFileSystem* fs) : diff --git a/src/graph.h b/src/graph.h index db82bc5..c7304a6 100644 --- a/src/graph.h +++ b/src/graph.h @@ -174,7 +174,7 @@ struct Edge { id_(0), outputs_ready_(false), deps_loaded_(false), deps_missing_(false), generated_by_dep_loader_(false), command_start_time_(0), implicit_deps_(0), order_only_deps_(0), - run_time_ms_(0), critical_time_ms_(-1), implicit_outs_(0) {} + critical_path_weight_(-1), implicit_outs_(0) {} /// Return true if all inputs' in-edges are ready. bool AllInputsReady() const; @@ -200,21 +200,13 @@ struct Edge { // Append all edge explicit inputs to |*out|. Possibly with shell escaping. void CollectInputs(bool shell_escape, std::vector* out) const; - // Critical time is the estimated execution time in ms of the edges - // forming the longest time-weighted path to the target output. - // This quantity is used as a priority during build scheduling. - // NOTE: Defaults to -1 as a marker smaller than any valid time - int64_t critical_time_ms() const { return critical_time_ms_; } - void set_critical_time_ms(int64_t critical_time_ms) { - critical_time_ms_ = critical_time_ms; - } - - // Run time in ms for this edge's command. - // Taken from the build log if present, or estimated otherwise. - // Default initialized to 0. - int64_t run_time_ms() const { return run_time_ms_; } - void set_run_time_ms(int64_t run_time_ms) { - run_time_ms_ = run_time_ms; + // critical_path_weight is the priority during build scheduling. The + // "critical path" between this edge's inputs and any target node is + // the path which maximises the sum oof weights along that path. + // NOTE: Defaults to -1 as a marker smaller than any valid weight + int64_t critical_path_weight() const { return critical_path_weight_; } + void set_critical_path_weight(int64_t critical_path_weight) { + critical_path_weight_ = critical_path_weight; } const Rule* rule_; @@ -226,8 +218,7 @@ struct Edge { BindingEnv* env_; VisitMark mark_; size_t id_; - int64_t run_time_ms_; - int64_t critical_time_ms_; + int64_t critical_path_weight_; bool outputs_ready_; bool deps_loaded_; bool deps_missing_; @@ -392,15 +383,15 @@ struct DependencyScan { // priority is defined lexicographically first by largest critical // time, then lowest ID. // -// Including ID means that wherever the critical times are the same, -// the edges are executed in ascending ID order which was historically -// how all tasks were scheduled. +// Including ID means that wherever the critical path weights are the +// same, the edges are executed in ascending ID order which was +// historically how all tasks were scheduled. struct EdgePriorityLess { bool operator()(const Edge* e1, const Edge* e2) const { - const int64_t ct1 = e1->critical_time_ms(); - const int64_t ct2 = e2->critical_time_ms(); - if (ct1 != ct2) { - return ct1 < ct2; + const int64_t cw1 = e1->critical_path_weight(); + const int64_t cw2 = e2->critical_path_weight(); + if (cw1 != cw2) { + return cw1 < cw2; } return e1->id_ > e2->id_; } @@ -414,8 +405,8 @@ struct EdgePriorityGreater { }; // A priority queue holding non-owning Edge pointers. top() will -// return the edge with the largest critical time, and lowest ID if -// more than one edge has the same critical time. +// return the edge with the largest critical path weight, and lowest +// ID if more than one edge has the same critical path weight. class EdgePriorityQueue: public std::priority_queue, EdgePriorityLess>{ public: diff --git a/src/graph_test.cc b/src/graph_test.cc index 67dad53..fb0513c 100644 --- a/src/graph_test.cc +++ b/src/graph_test.cc @@ -998,7 +998,7 @@ TEST_F(GraphTest, EdgeQueuePriority) { // Output is largest critical time to smallest for (int i = 0; i < n_edges; ++i) { - edges[i]->set_critical_time_ms(i * 10); + edges[i]->set_critical_path_weight(i * 10); } EdgePriorityQueue queue; @@ -1015,7 +1015,7 @@ TEST_F(GraphTest, EdgeQueuePriority) { // When there is ambiguity, the lowest edge id comes first for (int i = 0; i < n_edges; ++i) { - edges[i]->set_critical_time_ms(0); + edges[i]->set_critical_path_weight(0); } queue.push(edges[1]); -- cgit v0.12