diff options
author | Jan Niklas Hasse <jhasse@bixense.com> | 2024-02-29 17:58:44 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-29 17:58:44 (GMT) |
commit | 460dff81192617168ea05eae0b7816054f2aa32d (patch) | |
tree | 926cbf997713e137c38b4e25b5963662bc7b51fa /src | |
parent | 5f93e5a6ac2227b40fe63dc098c4cd5b850685a4 (diff) | |
parent | 29fe3ef1faefa554bc9490716071e969e84774db (diff) | |
download | Ninja-460dff81192617168ea05eae0b7816054f2aa32d.zip Ninja-460dff81192617168ea05eae0b7816054f2aa32d.tar.gz Ninja-460dff81192617168ea05eae0b7816054f2aa32d.tar.bz2 |
Merge pull request #2177 from peterbell10/cpsched-2
Add simplified critical path scheduler to improve build times
Diffstat (limited to 'src')
-rw-r--r-- | src/build.cc | 129 | ||||
-rw-r--r-- | src/build.h | 38 | ||||
-rw-r--r-- | src/build_test.cc | 98 | ||||
-rw-r--r-- | src/graph.h | 49 | ||||
-rw-r--r-- | src/graph_test.cc | 53 | ||||
-rw-r--r-- | src/state.cc | 4 | ||||
-rw-r--r-- | src/state.h | 7 |
7 files changed, 326 insertions, 52 deletions
diff --git a/src/build.cc b/src/build.cc index 6903e45..e0c3939 100644 --- a/src/build.cc +++ b/src/build.cc @@ -89,6 +89,7 @@ void Plan::Reset() { } bool Plan::AddTarget(const Node* target, string* err) { + targets_.push_back(target); return AddSubTarget(target, NULL, err, NULL); } @@ -128,8 +129,6 @@ bool Plan::AddSubTarget(const Node* node, const Node* dependent, string* err, if (node->dirty() && want == kWantNothing) { want = kWantToStart; EdgeWanted(edge); - if (!dyndep_walk && edge->AllInputsReady()) - ScheduleWork(want_ins.first); } if (dyndep_walk) @@ -156,10 +155,10 @@ void Plan::EdgeWanted(const Edge* edge) { Edge* Plan::FindWork() { if (ready_.empty()) return NULL; - EdgeSet::iterator e = ready_.begin(); - Edge* edge = *e; - ready_.erase(e); - return edge; + + Edge* work = ready_.top(); + ready_.pop(); + return work; } void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) { @@ -180,7 +179,7 @@ void Plan::ScheduleWork(map<Edge*, Want>::iterator want_e) { pool->RetrieveReadyEdges(&ready_); } else { pool->EdgeScheduled(*edge); - ready_.insert(edge); + ready_.push(edge); } } @@ -442,6 +441,121 @@ void Plan::UnmarkDependents(const Node* node, set<Node*>* dependents) { } } +namespace { + +template <typename T> +struct SeenBefore { + std::set<const T*>* seen_; + + SeenBefore(std::set<const T*>* seen) : seen_(seen) {} + + bool operator() (const T* item) { + // Return true if the item has been seen before + return !seen_->insert(item).second; + } +}; + +// Heuristic for edge priority weighting. +// Phony edges are free (0 cost), all other edges are weighted equally. +int64_t EdgeWeightHeuristic(Edge *edge) { + return edge->is_phony() ? 0 : 1; +} + +} // namespace + +void Plan::ComputeCriticalPath() { + METRIC_RECORD("ComputeCriticalPath"); + // Remove duplicate targets + { + std::set<const Node*> seen; + SeenBefore<Node> seen_before(&seen); + targets_.erase(std::remove_if(targets_.begin(), targets_.end(), seen_before), + targets_.end()); + } + + // Use backflow algorithm to compute the critical path for all + // nodes, starting from the destination nodes. + // XXX: ignores pools + std::queue<Edge*> work_queue; // Queue, for breadth-first traversal + // The set of edges currently in work_queue, to avoid duplicates. + std::set<const Edge*> active_edges; + SeenBefore<Edge> seen_edge(&active_edges); + + for (size_t i = 0; i < targets_.size(); ++i) { + const Node* target = targets_[i]; + if (Edge* in = target->in_edge()) { + int64_t edge_weight = EdgeWeightHeuristic(in); + in->set_critical_path_weight( + std::max<int64_t>(edge_weight, in->critical_path_weight())); + if (!seen_edge(in)) { + work_queue.push(in); + } + } + } + + while (!work_queue.empty()) { + Edge* e = work_queue.front(); + work_queue.pop(); + // If the critical path of any dependent edges is updated, this + // edge may need to be processed again. So re-allow insertion. + active_edges.erase(e); + + for (std::vector<Node*>::iterator it = e->inputs_.begin(), + end = e->inputs_.end(); + it != end; ++it) { + Edge* in = (*it)->in_edge(); + if (!in) { + continue; + } + // Only process edge if this node offers a higher weighted path + const int64_t edge_weight = EdgeWeightHeuristic(in); + const int64_t proposed_weight = e->critical_path_weight() + edge_weight; + if (proposed_weight > in->critical_path_weight()) { + in->set_critical_path_weight(proposed_weight); + if (!seen_edge(in)) { + work_queue.push(in); + } + } + } + } +} + +void Plan::ScheduleInitialEdges() { + // Add ready edges to queue. + assert(ready_.empty()); + std::set<Pool*> pools; + + for (std::map<Edge*, Plan::Want>::iterator it = want_.begin(), + end = want_.end(); it != end; ++it) { + Edge* edge = it->first; + Plan::Want want = it->second; + if (!(want == kWantToStart && edge->AllInputsReady())) { + continue; + } + + Pool* pool = edge->pool(); + if (pool->ShouldDelayEdge()) { + pool->DelayEdge(edge); + pools.insert(pool); + } else { + ScheduleWork(it); + } + } + + // Call RetrieveReadyEdges only once at the end so higher priority + // edges are retrieved first, not the ones that happen to be first + // in the want_ map. + for (std::set<Pool*>::iterator it=pools.begin(), + end = pools.end(); it != end; ++it) { + (*it)->RetrieveReadyEdges(&ready_); + } +} + +void Plan::PrepareQueue() { + ComputeCriticalPath(); + ScheduleInitialEdges(); +} + void Plan::Dump() const { printf("pending: %d\n", (int)want_.size()); for (map<Edge*, Want>::const_iterator e = want_.begin(); e != want_.end(); ++e) { @@ -611,6 +725,7 @@ bool Builder::AlreadyUpToDate() const { bool Builder::Build(string* err) { assert(!AlreadyUpToDate()); + plan_.PrepareQueue(); status_->PlanHasTotalEdges(plan_.command_edge_count()); int pending_commands = 0; diff --git a/src/build.h b/src/build.h index 8ec2355..2885f41 100644 --- a/src/build.h +++ b/src/build.h @@ -18,12 +18,11 @@ #include <cstdio> #include <map> #include <memory> -#include <queue> #include <string> #include <vector> #include "depfile_parser.h" -#include "graph.h" // XXX needed for DependencyScan; should rearrange. +#include "graph.h" #include "exit_status.h" #include "util.h" // int64_t @@ -76,21 +75,13 @@ struct Plan { /// Reset state. Clears want and ready sets. void Reset(); + // After all targets have been added, prepares the ready queue for find work. + void PrepareQueue(); + /// Update the build plan to account for modifications made to the graph /// by information loaded from a dyndep file. bool DyndepsLoaded(DependencyScan* scan, const Node* node, const DyndepFile& ddf, std::string* err); -private: - bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); - void UnmarkDependents(const Node* node, std::set<Node*>* dependents); - bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, - std::set<Edge*>* dyndep_walk); - - /// Update plan with knowledge that the given node is up to date. - /// If the node is a dyndep binding on any of its dependents, this - /// loads dynamic dependencies from the node's path. - /// Returns 'false' if loading dyndep info fails and 'true' otherwise. - bool NodeFinished(Node* node, std::string* err); /// Enumerate possible steps we want for an edge. enum Want @@ -105,6 +96,23 @@ private: kWantToFinish }; +private: + void ComputeCriticalPath(); + bool RefreshDyndepDependents(DependencyScan* scan, const Node* node, std::string* err); + void UnmarkDependents(const Node* node, std::set<Node*>* dependents); + bool AddSubTarget(const Node* node, const Node* dependent, std::string* err, + std::set<Edge*>* dyndep_walk); + + // Add edges that kWantToStart into the ready queue + // Must be called after ComputeCriticalPath and before FindWork + void ScheduleInitialEdges(); + + /// Update plan with knowledge that the given node is up to date. + /// If the node is a dyndep binding on any of its dependents, this + /// loads dynamic dependencies from the node's path. + /// Returns 'false' if loading dyndep info fails and 'true' otherwise. + bool NodeFinished(Node* node, std::string* err); + void EdgeWanted(const Edge* edge); bool EdgeMaybeReady(std::map<Edge*, Want>::iterator want_e, std::string* err); @@ -119,9 +127,11 @@ private: /// we want for the edge. std::map<Edge*, Want> want_; - EdgeSet ready_; + EdgePriorityQueue ready_; Builder* builder_; + /// user provided targets in build order, earlier one have higher priority + std::vector<const Node*> targets_; /// Total number of edges that have commands (not phony). int command_edges_; diff --git a/src/build_test.cc b/src/build_test.cc index 8152b4e..71bf55c 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -50,6 +50,14 @@ struct PlanTest : public StateTestWithBuiltinRules { sort(ret->begin(), ret->end(), CompareEdgesByOutput::cmp); } + void PrepareForTarget(const char* node, BuildLog *log=NULL) { + string err; + EXPECT_TRUE(plan_.AddTarget(GetNode(node), &err)); + ASSERT_EQ("", err); + plan_.PrepareQueue(); + ASSERT_TRUE(plan_.more_to_do()); + } + void TestPoolWithDepthOne(const char *test_case); }; @@ -59,10 +67,7 @@ TEST_F(PlanTest, Basic) { "build mid: cat in\n")); GetNode("mid")->MarkDirty(); GetNode("out")->MarkDirty(); - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge = plan_.FindWork(); ASSERT_TRUE(edge); @@ -71,6 +76,7 @@ TEST_F(PlanTest, Basic) { ASSERT_FALSE(plan_.FindWork()); + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -95,15 +101,12 @@ TEST_F(PlanTest, DoubleOutputDirect) { GetNode("mid1")->MarkDirty(); GetNode("mid2")->MarkDirty(); GetNode("out")->MarkDirty(); - - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge; edge = plan_.FindWork(); ASSERT_TRUE(edge); // cat in + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -128,14 +131,12 @@ TEST_F(PlanTest, DoubleOutputIndirect) { GetNode("b1")->MarkDirty(); GetNode("b2")->MarkDirty(); GetNode("out")->MarkDirty(); - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge; edge = plan_.FindWork(); ASSERT_TRUE(edge); // cat in + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -169,15 +170,12 @@ TEST_F(PlanTest, DoubleDependent) { GetNode("a1")->MarkDirty(); GetNode("a2")->MarkDirty(); GetNode("out")->MarkDirty(); - - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("out"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("out"); Edge* edge; edge = plan_.FindWork(); ASSERT_TRUE(edge); // cat in + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -209,6 +207,7 @@ void PlanTest::TestPoolWithDepthOne(const char* test_case) { ASSERT_EQ("", err); EXPECT_TRUE(plan_.AddTarget(GetNode("out2"), &err)); ASSERT_EQ("", err); + plan_.PrepareQueue(); ASSERT_TRUE(plan_.more_to_do()); Edge* edge = plan_.FindWork(); @@ -284,10 +283,7 @@ TEST_F(PlanTest, PoolsWithDepthTwo) { GetNode("outb" + string(1, '1' + static_cast<char>(i)))->MarkDirty(); } GetNode("allTheThings")->MarkDirty(); - - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("allTheThings"), &err)); - ASSERT_EQ("", err); + PrepareForTarget("allTheThings"); deque<Edge*> edges; FindWorkSorted(&edges, 5); @@ -306,6 +302,7 @@ TEST_F(PlanTest, PoolsWithDepthTwo) { ASSERT_EQ("outb3", edge->outputs_[0]->path()); // finish out1 + string err; plan_.EdgeFinished(edges.front(), Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); edges.pop_front(); @@ -363,10 +360,7 @@ TEST_F(PlanTest, PoolWithRedundantEdges) { GetNode("bar.cpp.obj")->MarkDirty(); GetNode("libfoo.a")->MarkDirty(); GetNode("all")->MarkDirty(); - string err; - EXPECT_TRUE(plan_.AddTarget(GetNode("all"), &err)); - ASSERT_EQ("", err); - ASSERT_TRUE(plan_.more_to_do()); + PrepareForTarget("all"); Edge* edge = NULL; @@ -375,6 +369,7 @@ TEST_F(PlanTest, PoolWithRedundantEdges) { edge = initial_edges[1]; // Foo first ASSERT_EQ("foo.cpp", edge->outputs_[0]->path()); + string err; plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err); ASSERT_EQ("", err); @@ -439,6 +434,7 @@ TEST_F(PlanTest, PoolWithFailingEdge) { ASSERT_EQ("", err); EXPECT_TRUE(plan_.AddTarget(GetNode("out2"), &err)); ASSERT_EQ("", err); + plan_.PrepareQueue(); ASSERT_TRUE(plan_.more_to_do()); Edge* edge = plan_.FindWork(); @@ -467,6 +463,56 @@ TEST_F(PlanTest, PoolWithFailingEdge) { ASSERT_EQ(0, edge); } +TEST_F(PlanTest, PriorityWithoutBuildLog) { + // Without a build log, the critical time is equivalent to graph + // depth. Test with the following graph: + // a2 + // | + // a1 b1 + // | | | + // a0 b0 c0 + // \ | / + // out + + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, + "rule r\n" + " command = unused\n" + "build out: r a0 b0 c0\n" + "build a0: r a1\n" + "build a1: r a2\n" + "build b0: r b1\n" + "build c0: r b1\n" + )); + GetNode("a1")->MarkDirty(); + GetNode("a0")->MarkDirty(); + GetNode("b0")->MarkDirty(); + GetNode("c0")->MarkDirty(); + GetNode("out")->MarkDirty(); + BuildLog log; + PrepareForTarget("out", &log); + + EXPECT_EQ(GetNode("out")->in_edge()->critical_path_weight(), 1); + EXPECT_EQ(GetNode("a0")->in_edge()->critical_path_weight(), 2); + EXPECT_EQ(GetNode("b0")->in_edge()->critical_path_weight(), 2); + EXPECT_EQ(GetNode("c0")->in_edge()->critical_path_weight(), 2); + EXPECT_EQ(GetNode("a1")->in_edge()->critical_path_weight(), 3); + + const int n_edges = 5; + const char *expected_order[n_edges] = { + "a1", "a0", "b0", "c0", "out"}; + for (int i = 0; i < n_edges; ++i) { + Edge* edge = plan_.FindWork(); + ASSERT_NE(edge, NULL); + EXPECT_EQ(expected_order[i], edge->outputs_[0]->path()); + + std::string err; + ASSERT_TRUE(plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, &err)); + EXPECT_EQ(err, ""); + } + + EXPECT_FALSE(plan_.FindWork()); +} + /// Fake implementation of CommandRunner, useful for tests. struct FakeCommandRunner : public CommandRunner { explicit FakeCommandRunner(VirtualFileSystem* fs) : diff --git a/src/graph.h b/src/graph.h index 5c8ca2c..d49c309 100644 --- a/src/graph.h +++ b/src/graph.h @@ -19,6 +19,7 @@ #include <set> #include <string> #include <vector> +#include <queue> #include "dyndep.h" #include "eval_env.h" @@ -188,7 +189,7 @@ struct Edge { id_(0), outputs_ready_(false), deps_loaded_(false), deps_missing_(false), generated_by_dep_loader_(false), command_start_time_(0), implicit_deps_(0), order_only_deps_(0), - implicit_outs_(0) {} + critical_path_weight_(-1), implicit_outs_(0) {} /// Return true if all inputs' in-edges are ready. bool AllInputsReady() const; @@ -214,6 +215,15 @@ struct Edge { // Append all edge explicit inputs to |*out|. Possibly with shell escaping. void CollectInputs(bool shell_escape, std::vector<std::string>* out) const; + // critical_path_weight is the priority during build scheduling. The + // "critical path" between this edge's inputs and any target node is + // the path which maximises the sum oof weights along that path. + // NOTE: Defaults to -1 as a marker smaller than any valid weight + int64_t critical_path_weight() const { return critical_path_weight_; } + void set_critical_path_weight(int64_t critical_path_weight) { + critical_path_weight_ = critical_path_weight; + } + const Rule* rule_; Pool* pool_; std::vector<Node*> inputs_; @@ -223,6 +233,7 @@ struct Edge { BindingEnv* env_; VisitMark mark_; size_t id_; + int64_t critical_path_weight_; bool outputs_ready_; bool deps_loaded_; bool deps_missing_; @@ -378,4 +389,40 @@ struct DependencyScan { DyndepLoader dyndep_loader_; }; +// Implements a less comparison for edges by priority, where highest +// priority is defined lexicographically first by largest critical +// time, then lowest ID. +// +// Including ID means that wherever the critical path weights are the +// same, the edges are executed in ascending ID order which was +// historically how all tasks were scheduled. +struct EdgePriorityLess { + bool operator()(const Edge* e1, const Edge* e2) const { + const int64_t cw1 = e1->critical_path_weight(); + const int64_t cw2 = e2->critical_path_weight(); + if (cw1 != cw2) { + return cw1 < cw2; + } + return e1->id_ > e2->id_; + } +}; + +// Reverse of EdgePriorityLess, e.g. to sort by highest priority first +struct EdgePriorityGreater { + bool operator()(const Edge* e1, const Edge* e2) const { + return EdgePriorityLess()(e2, e1); + } +}; + +// A priority queue holding non-owning Edge pointers. top() will +// return the edge with the largest critical path weight, and lowest +// ID if more than one edge has the same critical path weight. +class EdgePriorityQueue: + public std::priority_queue<Edge*, std::vector<Edge*>, EdgePriorityLess>{ +public: + void clear() { + c.clear(); + } +}; + #endif // NINJA_GRAPH_H_ diff --git a/src/graph_test.cc b/src/graph_test.cc index 9dba8af..fb0513c 100644 --- a/src/graph_test.cc +++ b/src/graph_test.cc @@ -977,3 +977,56 @@ TEST_F(GraphTest, PhonyDepsMtimes) { EXPECT_EQ(out1->mtime(), out1Mtime1); EXPECT_TRUE(out1->dirty()); } + +// Test that EdgeQueue correctly prioritizes by critical time +TEST_F(GraphTest, EdgeQueuePriority) { + + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, +"rule r\n" +" command = unused\n" +"build out1: r in1\n" +"build out2: r in2\n" +"build out3: r in3\n" +)); + + const int n_edges = 3; + Edge *(edges)[n_edges] = { + GetNode("out1")->in_edge(), + GetNode("out2")->in_edge(), + GetNode("out3")->in_edge(), + }; + + // Output is largest critical time to smallest + for (int i = 0; i < n_edges; ++i) { + edges[i]->set_critical_path_weight(i * 10); + } + + EdgePriorityQueue queue; + for (int i = 0; i < n_edges; ++i) { + queue.push(edges[i]); + } + + EXPECT_EQ(queue.size(), n_edges); + for (int i = 0; i < n_edges; ++i) { + EXPECT_EQ(queue.top(), edges[n_edges - 1 - i]); + queue.pop(); + } + EXPECT_TRUE(queue.empty()); + + // When there is ambiguity, the lowest edge id comes first + for (int i = 0; i < n_edges; ++i) { + edges[i]->set_critical_path_weight(0); + } + + queue.push(edges[1]); + queue.push(edges[2]); + queue.push(edges[0]); + + for (int i = 0; i < n_edges; ++i) { + EXPECT_EQ(queue.top(), edges[i]); + queue.pop(); + } + EXPECT_TRUE(queue.empty()); +} + + diff --git a/src/state.cc b/src/state.cc index d4b9a71..5fec5e1 100644 --- a/src/state.cc +++ b/src/state.cc @@ -38,13 +38,13 @@ void Pool::DelayEdge(Edge* edge) { delayed_.insert(edge); } -void Pool::RetrieveReadyEdges(EdgeSet* ready_queue) { +void Pool::RetrieveReadyEdges(EdgePriorityQueue* ready_queue) { DelayedEdges::iterator it = delayed_.begin(); while (it != delayed_.end()) { Edge* edge = *it; if (current_use_ + edge->weight() > depth_) break; - ready_queue->insert(edge); + ready_queue->push(edge); EdgeScheduled(*edge); ++it; } diff --git a/src/state.h b/src/state.h index 29bed56..8789cb1 100644 --- a/src/state.h +++ b/src/state.h @@ -62,7 +62,7 @@ struct Pool { void DelayEdge(Edge* edge); /// Pool will add zero or more edges to the ready_queue - void RetrieveReadyEdges(EdgeSet* ready_queue); + void RetrieveReadyEdges(EdgePriorityQueue* ready_queue); /// Dump the Pool and its edges (useful for debugging). void Dump() const; @@ -80,7 +80,10 @@ struct Pool { if (!a) return b; if (!b) return false; int weight_diff = a->weight() - b->weight(); - return ((weight_diff < 0) || (weight_diff == 0 && EdgeCmp()(a, b))); + if (weight_diff != 0) { + return weight_diff < 0; + } + return EdgePriorityGreater()(a, b); } }; |