From 0efbbbf67a94452919084765e87106e7748274cb Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 19 Sep 2011 02:56:15 +0100 Subject: Implement restat rules A restat rule is a rule which is capable of pruning the build tree depending on the timestamps of its outputs before and after a build. After a restat rule is rebuilt, Ninja will re-stat each output file to obtain its current timestamp. If the timestamp is unchanged from when Ninja initially stat'ed the file before starting the build, Ninja will mark that output file as clean, and recursively for each reverse dependency of the output file, recompute its dirty status. Ninja then stores the most recent timestamp of any input file in the build log entry associated with the output file. This timestamp will be treated by future invocations of Ninja as the output file's modification time instead of the output file's actual modification time for the purpose of deciding whether it is dirty (but not whether its reverse dependencies are dirty). --- doc/manual.asciidoc | 6 ++++ src/build.cc | 97 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/build.h | 4 +++ src/build_log.cc | 33 ++++++++++++------ src/build_log.h | 7 ++-- src/build_log_test.cc | 19 ++++++++++ src/build_test.cc | 62 +++++++++++++++++++++++++++++++- src/graph.cc | 23 +++++++++--- src/graph.h | 4 +-- src/parsers.cc | 6 ++++ 10 files changed, 239 insertions(+), 22 deletions(-) diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc index 7fd2715..d59f471 100644 --- a/doc/manual.asciidoc +++ b/doc/manual.asciidoc @@ -469,6 +469,12 @@ aborting due to a missing input. if the command line changes; and secondly, they are not cleaned by default. +`restat`:: if present, causes Ninja to re-stat the command's outputs after + execution of the command. Each output whose modification time the command + did not change will be treated as though it had never needed to be built. + This may cause the output's reverse dependencies to be removed from the + list of pending build actions. + Additionally, the special `$in` and `$out` variables expand to the space-separated list of files provided to the `build` line referencing this `rule`. diff --git a/src/build.cc b/src/build.cc index c9ffe7e..9465532 100644 --- a/src/build.cc +++ b/src/build.cc @@ -301,6 +301,60 @@ void Plan::NodeFinished(Node* node) { } } +void Plan::CleanNode(BuildLog* build_log, Node* node) { + node->dirty_ = false; + + for (vector::iterator ei = node->out_edges_.begin(); + ei != node->out_edges_.end(); ++ei) { + // Don't process edges that we don't actually want. + map::iterator want_i = want_.find(*ei); + if (want_i == want_.end() || !want_i->second) + continue; + + // If all non-order-only inputs for this edge are now clean, + // we might have changed the dirty state of the outputs. + vector::iterator begin = (*ei)->inputs_.begin(), + end = (*ei)->inputs_.end() - (*ei)->order_only_deps_; + if (find_if(begin, end, mem_fun(&Node::dirty)) == end) { + // Recompute most_recent_input and command. + time_t most_recent_input = 1; + for (vector::iterator ni = begin; ni != end; ++ni) + if ((*ni)->file_->mtime_ > most_recent_input) + most_recent_input = (*ni)->file_->mtime_; + string command = (*ei)->EvaluateCommand(); + + // Now, recompute the dirty state of each output. + bool all_outputs_clean = true; + for (vector::iterator ni = (*ei)->outputs_.begin(); + ni != (*ei)->outputs_.end(); ++ni) { + if (!(*ni)->dirty_) + continue; + + // RecomputeOutputDirty will not modify dirty_ if the output is clean. + (*ni)->dirty_ = false; + + // Since we know that all non-order-only inputs are clean, we can pass + // "false" as the "dirty" argument here. + (*ei)->RecomputeOutputDirty(build_log, most_recent_input, false, + command, *ni); + if ((*ni)->dirty_) { + all_outputs_clean = false; + } else { + CleanNode(build_log, *ni); + } + } + + // If we cleaned all outputs, mark the node as not wanted. + if (all_outputs_clean) { + want_i->second = false; + --wanted_edges_; + if (!(*ei)->is_phony()) + --command_edges_; + } + } + } +} + void Plan::Dump() { printf("pending: %d\n", (int)want_.size()); for (map::iterator i = want_.begin(); i != want_.end(); ++i) { @@ -516,8 +570,47 @@ bool Builder::StartEdge(Edge* edge, string* err) { } void Builder::FinishEdge(Edge* edge, bool success, const string& output) { - if (success) + time_t restat_mtime = 0; + + if (success) { + if (edge->rule_->restat_) { + bool node_cleaned = false; + + for (vector::iterator i = edge->outputs_.begin(); + i != edge->outputs_.end(); ++i) { + if ((*i)->file_->exists()) { + time_t new_mtime = disk_interface_->Stat((*i)->file_->path_); + if ((*i)->file_->mtime_ == new_mtime) { + // The rule command did not change the output. Propagate the clean + // state through the build graph. + plan_.CleanNode(log_, *i); + node_cleaned = true; + } + } + } + + if (node_cleaned) { + // If any output was cleaned, find the most recent mtime of any + // (existing) non-order-only input. + for (vector::iterator i = edge->inputs_.begin(); + i != edge->inputs_.end() - edge->order_only_deps_; ++i) { + time_t input_mtime = disk_interface_->Stat((*i)->file_->path_); + if (input_mtime == 0) { + restat_mtime = 0; + break; + } + if (input_mtime > restat_mtime) + restat_mtime = input_mtime; + } + + // The total number of edges in the plan may have changed as a result + // of a restat. + status_->PlanHasTotalEdges(plan_.command_edge_count()); + } + } + plan_.EdgeFinished(edge); + } if (edge->is_phony()) return; @@ -525,5 +618,5 @@ void Builder::FinishEdge(Edge* edge, bool success, const string& output) { int start_time, end_time; status_->BuildEdgeFinished(edge, success, output, &start_time, &end_time); if (success && log_) - log_->RecordCommand(edge, start_time, end_time); + log_->RecordCommand(edge, start_time, end_time, restat_mtime); } diff --git a/src/build.h b/src/build.h index fa0abd2..586d1ff 100644 --- a/src/build.h +++ b/src/build.h @@ -22,6 +22,7 @@ #include using namespace std; +struct BuildLog; struct Edge; struct DiskInterface; struct Node; @@ -51,6 +52,9 @@ struct Plan { /// tests. void EdgeFinished(Edge* edge); + /// Clean the given node during the build. + void CleanNode(BuildLog* build_log, Node* node); + /// Number of edges with commands to run. int command_edge_count() const { return command_edges_; } diff --git a/src/build_log.cc b/src/build_log.cc index 98ce872..28cb6f8 100644 --- a/src/build_log.cc +++ b/src/build_log.cc @@ -32,8 +32,8 @@ namespace { -const char kFileSignature[] = "# ninja log v2\n"; -const int kCurrentVersion = 2; +const char kFileSignature[] = "# ninja log v%d\n"; +const int kCurrentVersion = 3; void SetCloseOnExec(FILE* file) { #ifndef _WIN32 @@ -74,7 +74,7 @@ bool BuildLog::OpenForWrite(const string& path, string* err) { SetCloseOnExec(log_file_); if (ftell(log_file_) == 0) { - if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, log_file_) < 1) { + if (fprintf(log_file_, kFileSignature, kCurrentVersion) < 0) { *err = strerror(errno); return false; } @@ -83,7 +83,8 @@ bool BuildLog::OpenForWrite(const string& path, string* err) { return true; } -void BuildLog::RecordCommand(Edge* edge, int start_time, int end_time) { +void BuildLog::RecordCommand(Edge* edge, int start_time, int end_time, + time_t restat_mtime) { const string command = edge->EvaluateCommand(); for (vector::iterator out = edge->outputs_.begin(); out != edge->outputs_.end(); ++out) { @@ -100,6 +101,7 @@ void BuildLog::RecordCommand(Edge* edge, int start_time, int end_time) { log_entry->command = command; log_entry->start_time = start_time; log_entry->end_time = end_time; + log_entry->restat_mtime = restat_mtime; if (log_file_) WriteEntry(log_file_, *log_entry); @@ -129,10 +131,8 @@ bool BuildLog::Load(const string& path, string* err) { while (fgets(buf, sizeof(buf), file)) { if (!log_version) { log_version = 1; // Assume by default. - if (strcmp(buf, kFileSignature) == 0) { - log_version = 2; + if (sscanf(buf, kFileSignature, &log_version) > 0) continue; - } } char* start = buf; char* end = strchr(start, ' '); @@ -141,6 +141,8 @@ bool BuildLog::Load(const string& path, string* err) { *end = 0; int start_time = 0, end_time = 0; + time_t restat_mtime = 0; + if (log_version == 1) { // In v1 we logged how long the command took; we don't use this info. // int time_ms = atoi(start); @@ -157,6 +159,16 @@ bool BuildLog::Load(const string& path, string* err) { end_time = atoi(start); start = end + 1; } + + if (log_version >= 3) { + // In v3 we log the restat mtime. + char* end = strchr(start, ' '); + if (!end) + continue; + *end = 0; + restat_mtime = atol(start); + start = end + 1; + } end = strchr(start, ' '); if (!end) @@ -182,6 +194,7 @@ bool BuildLog::Load(const string& path, string* err) { entry->start_time = start_time; entry->end_time = end_time; + entry->restat_mtime = restat_mtime; entry->command = string(start, end - start); } @@ -210,8 +223,8 @@ BuildLog::LogEntry* BuildLog::LookupByOutput(const string& path) { } void BuildLog::WriteEntry(FILE* f, const LogEntry& entry) { - fprintf(f, "%d %d %s %s\n", - entry.start_time, entry.end_time, + fprintf(f, "%d %d %ld %s %s\n", + entry.start_time, entry.end_time, (long) entry.restat_mtime, entry.output.c_str(), entry.command.c_str()); } @@ -225,7 +238,7 @@ bool BuildLog::Recompact(const string& path, string* err) { return false; } - if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, f) < 1) { + if (fprintf(f, kFileSignature, kCurrentVersion) < 0) { *err = strerror(errno); return false; } diff --git a/src/build_log.h b/src/build_log.h index 4a11c1a..99ae5a0 100644 --- a/src/build_log.h +++ b/src/build_log.h @@ -38,7 +38,8 @@ struct BuildLog { void SetConfig(BuildConfig* config) { config_ = config; } bool OpenForWrite(const string& path, string* err); - void RecordCommand(Edge* edge, int start_time, int end_time); + void RecordCommand(Edge* edge, int start_time, int end_time, + time_t restat_mtime = 0); void Close(); /// Load the on-disk log. @@ -49,11 +50,13 @@ struct BuildLog { string command; int start_time; int end_time; + time_t restat_mtime; // Used by tests. bool operator==(const LogEntry& o) { return output == o.output && command == o.command && - start_time == o.start_time && end_time == o.end_time; + start_time == o.start_time && end_time == o.end_time && + restat_mtime == o.restat_mtime; } }; diff --git a/src/build_log_test.cc b/src/build_log_test.cc index fdd2378..cf31c1d 100644 --- a/src/build_log_test.cc +++ b/src/build_log_test.cc @@ -115,3 +115,22 @@ TEST_F(BuildLogTest, Truncate) { ASSERT_EQ("", err); } } + +TEST_F(BuildLogTest, UpgradeV2) { + FILE* f = fopen(kTestFilename, "wb"); + fprintf(f, "# ninja log v2\n"); + fprintf(f, "123 456 out command\n"); + fclose(f); + + string err; + BuildLog log; + EXPECT_TRUE(log.Load(kTestFilename, &err)); + ASSERT_EQ("", err); + + BuildLog::LogEntry* e = log.LookupByOutput("out"); + ASSERT_TRUE(e); + ASSERT_EQ(123, e->start_time); + ASSERT_EQ(456, e->end_time); + ASSERT_EQ(0, e->restat_mtime); + ASSERT_EQ("command", e->command); +} diff --git a/src/build_test.cc b/src/build_test.cc index 4544e2c..9cfa6e1 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -14,6 +14,7 @@ #include "build.h" +#include "build_log.h" #include "graph.h" #include "test.h" @@ -238,7 +239,8 @@ bool BuildTest::StartCommand(Edge* edge) { out != edge->outputs_.end(); ++out) { fs_.Create((*out)->file_->path_, now_, ""); } - } else if (edge->rule_->name_ == "fail") { + } else if (edge->rule_->name_ == "true" || + edge->rule_->name_ == "fail") { // Don't do anything. } else { printf("unknown command\n"); @@ -665,3 +667,61 @@ TEST_F(BuildTest, SwallowFailuresLimit) { ASSERT_EQ(3u, commands_ran_.size()); ASSERT_EQ("cannot make progress due to previous errors", err); } + +struct BuildWithLogTest : public BuildTest { + BuildWithLogTest() { + state_.build_log_ = builder_.log_ = &build_log_; + } + + BuildLog build_log_; +}; + +TEST_F(BuildWithLogTest, RestatTest) { + ASSERT_NO_FATAL_FAILURE(AssertParse(&state_, +"rule true\n" +" command = true\n" +" restat = 1\n" +"rule cc\n" +" command = cc\n" +" restat = 1\n" +"build out1: cc in\n" +"build out2: true out1\n" +"build out3: cat out2\n")); + + fs_.Create("out1", now_, ""); + fs_.Create("out2", now_, ""); + fs_.Create("out3", now_, ""); + + now_++; + + fs_.Create("in", now_, ""); + + // "cc" touches out1, so we should build out2. But because "true" does not + // touch out2, we should cancel the build of out3. + string err; + EXPECT_TRUE(builder_.AddTarget("out3", &err)); + ASSERT_EQ("", err); + EXPECT_TRUE(builder_.Build(&err)); + ASSERT_EQ(2u, commands_ran_.size()); + + // If we run again, it should be a no-op, because the build log has recorded + // that we've already built out2 with an input timestamp of 2 (from out1). + commands_ran_.clear(); + state_.Reset(); + EXPECT_TRUE(builder_.AddTarget("out3", &err)); + ASSERT_EQ("", err); + EXPECT_TRUE(builder_.AlreadyUpToDate()); + + now_++; + + fs_.Create("in", now_, ""); + + // The build log entry should not, however, prevent us from rebuilding out2 + // if out1 changes. + commands_ran_.clear(); + state_.Reset(); + EXPECT_TRUE(builder_.AddTarget("out3", &err)); + ASSERT_EQ("", err); + EXPECT_TRUE(builder_.Build(&err)); + ASSERT_EQ(2u, commands_ran_.size()); +} diff --git a/src/graph.cc b/src/graph.cc index 9df6ecb..d13b10c 100644 --- a/src/graph.cc +++ b/src/graph.cc @@ -100,18 +100,31 @@ void Edge::RecomputeOutputDirty(BuildLog* build_log, time_t most_recent_input, return; } + BuildLog::LogEntry* entry = 0; // Output is dirty if we're dirty, we're missing the output, // or if it's older than the most recent input mtime. - if (dirty || !output->file_->exists() || - output->file_->mtime_ < most_recent_input) { + if (dirty || !output->file_->exists()) { output->dirty_ = true; - } else { + } else if (output->file_->mtime_ < most_recent_input) { + // If this is a restat rule, we may have cleaned the output with a restat + // rule in a previous run and stored the most recent input mtime in the + // build log. Use that mtime instead, so that the file will only be + // considered dirty if an input was modified since the previous run. + if (rule_->restat_ && build_log && + (entry = build_log->LookupByOutput(output->file_->path_))) { + if (entry->restat_mtime < most_recent_input) + output->dirty_ = true; + } else { + output->dirty_ = true; + } + } + + if (!output->dirty_) { // May also be dirty due to the command changing since the last build. // But if this is a generator rule, the command changing does not make us // dirty. - BuildLog::LogEntry* entry; if (!rule_->generator_ && build_log && - (entry = build_log->LookupByOutput(output->file_->path_))) { + (entry || (entry = build_log->LookupByOutput(output->file_->path_)))) { if (command != entry->command) output->dirty_ = true; } diff --git a/src/graph.h b/src/graph.h index 079db43..64496d5 100644 --- a/src/graph.h +++ b/src/graph.h @@ -59,7 +59,7 @@ struct FileStat { /// An invokable build command and associated metadata (description, etc.). struct Rule { - Rule(const string& name) : name_(name), generator_(false) { } + Rule(const string& name) : name_(name), generator_(false), restat_(false) { } bool ParseCommand(const string& command, string* err) { return command_.Parse(command, err); @@ -68,7 +68,7 @@ struct Rule { EvalString command_; EvalString description_; EvalString depfile_; - bool generator_; + bool generator_, restat_; }; struct BuildLog; diff --git a/src/parsers.cc b/src/parsers.cc index c086109..4920496 100644 --- a/src/parsers.cc +++ b/src/parsers.cc @@ -389,6 +389,12 @@ bool ManifestParser::ParseRule(string* err) { if (!tokenizer_.ReadToNewline(&dummy, err)) return false; continue; + } else if (key == "restat") { + rule->restat_ = true; + string dummy; + if (!tokenizer_.ReadToNewline(&dummy, err)) + return false; + continue; } else { // Die on other keyvals for now; revisit if we want to add a // scope here. -- cgit v0.12