From b334523f1da03adfcd23b6e7e7a66c8fcbf87840 Mon Sep 17 00:00:00 2001
From: Nico Weber <nicolasweber@gmx.de>
Date: Thu, 19 Mar 2015 08:42:46 -0700
Subject: Make failing stat() calls abort the build.

Fixes #830, fixes #904.

In practice, this either happens with 64-bit inodes and a 32-bit
userspace when building without -D_FILE_OFFSET_BITS=64 in CFLAGS, or
when a filename is longer than the system file length limit.

Since DiskInterface::Stat() returns -1 on error, and Node used -1 on
"stat state unknown", not aborting the build lead to ninja stat()ing the
same file over and over again, until it finally ran out of stack. That's
now fixed.

* Change RecomputeOutputsDirty() to return success instead of dirty
  state (like RecomputeDirty()) and return the dirty state in a bool
  outparam
* Node::Stat()s old return value wasn't used anywhere, change the
  function to return success instead and add an |err| outparam
* Node::StatIfNecessary()'s old return value was used only in one place.
  Change that place to explicitly check status_known() and make
  StatIfNecessary() return success and add an |err| outparam
* Plan::CleanNode() can now fail, make it return bool and add an |err|
  outparam
---
 src/build.cc               | 21 +++++++++++++++++----
 src/build.h                |  3 ++-
 src/build_test.cc          | 36 +++++++++++++++++++++++++-----------
 src/disk_interface_test.cc | 16 ++++++++++++----
 src/graph.cc               | 32 ++++++++++++++++++++++----------
 src/graph.h                | 20 ++++++++++----------
 6 files changed, 88 insertions(+), 40 deletions(-)

diff --git a/src/build.cc b/src/build.cc
index 5d66f4b..1e10c7c 100644
--- a/src/build.cc
+++ b/src/build.cc
@@ -406,7 +406,7 @@ void Plan::NodeFinished(Node* node) {
   }
 }
 
-void Plan::CleanNode(DependencyScan* scan, Node* node) {
+bool Plan::CleanNode(DependencyScan* scan, Node* node, string* err) {
   node->set_dirty(false);
 
   for (vector<Edge*>::const_iterator oe = node->out_edges().begin();
@@ -436,10 +436,16 @@ void Plan::CleanNode(DependencyScan* scan, Node* node) {
       // Now, this edge is dirty if any of the outputs are dirty.
       // If the edge isn't dirty, clean the outputs and mark the edge as not
       // wanted.
-      if (!scan->RecomputeOutputsDirty(*oe, most_recent_input)) {
+      bool outputs_dirty = false;
+      if (!scan->RecomputeOutputsDirty(*oe, most_recent_input,
+                                       &outputs_dirty, err)) {
+        return false;
+      }
+      if (!outputs_dirty) {
         for (vector<Node*>::iterator o = (*oe)->outputs_.begin();
              o != (*oe)->outputs_.end(); ++o) {
-          CleanNode(scan, *o);
+          if (!CleanNode(scan, *o, err))
+            return false;
         }
 
         want_e->second = false;
@@ -449,6 +455,7 @@ void Plan::CleanNode(DependencyScan* scan, Node* node) {
       }
     }
   }
+  return true;
 }
 
 void Plan::Dump() {
@@ -758,7 +765,8 @@ bool Builder::FinishCommand(CommandRunner::Result* result, string* err) {
         // The rule command did not change the output.  Propagate the clean
         // state through the build graph.
         // Note that this also applies to nonexistent outputs (mtime == 0).
-        plan_.CleanNode(&scan_, *o);
+        if (!plan_.CleanNode(&scan_, *o, err))
+          return false;
         node_cleaned = true;
       }
     }
@@ -805,6 +813,11 @@ bool Builder::FinishCommand(CommandRunner::Result* result, string* err) {
     assert(edge->outputs_.size() == 1 && "should have been rejected by parser");
     Node* out = edge->outputs_[0];
     TimeStamp deps_mtime = disk_interface_->Stat(out->path());
+    if (deps_mtime == -1) {
+      // TODO: Let DiskInterface::Stat() take err instead of it calling Error().
+      *err = "stat failed";
+      return false;
+    }
     if (!scan_.deps_log()->RecordDeps(out, deps_mtime, deps_nodes)) {
       *err = string("Error writing to deps log: ") + strerror(errno);
       return false;
diff --git a/src/build.h b/src/build.h
index eb3636a..5d5db80 100644
--- a/src/build.h
+++ b/src/build.h
@@ -61,7 +61,8 @@ struct Plan {
   void EdgeFinished(Edge* edge);
 
   /// Clean the given node during the build.
-  void CleanNode(DependencyScan* scan, Node* node);
+  /// Return false on error.
+  bool CleanNode(DependencyScan* scan, Node* node, string* err);
 
   /// Number of edges with commands to run.
   int command_edge_count() const { return command_edges_; }
diff --git a/src/build_test.cc b/src/build_test.cc
index 65d189d..0cdcd87 100644
--- a/src/build_test.cc
+++ b/src/build_test.cc
@@ -51,9 +51,9 @@ struct PlanTest : public StateTestWithBuiltinRules {
 };
 
 TEST_F(PlanTest, Basic) {
-  AssertParse(&state_,
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
 "build out: cat mid\n"
-"build mid: cat in\n");
+"build mid: cat in\n"));
   GetNode("mid")->MarkDirty();
   GetNode("out")->MarkDirty();
   string err;
@@ -84,9 +84,9 @@ TEST_F(PlanTest, Basic) {
 
 // Test that two outputs from one rule can be handled as inputs to the next.
 TEST_F(PlanTest, DoubleOutputDirect) {
-  AssertParse(&state_,
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
 "build out: cat mid1 mid2\n"
-"build mid1 mid2: cat in\n");
+"build mid1 mid2: cat in\n"));
   GetNode("mid1")->MarkDirty();
   GetNode("mid2")->MarkDirty();
   GetNode("out")->MarkDirty();
@@ -111,11 +111,11 @@ TEST_F(PlanTest, DoubleOutputDirect) {
 
 // Test that two outputs from one rule can eventually be routed to another.
 TEST_F(PlanTest, DoubleOutputIndirect) {
-  AssertParse(&state_,
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
 "build out: cat b1 b2\n"
 "build b1: cat a1\n"
 "build b2: cat a2\n"
-"build a1 a2: cat in\n");
+"build a1 a2: cat in\n"));
   GetNode("a1")->MarkDirty();
   GetNode("a2")->MarkDirty();
   GetNode("b1")->MarkDirty();
@@ -149,11 +149,11 @@ TEST_F(PlanTest, DoubleOutputIndirect) {
 
 // Test that two edges from one output can both execute.
 TEST_F(PlanTest, DoubleDependent) {
-  AssertParse(&state_,
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
 "build out: cat a1 a2\n"
 "build a1: cat mid\n"
 "build a2: cat mid\n"
-"build mid: cat in\n");
+"build mid: cat in\n"));
   GetNode("mid")->MarkDirty();
   GetNode("a1")->MarkDirty();
   GetNode("a2")->MarkDirty();
@@ -186,11 +186,11 @@ TEST_F(PlanTest, DoubleDependent) {
 }
 
 TEST_F(PlanTest, DependencyCycle) {
-  AssertParse(&state_,
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
 "build out: cat mid\n"
 "build mid: cat in\n"
 "build in: cat pre\n"
-"build pre: cat out\n");
+"build pre: cat out\n"));
   GetNode("out")->MarkDirty();
   GetNode("mid")->MarkDirty();
   GetNode("in")->MarkDirty();
@@ -1413,7 +1413,7 @@ TEST_F(BuildTest, RspFileFailure) {
   ASSERT_EQ("Another very long command", fs_.files_["out.rsp"].contents);
 }
 
-// Test that contens of the RSP file behaves like a regular part of
+// Test that contents of the RSP file behaves like a regular part of
 // command line, i.e. triggers a rebuild if changed
 TEST_F(BuildWithLogTest, RspFileCmdLineChange) {
   ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
@@ -1495,6 +1495,20 @@ TEST_F(BuildTest, InterruptCleanup) {
   EXPECT_EQ(0, fs_.Stat("out2"));
 }
 
+TEST_F(BuildTest, StatFailureAbortsBuild) {
+  const string kTooLongToStat(400, 'i');
+  ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
+("build " + kTooLongToStat + ": cat " + kTooLongToStat + "\n").c_str()));
+  // Also cyclic, for good measure.
+
+  // This simulates a stat failure:
+  fs_.files_[kTooLongToStat].mtime = -1;
+
+  string err;
+  EXPECT_FALSE(builder_.AddTarget(kTooLongToStat, &err));
+  EXPECT_EQ("stat failed", err);
+}
+
 TEST_F(BuildTest, PhonyWithNoInputs) {
   ASSERT_NO_FATAL_FAILURE(AssertParse(&state_,
 "build nonexistent: phony\n"
diff --git a/src/disk_interface_test.cc b/src/disk_interface_test.cc
index 05d509c..658fffd 100644
--- a/src/disk_interface_test.cc
+++ b/src/disk_interface_test.cc
@@ -204,7 +204,9 @@ TEST_F(StatTest, Simple) {
 "build out: cat in\n"));
 
   Node* out = GetNode("out");
-  out->Stat(this);
+  string err;
+  EXPECT_TRUE(out->Stat(this, &err));
+  EXPECT_EQ("", err);
   ASSERT_EQ(1u, stats_.size());
   scan_.RecomputeDirty(out->in_edge(), NULL);
   ASSERT_EQ(2u, stats_.size());
@@ -218,7 +220,9 @@ TEST_F(StatTest, TwoStep) {
 "build mid: cat in\n"));
 
   Node* out = GetNode("out");
-  out->Stat(this);
+  string err;
+  EXPECT_TRUE(out->Stat(this, &err));
+  EXPECT_EQ("", err);
   ASSERT_EQ(1u, stats_.size());
   scan_.RecomputeDirty(out->in_edge(), NULL);
   ASSERT_EQ(3u, stats_.size());
@@ -236,7 +240,9 @@ TEST_F(StatTest, Tree) {
 "build mid2: cat in21 in22\n"));
 
   Node* out = GetNode("out");
-  out->Stat(this);
+  string err;
+  EXPECT_TRUE(out->Stat(this, &err));
+  EXPECT_EQ("", err);
   ASSERT_EQ(1u, stats_.size());
   scan_.RecomputeDirty(out->in_edge(), NULL);
   ASSERT_EQ(1u + 6u, stats_.size());
@@ -255,7 +261,9 @@ TEST_F(StatTest, Middle) {
   mtimes_["out"] = 1;
 
   Node* out = GetNode("out");
-  out->Stat(this);
+  string err;
+  EXPECT_TRUE(out->Stat(this, &err));
+  EXPECT_EQ("", err);
   ASSERT_EQ(1u, stats_.size());
   scan_.RecomputeDirty(out->in_edge(), NULL);
   ASSERT_FALSE(GetNode("in")->dirty());
diff --git a/src/graph.cc b/src/graph.cc
index e3253fd..b19dc85 100644
--- a/src/graph.cc
+++ b/src/graph.cc
@@ -27,10 +27,15 @@
 #include "state.h"
 #include "util.h"
 
-bool Node::Stat(DiskInterface* disk_interface) {
+bool Node::Stat(DiskInterface* disk_interface, string* err) {
   METRIC_RECORD("node stat");
   mtime_ = disk_interface->Stat(path_);
-  return mtime_ > 0;
+  if (mtime_ == -1) {
+    // TODO: Let DiskInterface::Stat() take err instead of it calling Error().
+    *err = "stat failed";
+    return false;
+  }
+  return true;
 }
 
 bool DependencyScan::RecomputeDirty(Edge* edge, string* err) {
@@ -48,7 +53,8 @@ bool DependencyScan::RecomputeDirty(Edge* edge, string* err) {
   // graphs.
   for (vector<Node*>::iterator o = edge->outputs_.begin();
        o != edge->outputs_.end(); ++o) {
-    (*o)->StatIfNecessary(disk_interface_);
+    if (!(*o)->StatIfNecessary(disk_interface_, err))
+      return false;
   }
 
   if (!dep_loader_.LoadDeps(edge, err)) {
@@ -62,7 +68,9 @@ bool DependencyScan::RecomputeDirty(Edge* edge, string* err) {
   Node* most_recent_input = NULL;
   for (vector<Node*>::iterator i = edge->inputs_.begin();
        i != edge->inputs_.end(); ++i) {
-    if ((*i)->StatIfNecessary(disk_interface_)) {
+    if (!(*i)->status_known()) {
+      if (!(*i)->StatIfNecessary(disk_interface_, err))
+        return false;
       if (Edge* in_edge = (*i)->in_edge()) {
         if (!RecomputeDirty(in_edge, err))
           return false;
@@ -97,7 +105,8 @@ bool DependencyScan::RecomputeDirty(Edge* edge, string* err) {
   // We may also be dirty due to output state: missing outputs, out of
   // date outputs, etc.  Visit all outputs and determine whether they're dirty.
   if (!dirty)
-    dirty = RecomputeOutputsDirty(edge, most_recent_input);
+    if (!RecomputeOutputsDirty(edge, most_recent_input, &dirty, err))
+      return false;
 
   // Finally, visit each output and update their dirty state if necessary.
   for (vector<Node*>::iterator o = edge->outputs_.begin();
@@ -117,16 +126,19 @@ bool DependencyScan::RecomputeDirty(Edge* edge, string* err) {
   return true;
 }
 
-bool DependencyScan::RecomputeOutputsDirty(Edge* edge,
-                                           Node* most_recent_input) {
+bool DependencyScan::RecomputeOutputsDirty(Edge* edge, Node* most_recent_input,
+                                           bool* outputs_dirty, string* err) {
   string command = edge->EvaluateCommand(/*incl_rsp_file=*/true);
   for (vector<Node*>::iterator o = edge->outputs_.begin();
        o != edge->outputs_.end(); ++o) {
-    (*o)->StatIfNecessary(disk_interface_);
-    if (RecomputeOutputDirty(edge, most_recent_input, command, *o))
+    if (!(*o)->StatIfNecessary(disk_interface_, err))
+      return false;
+    if (RecomputeOutputDirty(edge, most_recent_input, command, *o)) {
+      *outputs_dirty = true;
       return true;
+    }
   }
-  return false;
+  return true;
 }
 
 bool DependencyScan::RecomputeOutputDirty(Edge* edge,
diff --git a/src/graph.h b/src/graph.h
index 9eafc05..9526712 100644
--- a/src/graph.h
+++ b/src/graph.h
@@ -41,15 +41,14 @@ struct Node {
         in_edge_(NULL),
         id_(-1) {}
 
-  /// Return true if the file exists (mtime_ got a value).
-  bool Stat(DiskInterface* disk_interface);
+  /// Return false on error.
+  bool Stat(DiskInterface* disk_interface, string* err);
 
-  /// Return true if we needed to stat.
-  bool StatIfNecessary(DiskInterface* disk_interface) {
+  /// Return false on error.
+  bool StatIfNecessary(DiskInterface* disk_interface, string* err) {
     if (status_known())
-      return false;
-    Stat(disk_interface);
-    return true;
+      return true;
+    return Stat(disk_interface, err);
   }
 
   /// Mark as not-yet-stat()ed and not dirty.
@@ -236,9 +235,10 @@ struct DependencyScan {
   /// Returns false on failure.
   bool RecomputeDirty(Edge* edge, string* err);
 
-  /// Recompute whether any output of the edge is dirty.
-  /// Returns true if so.
-  bool RecomputeOutputsDirty(Edge* edge, Node* most_recent_input);
+  /// Recompute whether any output of the edge is dirty, if so sets |*dirty|.
+  /// Returns false on failure.
+  bool RecomputeOutputsDirty(Edge* edge, Node* most_recent_input,
+                             bool* dirty, string* err);
 
   BuildLog* build_log() const {
     return build_log_;
-- 
cgit v0.12