summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Martin <martine@danga.com>2012-12-17 17:08:15 (GMT)
committerEvan Martin <martine@danga.com>2013-04-08 21:45:06 (GMT)
commitb6a9a1c8adbb444c2489d884f06e5bd39627c3e9 (patch)
tree61a7e8e06a57e1ef054a48e6febe061ac51635a5
parent9f1852fa3c97197e1876f1d47ca45e66b5e6cd28 (diff)
downloadNinja-b6a9a1c8adbb444c2489d884f06e5bd39627c3e9.zip
Ninja-b6a9a1c8adbb444c2489d884f06e5bd39627c3e9.tar.gz
Ninja-b6a9a1c8adbb444c2489d884f06e5bd39627c3e9.tar.bz2
add DepsLog, a new data structure for dependency information
DepsLog is a compact serialization of dependency information. It can be used to replace depfiles for faster loading.
-rwxr-xr-xconfigure.py2
-rw-r--r--src/deps_log.cc149
-rw-r--r--src/deps_log.h91
-rw-r--r--src/deps_log_test.cc63
-rw-r--r--src/graph.h9
-rw-r--r--src/state.cc5
6 files changed, 316 insertions, 3 deletions
diff --git a/configure.py b/configure.py
index 10c6994..8f5a497 100755
--- a/configure.py
+++ b/configure.py
@@ -269,6 +269,7 @@ for name in ['build',
'build_log',
'clean',
'depfile_parser',
+ 'deps_log',
'disk_interface',
'edit_distance',
'eval_env',
@@ -348,6 +349,7 @@ for name in ['build_log_test',
'build_test',
'clean_test',
'depfile_parser_test',
+ 'deps_log_test',
'disk_interface_test',
'edit_distance_test',
'graph_test',
diff --git a/src/deps_log.cc b/src/deps_log.cc
new file mode 100644
index 0000000..ca7fd4b
--- /dev/null
+++ b/src/deps_log.cc
@@ -0,0 +1,149 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "deps_log.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "graph.h"
+#include "state.h"
+#include "util.h"
+
+/// Open the log at |path| for appending and keep the stream in file_.
+/// On failure, stores the strerror() text for errno in |*err| and returns
+/// false.
+bool DepsLog::OpenForWrite(const string& path, string* err) {
+  file_ = fopen(path.c_str(), "ab");
+  if (file_ != NULL) {
+    SetCloseOnExec(fileno(file_));
+
+    // On Windows a stream opened for append does not start out positioned
+    // at the end of the file, so seek there by hand.
+    fseek(file_, 0, SEEK_END);
+
+    /* XXX
+    if (ftell(log_file_) == 0) {
+      if (fprintf(log_file_, kFileSignature, kCurrentVersion) < 0) {
+        *err = strerror(errno);
+        return false;
+      }
+    }
+    */
+
+    return true;
+  }
+
+  *err = strerror(errno);
+  return false;
+}
+
+/// Append a dependency record for |node| to the open log.
+///
+/// The record is a 2-byte length with the high bit set, followed by 4-byte
+/// integers: [output id, output mtime, input id, input id, ...].  Any node
+/// that has no id yet first gets a path record written via RecordId().
+///
+/// @param node   the output whose dependencies are being recorded.
+/// @param mtime  the output's mtime to store alongside the dependencies.
+/// @param nodes  the inputs that |node| depends on.
+/// @return false if the record would not fit in the 15-bit length field or
+///         if a write fails; true otherwise.
+bool DepsLog::RecordDeps(Node* node, TimeStamp mtime,
+                         const vector<Node*>& nodes) {
+  // The length shares its 16 bits with the record-type flag, so the payload
+  // may use at most 15 bits.  Check before writing anything.
+  const size_t payload_size = 4 * (1 + 1 + nodes.size());
+  if (payload_size > 0x7FFF)
+    return false;
+
+  // Assign ids to all nodes that are missing one.
+  if (node->id() < 0 && !RecordId(node))
+    return false;
+  for (vector<Node*>::const_iterator i = nodes.begin();
+       i != nodes.end(); ++i) {
+    if ((*i)->id() < 0 && !RecordId(*i))
+      return false;
+  }
+
+  uint16_t size = (uint16_t)payload_size;
+  size |= 0x8000;  // Deps record: set high bit.
+  if (fwrite(&size, 2, 1, file_) < 1)
+    return false;
+  int id = node->id();
+  if (fwrite(&id, 4, 1, file_) < 1)
+    return false;
+  // Store the mtime the caller passed in, not whatever the node last stat'ed.
+  int timestamp = mtime;
+  if (fwrite(&timestamp, 4, 1, file_) < 1)
+    return false;
+  for (vector<Node*>::const_iterator i = nodes.begin();
+       i != nodes.end(); ++i) {
+    // Each entry is the id of the input itself, not of the output.
+    id = (*i)->id();
+    if (fwrite(&id, 4, 1, file_) < 1)
+      return false;
+  }
+
+  return true;
+}
+
+/// Close the log stream opened by OpenForWrite().  Safe to call when no
+/// stream is open (file_ is NULL), e.g. when called twice in a row.
+void DepsLog::Close() {
+  // fclose() on a null stream is undefined, so only close a live stream.
+  if (file_)
+    fclose(file_);
+  file_ = NULL;
+}
+
+/// Read the whole log at |path|, registering every path record as a Node in
+/// |state| and every deps record in deps_.
+///
+/// Path records are numbered in file order; that number becomes the node's
+/// id and its index in nodes_.  A later deps record for the same output
+/// replaces an earlier one.  Reading stops at the first short read, so a
+/// truncated trailing record is ignored.
+///
+/// @return false with |*err| set if the file cannot be opened or a read
+///         error occurs; true otherwise.
+bool DepsLog::Load(const string& path, State* state, string* err) {
+  char buf[32 << 10];
+  FILE* f = fopen(path.c_str(), "rb");
+  if (!f) {
+    *err = strerror(errno);
+    return false;
+  }
+
+  int id = 0;
+  for (;;) {
+    uint16_t size;
+    if (fread(&size, 2, 1, f) < 1)
+      break;
+    // High bit selects the record type; the low 15 bits are the length.
+    bool is_deps = (size >> 15) != 0;
+    size = size & 0x7FFF;
+
+    if (fread(buf, size, 1, f) < 1)
+      break;
+
+    if (is_deps) {
+      // A deps record holds at least the output id and the mtime.
+      assert(size % 4 == 0);
+      assert(size >= 8);
+      int* deps_data = reinterpret_cast<int*>(buf);
+      int out_id = deps_data[0];
+      int mtime = deps_data[1];
+      deps_data += 2;
+      int deps_count = (size / 4) - 2;
+      assert(out_id >= 0);
+
+      Deps* deps = new Deps;
+      deps->mtime = mtime;
+      deps->node_count = deps_count;
+      deps->nodes = new Node*[deps_count];
+      for (int i = 0; i < deps_count; ++i) {
+        assert(deps_data[i] >= 0);
+        assert(deps_data[i] < (int)nodes_.size());
+        assert(nodes_[deps_data[i]]);
+        deps->nodes[i] = nodes_[deps_data[i]];
+      }
+
+      if (out_id >= (int)deps_.size())
+        deps_.resize(out_id + 1);
+      // Later records win; deleting a null entry is a no-op.
+      delete deps_[out_id];
+      deps_[out_id] = deps;
+    } else {
+      StringPiece name(buf, size);
+      Node* node = state->GetNode(name);
+      assert(node->id() < 0);
+      node->set_id(id);
+      // Remember the node under its id so that deps records, which refer to
+      // nodes by id, can be resolved.
+      if (id >= (int)nodes_.size())
+        nodes_.resize(id + 1);
+      nodes_[id] = node;
+      ++id;
+    }
+  }
+  if (ferror(f)) {
+    // ferror() only reports a flag; the error code itself is in errno.
+    *err = strerror(errno);
+    fclose(f);
+    return false;
+  }
+  fclose(f);
+  return true;
+}
+
+/// Append a path record for |node| to the open log and give the node the
+/// next dense id (its index in nodes_).
+///
+/// @return false, leaving the node without an id, if the path cannot be
+///         stored as a record or a write fails; true otherwise.
+bool DepsLog::RecordId(Node* node) {
+  // The length field has only 15 usable bits; the high bit marks a deps
+  // record.  A zero-length record is rejected too, because Load() reads the
+  // payload with an item count of one and treats a zero result as the end.
+  const size_t path_size = node->path().size();
+  if (path_size == 0 || path_size > 0x7FFF)
+    return false;
+
+  uint16_t size = (uint16_t)path_size;
+  if (fwrite(&size, 2, 1, file_) < 1)
+    return false;
+  if (fwrite(node->path().data(), path_size, 1, file_) < 1)
+    return false;
+
+  node->set_id(nodes_.size());
+  nodes_.push_back(node);
+
+  return true;
+}
diff --git a/src/deps_log.h b/src/deps_log.h
new file mode 100644
index 0000000..45d2cea
--- /dev/null
+++ b/src/deps_log.h
@@ -0,0 +1,91 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef NINJA_DEPS_LOG_H_
+#define NINJA_DEPS_LOG_H_
+
+#include <string>
+#include <vector>
+using namespace std;
+
+#include <stdio.h>
+
+#include "timestamp.h"
+
+struct Node;
+struct State;
+
+/// As build commands run they can output extra dependency information
+/// (e.g. header dependencies for C source) via a pipe. DepsLog collects
+/// that information at build time and reloads it at startup.
+///
+/// The on-disk format is based on two primary constraints:
+/// - it must be written to as a stream (during the build, which may be
+/// interrupted);
+/// - it can be read all at once on startup. (Alternative designs, where
+/// it contains indexing information, were considered and discarded as
+/// too complicated to implement; if the file is small then reading it
+/// fully on startup is acceptable.)
+/// Here are some stats from the Windows Chrome dependency files, to
+/// help guide the design space. The total text in the files sums to
+/// 90mb so some compression is warranted to keep load-time fast.
+/// There's about 10k files worth of dependencies that reference about
+/// 40k total paths totalling 2mb of unique strings.
+///
+/// Based on the stats above, the file is structured as a sequence of records.
+/// Each record is either a path string or a dependency list.
+/// Numbering the path strings in file order gives them dense integer ids.
+/// A dependency list maps an output id to a list of input ids.
+///
+/// Concretely, a record is:
+/// two bytes record length, high bit indicates record type
+/// (implies max record length 32k)
+/// path records contain just the string name of the path
+/// dependency records are an array of 4-byte integers
+/// [output path id, output path mtime, input path id, input path id...]
+/// (The mtime is compared against the on-disk output path mtime
+/// to verify the stored data is up-to-date.)
+/// If two records reference the same output the latter one in the file
+/// wins, allowing updates to just be appended to the file. A separate
+/// repacking step can run occasionally to remove dead records.
+struct DepsLog {
+  /// Start with no open log stream so that file_ never holds garbage.
+  DepsLog() : file_(NULL) {}
+
+  /// Release the stream if it is still open and free every loaded Deps.
+  /// The object owns these raw resources, so instances must not be copied.
+  ~DepsLog() {
+    if (file_)
+      fclose(file_);
+    for (vector<Deps*>::iterator i = deps_.begin(); i != deps_.end(); ++i)
+      delete *i;
+  }
+
+  // Writing (build-time) interface.
+
+  /// Open |path| for appending; on failure returns false and fills |*err|.
+  bool OpenForWrite(const string& path, string* err);
+  /// Append a record mapping |node| (with |mtime|) to its inputs |nodes|.
+  bool RecordDeps(Node* node, TimeStamp mtime, const vector<Node*>& nodes);
+  /// Close the stream opened by OpenForWrite().
+  void Close();
+
+  // Reading (startup-time) interface.
+
+  /// Read the log at |path| into |state|; on failure returns false and
+  /// fills |*err|.
+  bool Load(const string& path, State* state, string* err);
+
+ private:
+  // Write a node name record, assigning it an id.
+  bool RecordId(Node* node);
+
+  /// The dependency list loaded for one output.
+  struct Deps {
+    Deps() : mtime(-1), node_count(0), nodes(NULL) {}
+    ~Deps() { delete [] nodes; }
+    int mtime;        // mtime stored in the record.
+    int node_count;   // number of entries in |nodes|.
+    Node** nodes;     // owned array of input nodes.
+  };
+
+  /// Stream being appended to, or NULL when no log is open.
+  FILE* file_;
+  /// Nodes indexed by their id.
+  vector<Node*> nodes_;
+  /// Loaded dependency lists indexed by output id; entries may be NULL.
+  vector<Deps*> deps_;
+
+  friend struct DepsLogTest;
+};
+
+#endif // NINJA_DEPS_LOG_H_
diff --git a/src/deps_log_test.cc b/src/deps_log_test.cc
new file mode 100644
index 0000000..540865b
--- /dev/null
+++ b/src/deps_log_test.cc
@@ -0,0 +1,63 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "deps_log.h"
+
+#include "graph.h"
+#include "util.h"
+#include "test.h"
+
+namespace {
+
+const char kTestFilename[] = "DepsLogTest-tempfile";
+
+/// Test fixture that removes the temporary log file before and after each
+/// test so that runs neither see nor leave behind stale data.
+struct DepsLogTest : public testing::Test {
+  virtual void SetUp() {
+    // In case a crashing test left a stale file behind.
+    unlink(kTestFilename);
+  }
+  virtual void TearDown() {
+    // Do not leave the temporary log in the working directory.
+    unlink(kTestFilename);
+  }
+};
+
+// Round-trip smoke test: write two dependency records to a fresh log, then
+// load the file into a second State and dump the result.  The output is
+// only printed; nothing about the loaded data is asserted.
+TEST_F(DepsLogTest, WriteRead) {
+ State state1;
+ DepsLog log1;
+ string err;
+ EXPECT_TRUE(log1.OpenForWrite(kTestFilename, &err));
+ ASSERT_EQ("", err);
+
+ // First record: out.o depends on foo.h and bar.h.
+ vector<Node*> deps;
+ deps.push_back(state1.GetNode("foo.h"));
+ deps.push_back(state1.GetNode("bar.h"));
+ log1.RecordDeps(state1.GetNode("out.o"), 1, deps);
+
+ // Second record shares foo.h with the first one.
+ deps.clear();
+ deps.push_back(state1.GetNode("foo.h"));
+ deps.push_back(state1.GetNode("bar2.h"));
+ log1.RecordDeps(state1.GetNode("out2.o"), 2, deps);
+
+ log1.Close();
+
+ State state2;
+ DepsLog log2;
+ // NOTE(review): Load() is called on log1, so log2 is never used.  log1
+ // still holds the node table filled in while writing; presumably log2 was
+ // intended here -- confirm that Load() can resolve ids on a fresh DepsLog
+ // before switching.
+ EXPECT_TRUE(log1.Load(kTestFilename, &state2, &err));
+ ASSERT_EQ("", err);
+ state2.Dump();
+
+ state2.GetNode("out2.o")->Dump();
+}
+
+} // anonymous namespace
diff --git a/src/graph.h b/src/graph.h
index 8b93e29..4ef05ec 100644
--- a/src/graph.h
+++ b/src/graph.h
@@ -32,7 +32,8 @@ struct Node {
: path_(path),
mtime_(-1),
dirty_(false),
- in_edge_(NULL) {}
+ in_edge_(NULL),
+ id_(-1) {}
/// Return true if the file exists (mtime_ got a value).
bool Stat(DiskInterface* disk_interface);
@@ -74,6 +75,9 @@ struct Node {
Edge* in_edge() const { return in_edge_; }
void set_in_edge(Edge* edge) { in_edge_ = edge; }
+ int id() const { return id_; }
+ void set_id(int id) { id_ = id; }
+
const vector<Edge*>& out_edges() const { return out_edges_; }
void AddOutEdge(Edge* edge) { out_edges_.push_back(edge); }
@@ -98,6 +102,9 @@ private:
/// All Edges that use this Node as an input.
vector<Edge*> out_edges_;
+
+ /// A dense integer id for the node, assigned and used by DepsLog.
+ int id_;
};
/// An invokable build command and associated metadata (description, etc.).
diff --git a/src/state.cc b/src/state.cc
index 9f46fee..d2d5ebe 100644
--- a/src/state.cc
+++ b/src/state.cc
@@ -202,10 +202,11 @@ void State::Reset() {
void State::Dump() {
for (Paths::iterator i = paths_.begin(); i != paths_.end(); ++i) {
Node* node = i->second;
- printf("%s %s\n",
+ printf("%s %s [id:%d]\n",
node->path().c_str(),
node->status_known() ? (node->dirty() ? "dirty" : "clean")
- : "unknown");
+ : "unknown",
+ node->id());
}
if (!pools_.empty()) {
printf("resource_pools:\n");