summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorEvan Martin <martine@danga.com>2012-06-15 20:45:37 (GMT)
committerEvan Martin <martine@danga.com>2012-06-15 20:45:37 (GMT)
commitde275b185943c588b25de31998133babc298a0e7 (patch)
treec831b1d94681da67af479d2a46ad8a7e0f0a2539 /src
parent8f686fae940e9d2dfb86cb8cbb5ee11176ae03de (diff)
parentd59e82c4a098f4e3987871fae0fbd6347b64b1f2 (diff)
downloadNinja-de275b185943c588b25de31998133babc298a0e7.zip
Ninja-de275b185943c588b25de31998133babc298a0e7.tar.gz
Ninja-de275b185943c588b25de31998133babc298a0e7.tar.bz2
Merge pull request #329 from nico/hash2
Only store command hashes in the build log.
Diffstat (limited to 'src')
-rw-r--r--src/build_log.cc64
-rw-r--r--src/build_log.h6
-rw-r--r--src/build_log_test.cc12
-rw-r--r--src/build_test.cc6
-rw-r--r--src/graph.cc4
-rw-r--r--src/hash_collision_bench.cc43
-rw-r--r--src/test.cc5
-rw-r--r--src/test.h1
-rw-r--r--src/util.h1
9 files changed, 125 insertions, 17 deletions
diff --git a/src/build_log.cc b/src/build_log.cc
index 5803ada..97c3344 100644
--- a/src/build_log.cc
+++ b/src/build_log.cc
@@ -37,10 +37,57 @@
namespace {
const char kFileSignature[] = "# ninja log v%d\n";
-const int kCurrentVersion = 4;
+const int kCurrentVersion = 5;
+
+// 64bit MurmurHash2, by Austin Appleby
+#if defined(_MSC_VER)
+#define BIG_CONSTANT(x) (x)
+#else // defined(_MSC_VER)
+#define BIG_CONSTANT(x) (x##LLU)
+#endif // !defined(_MSC_VER)
+inline
+uint64_t MurmurHash64A(const void* key, int len) {
+ static const uint64_t seed = 0xDECAFBADDECAFBADull;
+ const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995);
+ const int r = 47;
+ uint64_t h = seed ^ (len * m);
+ const uint64_t * data = (const uint64_t *)key;
+ const uint64_t * end = data + (len/8);
+ while(data != end) {
+ uint64_t k = *data++;
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+ h ^= k;
+ h *= m;
+ }
+ const unsigned char* data2 = (const unsigned char*)data;
+ switch(len & 7)
+ {
+ case 7: h ^= uint64_t(data2[6]) << 48;
+ case 6: h ^= uint64_t(data2[5]) << 40;
+ case 5: h ^= uint64_t(data2[4]) << 32;
+ case 4: h ^= uint64_t(data2[3]) << 24;
+ case 3: h ^= uint64_t(data2[2]) << 16;
+ case 2: h ^= uint64_t(data2[1]) << 8;
+ case 1: h ^= uint64_t(data2[0]);
+ h *= m;
+ };
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+ return h;
+}
+#undef BIG_CONSTANT
+
} // namespace
+// static
+uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) {
+ return MurmurHash64A(command.str_, command.len_);
+}
+
BuildLog::BuildLog()
: log_file_(NULL), config_(NULL), needs_recompaction_(false) {}
@@ -95,7 +142,7 @@ void BuildLog::RecordCommand(Edge* edge, int start_time, int end_time,
log_entry->output = path;
log_.insert(Log::value_type(log_entry->output, log_entry));
}
- log_entry->command = command;
+ log_entry->command_hash = LogEntry::HashCommand(command);
log_entry->start_time = start_time;
log_entry->end_time = end_time;
log_entry->restat_mtime = restat_mtime;
@@ -239,7 +286,14 @@ bool BuildLog::Load(const string& path, string* err) {
entry->start_time = start_time;
entry->end_time = end_time;
entry->restat_mtime = restat_mtime;
- entry->command = string(start, end - start);
+ if (log_version >= 5) {
+ char c = *end; *end = '\0';
+ entry->command_hash = (uint64_t)strtoull(start, NULL, 10);
+ *end = c;
+ }
+ else
+ entry->command_hash = LogEntry::HashCommand(StringPiece(start,
+ end - start));
}
// Decide whether it's time to rebuild the log:
@@ -267,9 +321,9 @@ BuildLog::LogEntry* BuildLog::LookupByOutput(const string& path) {
}
void BuildLog::WriteEntry(FILE* f, const LogEntry& entry) {
- fprintf(f, "%d\t%d\t%ld\t%s\t%s\n",
+ fprintf(f, "%d\t%d\t%ld\t%s\t%llu\n",
entry.start_time, entry.end_time, (long) entry.restat_mtime,
- entry.output.c_str(), entry.command.c_str());
+ entry.output.c_str(), entry.command_hash);
}
bool BuildLog::Recompact(const string& path, string* err) {
diff --git a/src/build_log.h b/src/build_log.h
index da8e726..98ef7b3 100644
--- a/src/build_log.h
+++ b/src/build_log.h
@@ -49,14 +49,16 @@ struct BuildLog {
struct LogEntry {
string output;
- string command;
+ uint64_t command_hash;
int start_time;
int end_time;
TimeStamp restat_mtime;
+ static uint64_t HashCommand(StringPiece command);
+
// Used by tests.
bool operator==(const LogEntry& o) {
- return output == o.output && command == o.command &&
+ return output == o.output && command_hash == o.command_hash &&
start_time == o.start_time && end_time == o.end_time &&
restat_mtime == o.restat_mtime;
}
diff --git a/src/build_log_test.cc b/src/build_log_test.cc
index 199e016..afd3b81 100644
--- a/src/build_log_test.cc
+++ b/src/build_log_test.cc
@@ -110,7 +110,7 @@ TEST_F(BuildLogTest, DoubleEntry) {
BuildLog::LogEntry* e = log.LookupByOutput("out");
ASSERT_TRUE(e);
- ASSERT_EQ("command def", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command def", e->command_hash));
}
TEST_F(BuildLogTest, Truncate) {
@@ -164,7 +164,7 @@ TEST_F(BuildLogTest, UpgradeV3) {
ASSERT_EQ(123, e->start_time);
ASSERT_EQ(456, e->end_time);
ASSERT_EQ(0, e->restat_mtime);
- ASSERT_EQ("command", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash));
}
TEST_F(BuildLogTest, SpacesInOutputV4) {
@@ -183,7 +183,7 @@ TEST_F(BuildLogTest, SpacesInOutputV4) {
ASSERT_EQ(123, e->start_time);
ASSERT_EQ(456, e->end_time);
ASSERT_EQ(456, e->restat_mtime);
- ASSERT_EQ("command", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash));
}
TEST_F(BuildLogTest, DuplicateVersionHeader) {
@@ -207,14 +207,14 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) {
ASSERT_EQ(123, e->start_time);
ASSERT_EQ(456, e->end_time);
ASSERT_EQ(456, e->restat_mtime);
- ASSERT_EQ("command", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash));
e = log.LookupByOutput("out2");
ASSERT_TRUE(e);
ASSERT_EQ(456, e->start_time);
ASSERT_EQ(789, e->end_time);
ASSERT_EQ(789, e->restat_mtime);
- ASSERT_EQ("command2", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command2", e->command_hash));
}
TEST_F(BuildLogTest, VeryLongInputLine) {
@@ -242,5 +242,5 @@ TEST_F(BuildLogTest, VeryLongInputLine) {
ASSERT_EQ(456, e->start_time);
ASSERT_EQ(789, e->end_time);
ASSERT_EQ(789, e->restat_mtime);
- ASSERT_EQ("command2", e->command);
+ ASSERT_NO_FATAL_FAILURE(AssertHash("command2", e->command_hash));
}
diff --git a/src/build_test.cc b/src/build_test.cc
index c45f2b3..23f1909 100644
--- a/src/build_test.cc
+++ b/src/build_test.cc
@@ -953,8 +953,10 @@ TEST_F(BuildWithLogTest, RspFileCmdLineChange) {
// (to simulate a change in the command line between 2 builds)
BuildLog::LogEntry * log_entry = build_log_.LookupByOutput("out");
ASSERT_TRUE(NULL != log_entry);
- ASSERT_EQ("cat out.rsp > out;rspfile=Original very long command", log_entry->command);
- log_entry->command = "cat out.rsp > out;rspfile=Altered very long command";
+ ASSERT_NO_FATAL_FAILURE(AssertHash(
+ "cat out.rsp > out;rspfile=Original very long command",
+ log_entry->command_hash));
+ log_entry->command_hash++; // Change the command hash to something else.
// Now expect the target to be rebuilt
commands_ran_.clear();
state_.Reset();
diff --git a/src/graph.cc b/src/graph.cc
index 3531e86..f381d8a 100644
--- a/src/graph.cc
+++ b/src/graph.cc
@@ -157,7 +157,7 @@ bool Edge::RecomputeOutputDirty(BuildLog* build_log,
// dirty.
if (!rule_->generator() && build_log &&
(entry || (entry = build_log->LookupByOutput(output->path())))) {
- if (command != entry->command) {
+ if (BuildLog::LogEntry::HashCommand(command) != entry->command_hash) {
EXPLAIN("command line changed for %s", output->path().c_str());
return true;
}
@@ -200,7 +200,7 @@ string EdgeEnv::LookupVariable(const string& var) {
} else if (edge_->env_) {
return edge_->env_->LookupVariable(var);
} else {
- // XXX shoudl we warn here?
+ // XXX should we warn here?
return string();
}
}
diff --git a/src/hash_collision_bench.cc b/src/hash_collision_bench.cc
new file mode 100644
index 0000000..d9b09d9
--- /dev/null
+++ b/src/hash_collision_bench.cc
@@ -0,0 +1,43 @@
+#include "build_log.h"
+
+int random(int low, int high) {
+ return int(low + (rand() / double(RAND_MAX)) * (high - low) + 0.5);
+}
+
+void RandomCommand(char** s) {
+ int len = random(5, 100);
+ *s = new char[len];
+ for (int i = 0; i < len; ++i)
+ (*s)[i] = random(32, 127);
+}
+
+int main() {
+ const int N = 20 * 1000 * 1000;
+
+ // Leak these, else 10% of the runtime is spent destroying strings.
+ char** commands = new char*[N];
+ pair<uint64_t, int>* hashes = new pair<uint64_t, int>[N];
+
+ srand(time(NULL));
+
+ for (int i = 0; i < N; ++i) {
+ RandomCommand(&commands[i]);
+ hashes[i] = make_pair(BuildLog::LogEntry::HashCommand(commands[i]), i);
+ }
+
+ sort(hashes, hashes + N);
+
+ int num_collisions = 0;
+ for (int i = 1; i < N; ++i) {
+ if (hashes[i - 1].first == hashes[i].first) {
+ if (strcmp(commands[hashes[i - 1].second],
+ commands[hashes[i].second]) != 0) {
+ printf("collision!\n string 1: '%s'\n string 2: '%s'\n",
+ commands[hashes[i - 1].second],
+ commands[hashes[i].second]);
+ num_collisions++;
+ }
+ }
+ }
+ printf("\n\n%d collisions after %d runs\n", num_collisions, N);
+}
diff --git a/src/test.cc b/src/test.cc
index 2fbb4df..afedd10 100644
--- a/src/test.cc
+++ b/src/test.cc
@@ -18,6 +18,7 @@
#include <errno.h>
+#include "build_log.h"
#include "parsers.h"
#include "util.h"
@@ -89,6 +90,10 @@ void AssertParse(State* state, const char* input) {
ASSERT_EQ("", err);
}
+void AssertHash(const char* expected, uint64_t actual) {
+ ASSERT_EQ(BuildLog::LogEntry::HashCommand(expected), actual);
+}
+
void VirtualFileSystem::Create(const string& path, int time,
const string& contents) {
files_[path].mtime = time;
diff --git a/src/test.h b/src/test.h
index 97f7cb1..37ca1f9 100644
--- a/src/test.h
+++ b/src/test.h
@@ -35,6 +35,7 @@ struct StateTestWithBuiltinRules : public testing::Test {
};
void AssertParse(State* state, const char* input);
+void AssertHash(const char* expected, uint64_t actual);
/// An implementation of DiskInterface that uses an in-memory representation
/// of disk state. It also logs file accesses and directory creations
diff --git a/src/util.h b/src/util.h
index 399913e..82f4850 100644
--- a/src/util.h
+++ b/src/util.h
@@ -77,6 +77,7 @@ double GetLoadAverage();
#define fileno _fileno
#define unlink _unlink
#define chdir _chdir
+#define strtoull _strtoui64
#endif
#ifdef _WIN32