diff options
author | Evan Martin <martine@danga.com> | 2012-06-15 20:45:37 (GMT) |
---|---|---|
committer | Evan Martin <martine@danga.com> | 2012-06-15 20:45:37 (GMT) |
commit | de275b185943c588b25de31998133babc298a0e7 (patch) | |
tree | c831b1d94681da67af479d2a46ad8a7e0f0a2539 | |
parent | 8f686fae940e9d2dfb86cb8cbb5ee11176ae03de (diff) | |
parent | d59e82c4a098f4e3987871fae0fbd6347b64b1f2 (diff) | |
download | Ninja-de275b185943c588b25de31998133babc298a0e7.zip Ninja-de275b185943c588b25de31998133babc298a0e7.tar.gz Ninja-de275b185943c588b25de31998133babc298a0e7.tar.bz2 |
Merge pull request #329 from nico/hash2
Only store command hashes in the build log.
-rwxr-xr-x | configure.py | 21 | ||||
-rw-r--r-- | src/build_log.cc | 64 | ||||
-rw-r--r-- | src/build_log.h | 6 | ||||
-rw-r--r-- | src/build_log_test.cc | 12 | ||||
-rw-r--r-- | src/build_test.cc | 6 | ||||
-rw-r--r-- | src/graph.cc | 4 | ||||
-rw-r--r-- | src/hash_collision_bench.cc | 43 | ||||
-rw-r--r-- | src/test.cc | 5 | ||||
-rw-r--r-- | src/test.h | 1 | ||||
-rw-r--r-- | src/util.h | 1 |
10 files changed, 135 insertions, 28 deletions
diff --git a/configure.py b/configure.py index 7839dae..9eb2a48 100755 --- a/configure.py +++ b/configure.py @@ -314,21 +314,20 @@ n.newline() all_targets += ninja_test -n.comment('Perftest executables.') +n.comment('Ancilliary executables.') objs = cxx('parser_perftest') -parser_perftest = n.build(binary('parser_perftest'), 'link', objs, - implicit=ninja_lib, - variables=[('libs', libs)]) +all_targets += n.build(binary('parser_perftest'), 'link', objs, + implicit=ninja_lib, variables=[('libs', libs)]) objs = cxx('build_log_perftest') -build_log_perftest = n.build(binary('build_log_perftest'), 'link', objs, - implicit=ninja_lib, - variables=[('libs', libs)]) +all_targets += n.build(binary('build_log_perftest'), 'link', objs, + implicit=ninja_lib, variables=[('libs', libs)]) objs = cxx('canon_perftest') -canon_perftest = n.build(binary('canon_perftest'), 'link', objs, - implicit=ninja_lib, - variables=[('libs', libs)]) +all_targets += n.build(binary('canon_perftest'), 'link', objs, + implicit=ninja_lib, variables=[('libs', libs)]) +objs = cxx('hash_collision_bench') +all_targets += n.build(binary('hash_collision_bench'), 'link', objs, + implicit=ninja_lib, variables=[('libs', libs)]) n.newline() -all_targets += parser_perftest + build_log_perftest + canon_perftest n.comment('Generate a graph using the "graph" tool.') n.rule('gendot', diff --git a/src/build_log.cc b/src/build_log.cc index 5803ada..97c3344 100644 --- a/src/build_log.cc +++ b/src/build_log.cc @@ -37,10 +37,57 @@ namespace { const char kFileSignature[] = "# ninja log v%d\n"; -const int kCurrentVersion = 4; +const int kCurrentVersion = 5; + +// 64bit MurmurHash2, by Austin Appleby +#if defined(_MSC_VER) +#define BIG_CONSTANT(x) (x) +#else // defined(_MSC_VER) +#define BIG_CONSTANT(x) (x##LLU) +#endif // !defined(_MSC_VER) +inline +uint64_t MurmurHash64A(const void* key, int len) { + static const uint64_t seed = 0xDECAFBADDECAFBADull; + const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); + const int r = 47; + uint64_t h = seed ^ (len * m); + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len/8); + while(data != end) { + uint64_t k = *data++; + k *= m; + k ^= k >> r; + k *= m; + h ^= k; + h *= m; + } + const unsigned char* data2 = (const unsigned char*)data; + switch(len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; + case 6: h ^= uint64_t(data2[5]) << 40; + case 5: h ^= uint64_t(data2[4]) << 32; + case 4: h ^= uint64_t(data2[3]) << 24; + case 3: h ^= uint64_t(data2[2]) << 16; + case 2: h ^= uint64_t(data2[1]) << 8; + case 1: h ^= uint64_t(data2[0]); + h *= m; + }; + h ^= h >> r; + h *= m; + h ^= h >> r; + return h; +} +#undef BIG_CONSTANT + } // namespace +// static +uint64_t BuildLog::LogEntry::HashCommand(StringPiece command) { + return MurmurHash64A(command.str_, command.len_); +} + BuildLog::BuildLog() : log_file_(NULL), config_(NULL), needs_recompaction_(false) {} @@ -95,7 +142,7 @@ void BuildLog::RecordCommand(Edge* edge, int start_time, int end_time, log_entry->output = path; log_.insert(Log::value_type(log_entry->output, log_entry)); } - log_entry->command = command; + log_entry->command_hash = LogEntry::HashCommand(command); log_entry->start_time = start_time; log_entry->end_time = end_time; log_entry->restat_mtime = restat_mtime; @@ -239,7 +286,14 @@ bool BuildLog::Load(const string& path, string* err) { entry->start_time = start_time; entry->end_time = end_time; entry->restat_mtime = restat_mtime; - entry->command = string(start, end - start); + if (log_version >= 5) { + char c = *end; *end = '\0'; + entry->command_hash = (uint64_t)strtoull(start, NULL, 10); + *end = c; + } + else + entry->command_hash = LogEntry::HashCommand(StringPiece(start, + end - start)); } // Decide whether it's time to rebuild the log: @@ -267,9 +321,9 @@ BuildLog::LogEntry* BuildLog::LookupByOutput(const string& path) { } void BuildLog::WriteEntry(FILE* f, const LogEntry& entry) { - fprintf(f, "%d\t%d\t%ld\t%s\t%s\n", + fprintf(f, "%d\t%d\t%ld\t%s\t%llu\n", entry.start_time, entry.end_time, (long) entry.restat_mtime, - entry.output.c_str(), entry.command.c_str()); + entry.output.c_str(), entry.command_hash); } bool BuildLog::Recompact(const string& path, string* err) { diff --git a/src/build_log.h b/src/build_log.h index da8e726..98ef7b3 100644 --- a/src/build_log.h +++ b/src/build_log.h @@ -49,14 +49,16 @@ struct BuildLog { struct LogEntry { string output; - string command; + uint64_t command_hash; int start_time; int end_time; TimeStamp restat_mtime; + static uint64_t HashCommand(StringPiece command); + // Used by tests. bool operator==(const LogEntry& o) { - return output == o.output && command == o.command && + return output == o.output && command_hash == o.command_hash && start_time == o.start_time && end_time == o.end_time && restat_mtime == o.restat_mtime; } diff --git a/src/build_log_test.cc b/src/build_log_test.cc index 199e016..afd3b81 100644 --- a/src/build_log_test.cc +++ b/src/build_log_test.cc @@ -110,7 +110,7 @@ TEST_F(BuildLogTest, DoubleEntry) { BuildLog::LogEntry* e = log.LookupByOutput("out"); ASSERT_TRUE(e); - ASSERT_EQ("command def", e->command); + ASSERT_NO_FATAL_FAILURE(AssertHash("command def", e->command_hash)); } TEST_F(BuildLogTest, Truncate) { @@ -164,7 +164,7 @@ TEST_F(BuildLogTest, UpgradeV3) { ASSERT_EQ(123, e->start_time); ASSERT_EQ(456, e->end_time); ASSERT_EQ(0, e->restat_mtime); - ASSERT_EQ("command", e->command); + ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash)); } TEST_F(BuildLogTest, SpacesInOutputV4) { @@ -183,7 +183,7 @@ TEST_F(BuildLogTest, SpacesInOutputV4) { ASSERT_EQ(123, e->start_time); ASSERT_EQ(456, e->end_time); ASSERT_EQ(456, e->restat_mtime); - ASSERT_EQ("command", e->command); + ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash)); } TEST_F(BuildLogTest, DuplicateVersionHeader) { @@ -207,14 +207,14 @@ TEST_F(BuildLogTest, DuplicateVersionHeader) { ASSERT_EQ(123, e->start_time); ASSERT_EQ(456, e->end_time); ASSERT_EQ(456, e->restat_mtime); - ASSERT_EQ("command", e->command); + ASSERT_NO_FATAL_FAILURE(AssertHash("command", e->command_hash)); e = log.LookupByOutput("out2"); ASSERT_TRUE(e); ASSERT_EQ(456, e->start_time); ASSERT_EQ(789, e->end_time); ASSERT_EQ(789, e->restat_mtime); - ASSERT_EQ("command2", e->command); + ASSERT_NO_FATAL_FAILURE(AssertHash("command2", e->command_hash)); } TEST_F(BuildLogTest, VeryLongInputLine) { @@ -242,5 +242,5 @@ TEST_F(BuildLogTest, VeryLongInputLine) { ASSERT_EQ(456, e->start_time); ASSERT_EQ(789, e->end_time); ASSERT_EQ(789, e->restat_mtime); - ASSERT_EQ("command2", e->command); + ASSERT_NO_FATAL_FAILURE(AssertHash("command2", e->command_hash)); } diff --git a/src/build_test.cc b/src/build_test.cc index c45f2b3..23f1909 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -953,8 +953,10 @@ TEST_F(BuildWithLogTest, RspFileCmdLineChange) { // (to simulate a change in the command line between 2 builds) BuildLog::LogEntry * log_entry = build_log_.LookupByOutput("out"); ASSERT_TRUE(NULL != log_entry); - ASSERT_EQ("cat out.rsp > out;rspfile=Original very long command", log_entry->command); - log_entry->command = "cat out.rsp > out;rspfile=Altered very long command"; + ASSERT_NO_FATAL_FAILURE(AssertHash( + "cat out.rsp > out;rspfile=Original very long command", + log_entry->command_hash)); + log_entry->command_hash++; // Change the command hash to something else. // Now expect the target to be rebuilt commands_ran_.clear(); state_.Reset(); diff --git a/src/graph.cc b/src/graph.cc index 3531e86..f381d8a 100644 --- a/src/graph.cc +++ b/src/graph.cc @@ -157,7 +157,7 @@ bool Edge::RecomputeOutputDirty(BuildLog* build_log, // dirty. if (!rule_->generator() && build_log && (entry || (entry = build_log->LookupByOutput(output->path())))) { - if (command != entry->command) { + if (BuildLog::LogEntry::HashCommand(command) != entry->command_hash) { EXPLAIN("command line changed for %s", output->path().c_str()); return true; } @@ -200,7 +200,7 @@ string EdgeEnv::LookupVariable(const string& var) { } else if (edge_->env_) { return edge_->env_->LookupVariable(var); } else { - // XXX shoudl we warn here? + // XXX should we warn here? return string(); } } diff --git a/src/hash_collision_bench.cc b/src/hash_collision_bench.cc new file mode 100644 index 0000000..d9b09d9 --- /dev/null +++ b/src/hash_collision_bench.cc @@ -0,0 +1,43 @@ +#include "build_log.h" + +int random(int low, int high) { + return int(low + (rand() / double(RAND_MAX)) * (high - low) + 0.5); +} + +void RandomCommand(char** s) { + int len = random(5, 100); + *s = new char[len]; + for (int i = 0; i < len; ++i) + (*s)[i] = random(32, 127); +} + +int main() { + const int N = 20 * 1000 * 1000; + + // Leak these, else 10% of the runtime is spent destroying strings. + char** commands = new char*[N]; + pair<uint64_t, int>* hashes = new pair<uint64_t, int>[N]; + + srand(time(NULL)); + + for (int i = 0; i < N; ++i) { + RandomCommand(&commands[i]); + hashes[i] = make_pair(BuildLog::LogEntry::HashCommand(commands[i]), i); + } + + sort(hashes, hashes + N); + + int num_collisions = 0; + for (int i = 1; i < N; ++i) { + if (hashes[i - 1].first == hashes[i].first) { + if (strcmp(commands[hashes[i - 1].second], + commands[hashes[i].second]) != 0) { + printf("collision!\n string 1: '%s'\n string 2: '%s'\n", + commands[hashes[i - 1].second], + commands[hashes[i].second]); + num_collisions++; + } + } + } + printf("\n\n%d collisions after %d runs\n", num_collisions, N); +} diff --git a/src/test.cc b/src/test.cc index 2fbb4df..afedd10 100644 --- a/src/test.cc +++ b/src/test.cc @@ -18,6 +18,7 @@ #include <errno.h> +#include "build_log.h" #include "parsers.h" #include "util.h" @@ -89,6 +90,10 @@ void AssertParse(State* state, const char* input) { ASSERT_EQ("", err); } +void AssertHash(const char* expected, uint64_t actual) { + ASSERT_EQ(BuildLog::LogEntry::HashCommand(expected), actual); +} + void VirtualFileSystem::Create(const string& path, int time, const string& contents) { files_[path].mtime = time; @@ -35,6 +35,7 @@ struct StateTestWithBuiltinRules : public testing::Test { }; void AssertParse(State* state, const char* input); +void AssertHash(const char* expected, uint64_t actual); /// An implementation of DiskInterface that uses an in-memory representation /// of disk state. It also logs file accesses and directory creations @@ -77,6 +77,7 @@ double GetLoadAverage(); #define fileno _fileno #define unlink _unlink #define chdir _chdir +#define strtoull _strtoui64 #endif #ifdef _WIN32 |