From c5ea04bfc5d59fb82e8a6f0d356770883939f751 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 30 Apr 2013 09:24:48 -0700 Subject: Recompact the deps log when it gets too big. Now that Recompact() keeps all data structures intact, it can just be called at the beginning of a build and the build will still work. --- src/deps_log.cc | 21 +++++++++++++++++++-- src/deps_log.h | 8 +++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/deps_log.cc b/src/deps_log.cc index 7dbd5b2..931cc77 100644 --- a/src/deps_log.cc +++ b/src/deps_log.cc @@ -37,6 +37,12 @@ DepsLog::~DepsLog() { } bool DepsLog::OpenForWrite(const string& path, string* err) { + if (needs_recompaction_) { + Close(); + if (!Recompact(path, err)) + return false; + } + file_ = fopen(path.c_str(), "ab"); if (!file_) { *err = strerror(errno); @@ -161,6 +167,8 @@ bool DepsLog::Load(const string& path, State* state, string* err) { long offset; bool read_failed = false; + int unique_dep_record_count = 0; + int total_dep_record_count = 0; for (;;) { offset = ftell(f); @@ -193,8 +201,9 @@ bool DepsLog::Load(const string& path, State* state, string* err) { deps->nodes[i] = nodes_[deps_data[i]]; } - if (UpdateDeps(out_id, deps)) - ++dead_record_count_; + total_dep_record_count++; + if (!UpdateDeps(out_id, deps)) + ++unique_dep_record_count; } else { StringPiece path(buf, size); Node* node = state->GetNode(path); @@ -225,6 +234,14 @@ bool DepsLog::Load(const string& path, State* state, string* err) { fclose(f); + // Rebuild the log if there are too many dead records. 
+ int kMinCompactionEntryCount = 1000; + int kCompactionRatio = 3; + if (total_dep_record_count > kMinCompactionEntryCount && + total_dep_record_count > unique_dep_record_count * kCompactionRatio) { + needs_recompaction_ = true; + } + return true; } diff --git a/src/deps_log.h b/src/deps_log.h index 7270916..de0fe63 100644 --- a/src/deps_log.h +++ b/src/deps_log.h @@ -61,7 +61,7 @@ struct State; /// wins, allowing updates to just be appended to the file. A separate /// repacking step can run occasionally to remove dead records. struct DepsLog { - DepsLog() : dead_record_count_(0), file_(NULL) {} + DepsLog() : needs_recompaction_(false), file_(NULL) {} ~DepsLog(); // Writing (build-time) interface. @@ -96,11 +96,9 @@ struct DepsLog { // Write a node name record, assigning it an id. bool RecordId(Node* node); - /// Number of deps record read while loading the file that ended up - /// being unused (due to a latter entry superceding it). - int dead_record_count_; - + bool needs_recompaction_; FILE* file_; + /// Maps id -> Node. vector<Node*> nodes_; /// Maps id -> deps of that id. -- cgit v0.12