From 4d463b48b8feb0691719a520d7ab825ece377a52 Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Sun, 23 Jan 2011 11:28:38 -0800 Subject: recompact log files --- src/build_log.cc | 71 +++++++++++++++++++++++++++++++++++++++++++++++++------- src/build_log.h | 7 ++++++ todo | 2 -- 3 files changed, 70 insertions(+), 10 deletions(-) diff --git a/src/build_log.cc b/src/build_log.cc index 53fca4e..b5b429a 100644 --- a/src/build_log.cc +++ b/src/build_log.cc @@ -11,11 +11,17 @@ // Each run's log appends to the log file. // To load, we run through all log entries in series, throwing away // older runs. -// XXX figure out recompaction strategy +// Once the number of redundant entries exceeds a threshold, we write +// out a new file and replace the existing one with it. -BuildLog::BuildLog() : log_file_(NULL) {} +BuildLog::BuildLog() : log_file_(NULL), needs_recompaction_(false) {} bool BuildLog::OpenForWrite(const string& path, string* err) { + if (needs_recompaction_) { + if (!Recompact(path, err)) + return false; + } + log_file_ = fopen(path.c_str(), "ab"); if (!log_file_) { *err = strerror(errno); @@ -45,7 +51,7 @@ void BuildLog::RecordCommand(Edge* edge, int time_ms) { log_entry->command = command; log_entry->time_ms = time_ms; - fprintf(log_file_, "%d %s %s\n", time_ms, path.c_str(), command.c_str()); + WriteEntry(log_file_, *log_entry); } } @@ -64,6 +70,9 @@ bool BuildLog::Load(const string& path, string* err) { return false; } + int unique_entry_count = 0; + int total_entry_count = 0; + char buf[256 << 10]; while (fgets(buf, sizeof(buf), file)) { char* start = buf; @@ -71,20 +80,37 @@ bool BuildLog::Load(const string& path, string* err) { if (!end) continue; - LogEntry* entry = new LogEntry; *end = 0; - entry->time_ms = atoi(start); - + int time_ms = atoi(start); start = end + 1; end = strchr(start, ' '); - entry->output = string(start, end - start); + string output = string(start, end - start); + + LogEntry* entry; + Log::iterator i = log_.find(output); + if (i != 
log_.end()) { + entry = i->second; + } else { + entry = new LogEntry; + log_.insert(make_pair(output, entry)); + ++unique_entry_count; + } + ++total_entry_count; + + entry->time_ms = time_ms; + entry->output = output; start = end + 1; end = strchr(start, '\n'); entry->command = string(start, end - start); - log_[entry->output] = entry; } + // Mark the log as "needs rebuilding" if it has kCompactionRatio times + // too many log entries. + int kCompactionRatio = 3; + if (total_entry_count > unique_entry_count * kCompactionRatio) + needs_recompaction_ = true; + return true; } @@ -95,3 +121,32 @@ BuildLog::LogEntry* BuildLog::LookupByOutput(const string& path) { return i->second; return NULL; } + +void BuildLog::WriteEntry(FILE* f, const LogEntry& entry) { + fprintf(f, "%d %s %s\n", + entry.time_ms, entry.output.c_str(), entry.command.c_str()); +} + +bool BuildLog::Recompact(const string& path, string* err) { + printf("Recompacting log...\n"); + + string temp_path = path + ".recompact"; + FILE* f = fopen(temp_path.c_str(), "wb"); + if (!f) { + *err = strerror(errno); + return false; + } + + for (Log::iterator i = log_.begin(); i != log_.end(); ++i) { + WriteEntry(f, *i->second); + } + + fclose(f); + + if (rename(temp_path.c_str(), path.c_str()) < 0) { + *err = strerror(errno); + return false; + } + + return true; +} diff --git a/src/build_log.h b/src/build_log.h index 2442f70..265f378 100644 --- a/src/build_log.h +++ b/src/build_log.h @@ -33,7 +33,14 @@ struct BuildLog { // Lookup a previously-run command by its output path. LogEntry* LookupByOutput(const string& path); + // Serialize an entry into a log file. + void WriteEntry(FILE* f, const LogEntry& entry); + + // Rewrite the known log entries, throwing away old data. 
+ bool Recompact(const string& path, string* err); + typedef map<string, LogEntry*> Log; Log log_; FILE* log_file_; + bool needs_recompaction_; }; diff --git a/todo b/todo index ddd350a..3b3c5df 100644 --- a/todo +++ b/todo @@ -3,8 +3,6 @@ necessary delete halfway-built output files when interrupted -recompact log files - frosting ======== -- cgit v0.12