From 9940196d57d46eb8aeb1cc28a57dee290034fccf Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 28 Aug 2013 13:29:02 -0700 Subject: Deps log format v3 This fixes two bugs. 1.) It makes the record size field 4 byte instead of 2, and allows a max record size of 512kB. This fixes #605, ninja "only" supporting 8k dependencies per file -- 512kB have room for 128k dependencies. (If that's still not enough, the current design can support at least 4x that if the constant is tweaked.) 2.) It makes all records 4-byte aligned, which is probably needed to make the `reinterpret_cast(buf)` work on SPARC (cf issue #481), and it's generally nicer too. (Since there's already one reinterpret_cast, convert a memcpy() to reinterpret_cast in another branch too). --- src/deps_log.cc | 59 +++++++++++++++++++++++++++++++-------------------------- src/deps_log.h | 7 ++++--- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/deps_log.cc b/src/deps_log.cc index ee49d6b..a5d726c 100644 --- a/src/deps_log.cc +++ b/src/deps_log.cc @@ -30,15 +30,11 @@ // The version is stored as 4 bytes after the signature and also serves as a // byte order mark. Signature and version combined are 16 bytes long. const char kFileSignature[] = "# ninjadeps\n"; -const int kCurrentVersion = 2; +const int kCurrentVersion = 3; -// Since the size field is 2 bytes and the top bit marks deps entries, a single -// record can be at most 32 kB. Set the buffer size to this and flush the file -// buffer after every record to make sure records aren't written partially. -const int kMaxBufferSize = 1 << 15; - -// Record size is currently limited to 15 bit -const size_t kMaxRecordSize = (1 << 15) - 1; +// Record size is currently limited to less than the full 32 bit, due to +// internal buffers having to have this size. +const unsigned kMaxRecordSize = (1 << 19) - 1; DepsLog::~DepsLog() { Close(); @@ -55,7 +51,9 @@ bool DepsLog::OpenForWrite(const string& path, string* err) { *err = strerror(errno); return false; } - setvbuf(file_, NULL, _IOFBF, kMaxBufferSize); + // Set the buffer size to this and flush the file buffer after every record + // to make sure records aren't written partially. + setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1); SetCloseOnExec(fileno(file_)); // Opening a file in append mode doesn't set the file pointer to the file's @@ -126,14 +124,13 @@ bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, return true; // Update on-disk representation. - size_t size = 4 * (1 + 1 + (uint16_t)node_count); + unsigned size = 4 * (1 + 1 + (uint16_t)node_count); if (size > kMaxRecordSize) { errno = ERANGE; return false; } - size |= 0x8000; // Deps record: set high bit. - uint16_t size16 = (uint16_t)size; - if (fwrite(&size16, 2, 1, file_) < 1) + size |= 0x80000000; // Deps record: set high bit. + if (fwrite(&size, 4, 1, file_) < 1) return false; int id = node->id(); if (fwrite(&id, 4, 1, file_) < 1) @@ -147,7 +144,7 @@ bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, return false; } if (fflush(file_) != 0) - return false; + return false; // Update in-memory representation. Deps* deps = new Deps(mtime, node_count); @@ -166,7 +163,7 @@ void DepsLog::Close() { bool DepsLog::Load(const string& path, State* state, string* err) { METRIC_RECORD(".ninja_deps load"); - char buf[32 << 10]; + char buf[kMaxRecordSize + 1]; FILE* f = fopen(path.c_str(), "rb"); if (!f) { if (errno == ENOENT) @@ -181,7 +178,8 @@ bool DepsLog::Load(const string& path, State* state, string* err) { valid_header = false; // Note: For version differences, this should migrate to the new format. // But the v1 format could sometimes (rarely) end up with invalid data, so - // don't migrate v1 to v2 to force a rebuild. + // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days, + // and there was no release with it, so pretend that it never happened.) if (!valid_header || strcmp(buf, kFileSignature) != 0 || version != kCurrentVersion) { if (version == 1) @@ -202,16 +200,16 @@ bool DepsLog::Load(const string& path, State* state, string* err) { for (;;) { offset = ftell(f); - uint16_t size; - if (fread(&size, 2, 1, f) < 1) { + unsigned size; + if (fread(&size, 4, 1, f) < 1) { if (!feof(f)) read_failed = true; break; } - bool is_deps = (size >> 15) != 0; - size = size & 0x7FFF; + bool is_deps = (size >> 31) != 0; + size = size & 0x7FFFFFFF; - if (fread(buf, size, 1, f) < 1) { + if (fread(buf, size, 1, f) < 1 || size > kMaxRecordSize) { read_failed = true; break; } @@ -236,6 +234,10 @@ bool DepsLog::Load(const string& path, State* state, string* err) { ++unique_dep_record_count; } else { int path_size = size - 4; + // There can be up to 3 bytes of padding. + if (buf[path_size - 1] == '\0') --path_size; + if (buf[path_size - 1] == '\0') --path_size; + if (buf[path_size - 1] == '\0') --path_size; StringPiece path(buf, path_size); Node* node = state->GetNode(path); @@ -243,8 +245,7 @@ bool DepsLog::Load(const string& path, State* state, string* err) { // happen if two ninja processes write to the same deps log concurrently. // (This uses unary complement to make the checksum look less like a // dependency record entry.) - unsigned checksum; - memcpy(&checksum, buf + path_size, sizeof checksum); + unsigned checksum = *reinterpret_cast(buf + size - 4); int expected_id = ~checksum; int id = nodes_.size(); if (id != expected_id) { @@ -361,18 +362,22 @@ bool DepsLog::UpdateDeps(int out_id, Deps* deps) { } bool DepsLog::RecordId(Node* node) { - size_t size = node->path().size() + 4; + int path_size = node->path().size(); + int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary. + + unsigned size = path_size + padding + 4; if (size > kMaxRecordSize) { errno = ERANGE; return false; } - uint16_t size16 = (uint16_t)size; - if (fwrite(&size16, 2, 1, file_) < 1) + if (fwrite(&size, 4, 1, file_) < 1) return false; - if (fwrite(node->path().data(), node->path().size(), 1, file_) < 1) { + if (fwrite(node->path().data(), path_size, 1, file_) < 1) { assert(node->path().size() > 0); return false; } + if (padding && fwrite("\0\0", padding, 1, file_) < 1) + return false; int id = nodes_.size(); unsigned checksum = ~(unsigned)id; if (fwrite(&checksum, 4, 1, file_) < 1) diff --git a/src/deps_log.h b/src/deps_log.h index 47ade03..babf828 100644 --- a/src/deps_log.h +++ b/src/deps_log.h @@ -50,9 +50,10 @@ struct State; /// A dependency list maps an output id to a list of input ids. /// /// Concretely, a record is: -/// two bytes record length, high bit indicates record type -/// (implies max record length 32k) -/// path records contain the string name of the path, followed by the +/// four bytes record length, high bit indicates record type +/// (but max record sizes are capped at 512kB) +/// path records contain the string name of the path, followed by up to 3 +/// padding bytes to align on 4 byte boundaries, followed by the /// one's complement of the expected index of the record (to detect /// concurrent writes of multiple ninja processes to the log). /// dependency records are an array of 4-byte integers -- cgit v0.12