From 210ca80b06c57994851fcfdebf7f3d767c41427b Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Wed, 24 Aug 2011 15:49:41 -0700 Subject: semantic change: allow reaching into parent directories in paths This allows generating build files in a subdirectory of your source tree. - Change CanonicalizePath to accept this. - CanonicalizePath no longer has an error condition, so change it to a void function. I profiled the result against Chrome and it might be ~100ms slower, but that might just be Chrome's size working against me. In any case I think there are lower-hanging performance fruit elsewhere. --- src/graph.cc | 3 +- src/ninja.cc | 3 +- src/parsers.cc | 3 +- src/util.cc | 114 ++++++++++++++++++++----------------------------------- src/util.h | 2 +- src/util_test.cc | 55 +++++++++++++++------------ 6 files changed, 76 insertions(+), 104 deletions(-) diff --git a/src/graph.cc b/src/graph.cc index 13927fe..0f687ea 100644 --- a/src/graph.cc +++ b/src/graph.cc @@ -161,8 +161,7 @@ bool Edge::LoadDepFile(State* state, DiskInterface* disk_interface, // Add all its in-edges. for (vector::iterator i = makefile.ins_.begin(); i != makefile.ins_.end(); ++i) { - if (!CanonicalizePath(&*i, err)) - return false; + CanonicalizePath(&*i); Node* node = state->GetNode(*i); inputs_.insert(inputs_.end() - order_only_deps_, node); diff --git a/src/ninja.cc b/src/ninja.cc index b75ddbf..d4e697f 100644 --- a/src/ninja.cc +++ b/src/ninja.cc @@ -119,8 +119,7 @@ bool CollectTargetsFromArgs(State* state, int argc, char* argv[], } else { for (int i = 0; i < argc; ++i) { string path = argv[i]; - if (!CanonicalizePath(&path, err)) - return false; + CanonicalizePath(&path); Node* node = state->LookupNode(path); if (node) { targets->push_back(node); diff --git a/src/parsers.cc b/src/parsers.cc index 7c08179..2235ba2 100644 --- a/src/parsers.cc +++ b/src/parsers.cc @@ -516,8 +516,7 @@ bool ManifestParser::ParseEdge(string* err) { if (!eval.Parse(*i, &eval_err)) return tokenizer_.Error(eval_err, err); string path = eval.Evaluate(env); - if (!CanonicalizePath(&path, err)) - return false; + CanonicalizePath(&path); *i = path; } } diff --git a/src/util.cc b/src/util.cc index d5a8f1f..0ad470e 100644 --- a/src/util.cc +++ b/src/util.cc @@ -64,92 +64,62 @@ void Error(const char* msg, ...) { fprintf(stderr, "\n"); } -bool CanonicalizePath(string* path, string* err) { +void CanonicalizePath(string* path) { // WARNING: this function is performance-critical; please benchmark // any changes you make to it. - // We don't want to allocate memory if necessary; a previous version - // of this function modified |path| as it went, which meant we - // needed to keep a copy of it around for including in a potential - // error message. - // - // Instead, we find all path components within the path, then fix - // them up to eliminate "foo/.." pairs and "." components. Finally, - // we overwrite path with these new substrings (since the path only - // ever gets shorter, we can just use memmove within it). - const int kMaxPathComponents = 30; - const char* starts[kMaxPathComponents]; // Starts of path components. - int lens[kMaxPathComponents]; // Lengths of path components. - - int parts_count = 0; // Number of entries in starts/lens. - int slash_count = 0; // Number of components in the original path. - for (string::size_type start = 0; start < path->size(); ++start) { - string::size_type end = path->find('/', start); - if (end == string::npos) - end = path->size(); - if (end == 0 || end > start) { - if (parts_count == kMaxPathComponents) { - *err = "can't canonicalize path '" + *path + "'; too many " - "path components"; - return false; - } - starts[parts_count] = path->data() + start; - lens[parts_count] = end - start; - ++parts_count; - } - ++slash_count; - start = end; + char* components[kMaxPathComponents]; + int component_count = 0; + + char* start = &(*path)[0]; + char* dst = start; + const char* src = start; + const char* end = start + path->size(); + + if (*src == '/') { + ++src; + ++dst; } - int i = 0; - while (i < parts_count) { - const char* start = starts[i]; - int len = lens[i]; - if (start[0] == '.') { - int strip_components = 0; - if (len == 1) { - // "."; strip this component. - strip_components = 1; - } else if (len == 2 && start[1] == '.') { - // ".."; strip this and the previous component. - if (i == 0) { - *err = "can't canonicalize path '" + *path + "' that reaches " - "above its directory"; - return false; - } - strip_components = 2; - --i; - } + while (src < end) { + const char* sep = (const char*)memchr(src, '/', end - src); + if (sep == NULL) + sep = end; - if (strip_components) { - // Shift arrays backwards to remove bad path components. - int entries_to_move = parts_count - i - strip_components; - memmove(starts + i, starts + i + strip_components, - sizeof(starts[0]) * entries_to_move); - memmove(lens + i, lens + i + strip_components, - sizeof(lens[0]) * entries_to_move); - parts_count -= strip_components; + if (*src == '.') { + if (sep - src == 1) { + // '.' component; eliminate. + src += 2; + continue; + } else if (sep - src == 2 && src[1] == '.') { + // '..' component. Back up if possible. + if (component_count > 0) { + dst = components[component_count - 1]; + src += 3; + --component_count; + } else { + while (src <= sep) + *dst++ = *src++; + } continue; } } - ++i; - } - if (parts_count == slash_count) - return true; // Nothing to do. + if (sep > src) { + if (component_count == kMaxPathComponents) + Fatal("path has too many components"); + components[component_count] = dst; + ++component_count; + while (src <= sep) { + *dst++ = *src++; + } + } - char* p = (char*)path->data(); - for (i = 0; i < parts_count; ++i) { - if (p > path->data()) - *p++ = '/'; - int len = lens[i]; - memmove(p, starts[i], len); - p += len; + src = sep + 1; } - path->resize(p - path->data()); - return true; + path->resize(dst - path->c_str() - 1); } int MakeDir(const string& path) { diff --git a/src/util.h b/src/util.h index c9e06f4..58a0a3a 100644 --- a/src/util.h +++ b/src/util.h @@ -31,7 +31,7 @@ void Warning(const char* msg, ...); void Error(const char* msg, ...); /// Canonicalize a path like "foo/../bar.h" into just "bar.h". -bool CanonicalizePath(string* path, string* err); +void CanonicalizePath(string* path); /// Create a directory (mode 0777 on Unix). /// Portability abstraction. diff --git a/src/util_test.cc b/src/util_test.cc index 87b614c..97a236f 100644 --- a/src/util_test.cc +++ b/src/util_test.cc @@ -18,46 +18,51 @@ TEST(CanonicalizePath, PathSamples) { std::string path = "foo.h"; - std::string err; - EXPECT_TRUE(CanonicalizePath(&path, &err)); - EXPECT_EQ("", err); + CanonicalizePath(&path); EXPECT_EQ("foo.h", path); - path = "./foo.h"; err = ""; - EXPECT_TRUE(CanonicalizePath(&path, &err)); - EXPECT_EQ("", err); + path = "./foo.h"; + CanonicalizePath(&path); EXPECT_EQ("foo.h", path); - path = "./foo/./bar.h"; err = ""; - EXPECT_TRUE(CanonicalizePath(&path, &err)); - EXPECT_EQ("", err); + path = "./foo/./bar.h"; + CanonicalizePath(&path); EXPECT_EQ("foo/bar.h", path); - path = "./x/foo/../bar.h"; err = ""; - EXPECT_TRUE(CanonicalizePath(&path, &err)); - EXPECT_EQ("", err); + path = "./x/foo/../bar.h"; + CanonicalizePath(&path); EXPECT_EQ("x/bar.h", path); - path = "./x/foo/../../bar.h"; err = ""; - EXPECT_TRUE(CanonicalizePath(&path, &err)); - EXPECT_EQ("", err); + path = "./x/foo/../../bar.h"; + CanonicalizePath(&path); EXPECT_EQ("bar.h", path); - path = "foo//bar"; err = ""; - EXPECT_TRUE(CanonicalizePath(&path, &err)); - EXPECT_EQ("", err); + path = "foo//bar"; + CanonicalizePath(&path); EXPECT_EQ("foo/bar", path); - path = "./x/../foo/../../bar.h"; err = ""; - EXPECT_FALSE(CanonicalizePath(&path, &err)); - EXPECT_EQ("can't canonicalize path './x/../foo/../../bar.h' that reaches " - "above its directory", err); + path = "foo//.//..///bar"; + CanonicalizePath(&path); + EXPECT_EQ("bar", path); + + path = "./x/../foo/../../bar.h"; + CanonicalizePath(&path); + EXPECT_EQ("../bar.h", path); +} + +TEST(CanonicalizePath, UpDir) { + std::string path, err; + path = "../../foo/bar.h"; + CanonicalizePath(&path); + EXPECT_EQ("../../foo/bar.h", path); + + path = "test/../../foo/bar.h"; + CanonicalizePath(&path); + EXPECT_EQ("../foo/bar.h", path); } TEST(CanonicalizePath, AbsolutePath) { string path = "/usr/include/stdio.h"; - string err = ""; - EXPECT_TRUE(CanonicalizePath(&path, &err)); - EXPECT_EQ("", err); + CanonicalizePath(&path); EXPECT_EQ("/usr/include/stdio.h", path); } -- cgit v0.12