summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Martin <martine@danga.com>2011-04-23 00:03:15 (GMT)
committerEvan Martin <martine@danga.com>2011-04-23 00:06:55 (GMT)
commitf67b7c25008b1777600b8e8a35a847bdcf3bc4bc (patch)
tree5317ba28e98f26ebb141572a7cf2c28e078fe5d9
parente587ab486e9e448c24b78f34ec3935efc1df50ae (diff)
downloadNinja-f67b7c25008b1777600b8e8a35a847bdcf3bc4bc.zip
Ninja-f67b7c25008b1777600b8e8a35a847bdcf3bc4bc.tar.gz
Ninja-f67b7c25008b1777600b8e8a35a847bdcf3bc4bc.tar.bz2
optimize CanonicalizePath
Null build of Chrome: before I added extra calls to CanonicalizePath: 1.25s. with extra calls to CanonicalizePath: 1.35s. with new CanonicalizePath: 1.05s. And now CanonicalizePath isn't hot on profiles anymore.
-rw-r--r--src/util.cc103
-rw-r--r--src/util.h3
2 files changed, 71 insertions, 35 deletions
diff --git a/src/util.cc b/src/util.cc
index e8fcc64..cb88fdb 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -17,6 +17,7 @@
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <vector>
@@ -39,56 +40,90 @@ void Error(const char* msg, ...) {
fprintf(stderr, "\n");
}
-bool CanonicalizePath(std::string* path, std::string* err) {
- // Try to fast-path out the common case.
- if (path->find("/.") == std::string::npos &&
- path->find("./") == std::string::npos &&
- path->find("//") == std::string::npos) {
- return true;
- }
+bool CanonicalizePath(string* path, string* err) {
+ // WARNING: this function is performance-critical; please benchmark
+ // any changes you make to it.
+
+ // We don't want to allocate memory if necessary; a previous version
+ // of this function modified |path| as it went, which meant we
+ // needed to keep a copy of it around for including in a potential
+ // error message.
+ //
+ // Instead, we find all path components within the path, then fix
+ // them up to eliminate "foo/.." pairs and "." components. Finally,
+ // we overwrite path with these new substrings (since the path only
+ // ever gets shorter, we can just use memmove within it).
+
+ const int kMaxPathComponents = 20;
+ const char* starts[kMaxPathComponents]; // Starts of path components.
+ int lens[100]; // Lengths of path components.
- std::string inpath = *path;
- std::vector<const char*> parts;
- for (std::string::size_type start = 0; start < inpath.size(); ++start) {
- std::string::size_type end = inpath.find('/', start);
- if (end == std::string::npos)
- end = inpath.size();
- else
- inpath[end] = 0;
- if (end > start)
- parts.push_back(inpath.data() + start);
+ int parts_count = 0; // Number of entries in starts/lens.
+ int slash_count = 0; // Number of components in the original path.
+ for (string::size_type start = 0; start < path->size(); ++start) {
+ string::size_type end = path->find('/', start);
+ if (end == string::npos)
+ end = path->size();
+ if (end > start) {
+ if (parts_count == kMaxPathComponents) {
+ *err = "can't canonicalize path '" + *path + "'; too many "
+ "path components";
+ return false;
+ }
+ starts[parts_count] = path->data() + start;
+ lens[parts_count] = end - start;
+ ++parts_count;
+ }
+ ++slash_count;
start = end;
}
- std::vector<const char*>::iterator i = parts.begin();
- while (i != parts.end()) {
- const char* part = *i;
- if (part[0] == '.') {
- if (part[1] == 0) {
- // "."; strip.
- parts.erase(i);
- continue;
- } else if (part[1] == '.' && part[2] == 0) {
- // ".."; go up one.
- if (i == parts.begin()) {
+ int i = 0;
+ while (i < parts_count) {
+ const char* start = starts[i];
+ int len = lens[i];
+ if (start[0] == '.') {
+ int strip_components = 0;
+ if (len == 1) {
+ // "."; strip this component.
+ strip_components = 1;
+ } else if (len == 2 && start[1] == '.') {
+ // ".."; strip this and the previous component.
+ if (i == 0) {
*err = "can't canonicalize path '" + *path + "' that reaches "
"above its directory";
return false;
}
+ strip_components = 2;
--i;
- parts.erase(i, i + 2);
+ }
+
+ if (strip_components) {
+ // Shift arrays backwards to remove bad path components.
+ int entries_to_move = parts_count - i - strip_components;
+ memmove(starts + i, starts + i + strip_components,
+ sizeof(starts[0]) * entries_to_move);
+ memmove(lens + i, lens + i + strip_components,
+ sizeof(lens[0]) * entries_to_move);
+ parts_count -= strip_components;
continue;
}
}
++i;
}
- path->clear();
- for (i = parts.begin(); i != parts.end(); ++i) {
- if (!path->empty())
- path->push_back('/');
- path->append(*i);
+ if (parts_count == slash_count)
+ return true; // Nothing to do.
+
+ char* p = (char*)path->data();
+ for (i = 0; i < parts_count; ++i) {
+ if (p > path->data())
+ *p++ = '/';
+ int len = lens[i];
+ memmove(p, starts[i], len);
+ p += len;
}
+ path->resize(p - path->data());
return true;
}
diff --git a/src/util.h b/src/util.h
index dc3e40f..3d4d33c 100644
--- a/src/util.h
+++ b/src/util.h
@@ -17,6 +17,7 @@
#pragma once
#include <string>
+using namespace std;
// Log a fatal message, dump a backtrace, and exit.
void Fatal(const char* msg, ...);
@@ -25,6 +26,6 @@ void Fatal(const char* msg, ...);
void Error(const char* msg, ...);
// Canonicalize a path like "foo/../bar.h" into just "bar.h".
-bool CanonicalizePath(std::string* path, std::string* err);
+bool CanonicalizePath(string* path, string* err);
#endif // NINJA_UTIL_H_