From 3b320023276f98b978054c14c65d3888b989ff4a Mon Sep 17 00:00:00 2001 From: tikuta Date: Wed, 12 Apr 2017 14:40:40 +0900 Subject: Make clparser faster This patch improves perfromance of clparser. * Reduce the number of calling GetFullPathName. * Use StringPiece for Split and Join. * Add EqualsCaseInsensitive for StringPiece not to generate new string instance. * Add some utility member in StringPiece class. --- src/clparser.cc | 11 +++- src/clparser_perftest.cc | 2 +- src/includes_normalize-win32.cc | 127 ++++++++++++++++++++++++++++------------ src/includes_normalize.h | 18 +++--- src/includes_normalize_test.cc | 43 +++----------- src/string_piece.h | 8 +++ 6 files changed, 130 insertions(+), 79 deletions(-) diff --git a/src/clparser.cc b/src/clparser.cc index c17150b..c993e36 100644 --- a/src/clparser.cc +++ b/src/clparser.cc @@ -18,8 +18,11 @@ #include #include +#include "metrics.h" + #ifdef _WIN32 #include "includes_normalize.h" +#include "string_piece.h" #else #include "util.h" #endif @@ -72,9 +75,15 @@ bool CLParser::FilterInputFilename(string line) { // static bool CLParser::Parse(const string& output, const string& deps_prefix, string* filtered_output, string* err) { + METRIC_RECORD("CLParser::Parse"); + // Loop over all lines in the output to process them. assert(&output != filtered_output); size_t start = 0; +#ifdef _WIN32 + IncludesNormalize normalizer("."); +#endif + while (start < output.size()) { size_t end = output.find_first_of("\r\n", start); if (end == string::npos) @@ -85,7 +94,7 @@ bool CLParser::Parse(const string& output, const string& deps_prefix, if (!include.empty()) { string normalized; #ifdef _WIN32 - if (!IncludesNormalize::Normalize(include, NULL, &normalized, err)) + if (!normalizer.Normalize(include, &normalized, err)) return false; #else // TODO: should this make the path relative to cwd? diff --git a/src/clparser_perftest.cc b/src/clparser_perftest.cc index 101a4e2..7ac5230 100644 --- a/src/clparser_perftest.cc +++ b/src/clparser_perftest.cc @@ -145,7 +145,7 @@ int main(int argc, char* argv[]) { } int64_t end = GetTimeMillis(); - if (end - start > 100) { + if (end - start > 2000) { int delta_ms = (int)(end - start); printf("Parse %d times in %dms avg %.1fus\n", limit, delta_ms, float(delta_ms * 1000) / limit); diff --git a/src/includes_normalize-win32.cc b/src/includes_normalize-win32.cc index e8a3e0f..b60ad70 100644 --- a/src/includes_normalize-win32.cc +++ b/src/includes_normalize-win32.cc @@ -15,6 +15,7 @@ #include "includes_normalize.h" #include "string_piece.h" +#include "string_piece_util.h" #include "util.h" #include @@ -25,8 +26,45 @@ namespace { -/// Return true if paths a and b are on the same Windows drive. +bool IsPathSeparator(char c) { + return c == '/' || c == '\\'; +} + +// Return true if paths a and b are on the same windows drive. +// Return false if this funcation cannot check +// whether or not on the same windows drive. +bool SameDriveFast(StringPiece a, StringPiece b) { + if (a.size() < 3 || b.size() < 3) { + return false; + } + + if (!isalpha(a[0]) || !isalpha(b[0])) { + return false; + } + + if (tolower(a[0]) != tolower(b[0])) { + return false; + } + + if (a[1] != ':' || b[1] != ':') { + return false; + } + + if (!IsPathSeparator(a[2]) || + !IsPathSeparator(b[2])) { + return false; + } + + return true; +} + +// Return true if paths a and b are on the same Windows drive. bool SameDrive(StringPiece a, StringPiece b) { + // Fast check. + if (SameDriveFast(a, b)) { + return true; + } + char a_absolute[_MAX_PATH]; char b_absolute[_MAX_PATH]; GetFullPathName(a.AsString().c_str(), sizeof(a_absolute), a_absolute, NULL); @@ -38,34 +76,54 @@ bool SameDrive(StringPiece a, StringPiece b) { return _stricmp(a_drive, b_drive) == 0; } -} // anonymous namespace +bool IsAbsPath(StringPiece s) { + if (s.size() < 3 || + !isalpha(s[0]) || + s[1] != ':' || + !IsPathSeparator(s[2])) { + return false; + } + + // Check "." or ".." is contained in path. + for (size_t i = 2; i < s.size(); ++i) { + if (!IsPathSeparator(s[i])) { + continue; + } -string IncludesNormalize::Join(const vector& list, char sep) { - string ret; - for (size_t i = 0; i < list.size(); ++i) { - ret += list[i]; - if (i != list.size() - 1) - ret += sep; + // Check ".". + if (i + 1 < s.size() && s[i+1] == '.' && + (i + 2 >= s.size() || IsPathSeparator(s[i+2]))) { + return false; + } + + // Check "..". + if (i + 2 < s.size() && s[i+1] == '.' && s[i+2] == '.' && + (i + 3 >= s.size() || IsPathSeparator(s[i+3]))) { + return false; + } } - return ret; -} -vector IncludesNormalize::Split(const string& input, char sep) { - vector elems; - stringstream ss(input); - string item; - while (getline(ss, item, sep)) - elems.push_back(item); - return elems; + return true; } -string IncludesNormalize::ToLower(const string& s) { - string ret; - transform(s.begin(), s.end(), back_inserter(ret), ::tolower); - return ret; +} // anonymous namespace + +IncludesNormalize::IncludesNormalize(const string& relative_to) { + relative_to_ = AbsPath(relative_to); + splitted_relative_to_ = SplitStringPiece(relative_to_, '/'); } string IncludesNormalize::AbsPath(StringPiece s) { + if (IsAbsPath(s)) { + string result = s.AsString(); + for (size_t i = 0; i < result.size(); ++i) { + if (result[i] == '\\') { + result[i] = '/'; + } + } + return result; + } + char result[_MAX_PATH]; GetFullPathName(s.AsString().c_str(), sizeof(result), result, NULL); for (char* c = result; *c; ++c) @@ -74,28 +132,30 @@ string IncludesNormalize::AbsPath(StringPiece s) { return result; } -string IncludesNormalize::Relativize(StringPiece path, const string& start) { - vector start_list = Split(AbsPath(start), '/'); - vector path_list = Split(AbsPath(path), '/'); +string IncludesNormalize::Relativize(StringPiece path, const vector& start_list) { + string abs_path = AbsPath(path); + vector path_list = SplitStringPiece(abs_path, '/'); int i; for (i = 0; i < static_cast(min(start_list.size(), path_list.size())); ++i) { - if (ToLower(start_list[i]) != ToLower(path_list[i])) + if (!EqualsCaseInsensitiveASCII(start_list[i], path_list[i])) { break; + } } - vector rel_list; + vector rel_list; + rel_list.reserve(start_list.size() - i + path_list.size() - i); for (int j = 0; j < static_cast(start_list.size() - i); ++j) rel_list.push_back(".."); for (int j = i; j < static_cast(path_list.size()); ++j) rel_list.push_back(path_list[j]); if (rel_list.size() == 0) return "."; - return Join(rel_list, '/'); + return JoinStringPiece(rel_list, '/'); } -bool IncludesNormalize::Normalize(const string& input, const char* relative_to, - string* result, string* err) { +bool IncludesNormalize::Normalize(const string& input, + string* result, string* err) const { char copy[_MAX_PATH + 1]; size_t len = input.size(); if (len > _MAX_PATH) { @@ -108,15 +168,10 @@ bool IncludesNormalize::Normalize(const string& input, const char* relative_to, return false; StringPiece partially_fixed(copy, len); - string curdir; - if (!relative_to) { - curdir = AbsPath("."); - relative_to = curdir.c_str(); - } - if (!SameDrive(partially_fixed, relative_to)) { + if (!SameDrive(partially_fixed, relative_to_)) { *result = partially_fixed.AsString(); return true; } - *result = Relativize(partially_fixed, relative_to); + *result = Relativize(partially_fixed, splitted_relative_to_); return true; } diff --git a/src/includes_normalize.h b/src/includes_normalize.h index 98e912f..2ec08ed 100644 --- a/src/includes_normalize.h +++ b/src/includes_normalize.h @@ -21,15 +21,19 @@ struct StringPiece; /// Utility functions for normalizing include paths on Windows. /// TODO: this likely duplicates functionality of CanonicalizePath; refactor. struct IncludesNormalize { + /// Normalize path relative to |relative_to|. + IncludesNormalize(const string& relative_to); + // Internal utilities made available for testing, maybe useful otherwise. - static string Join(const vector& list, char sep); - static vector Split(const string& input, char sep); - static string ToLower(const string& s); static string AbsPath(StringPiece s); - static string Relativize(StringPiece path, const string& start); + static string Relativize(StringPiece path, + const vector& start_list); /// Normalize by fixing slashes style, fixing redundant .. and . and makes the - /// path relative to |relative_to|. - static bool Normalize(const string& input, const char* relative_to, - string* result, string* err); + /// path relative to |relative_to_|. + bool Normalize(const string& input, string* result, string* err) const; + + private: + string relative_to_; + vector splitted_relative_to_; }; diff --git a/src/includes_normalize_test.cc b/src/includes_normalize_test.cc index f18795c..0bb14ec 100644 --- a/src/includes_normalize_test.cc +++ b/src/includes_normalize_test.cc @@ -18,6 +18,7 @@ #include +#include "string_piece_util.h" #include "test.h" #include "util.h" @@ -26,13 +27,14 @@ namespace { string GetCurDir() { char buf[_MAX_PATH]; _getcwd(buf, sizeof(buf)); - vector parts = IncludesNormalize::Split(string(buf), '\\'); - return parts[parts.size() - 1]; + vector parts = SplitStringPiece(buf, '\\'); + return parts[parts.size() - 1].AsString(); } string NormalizeAndCheckNoError(const string& input) { string result, err; - EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), NULL, &result, &err)); + IncludesNormalize normalizer("."); + EXPECT_TRUE(normalizer.Normalize(input, &result, &err)); EXPECT_EQ("", err); return result; } @@ -40,8 +42,8 @@ string NormalizeAndCheckNoError(const string& input) { string NormalizeRelativeAndCheckNoError(const string& input, const string& relative_to) { string result, err; - EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), relative_to.c_str(), - &result, &err)); + IncludesNormalize normalizer(relative_to); + EXPECT_TRUE(normalizer.Normalize(input, &result, &err)); EXPECT_EQ("", err); return result; } @@ -76,34 +78,6 @@ TEST(IncludesNormalize, Case) { EXPECT_EQ("A/B", NormalizeAndCheckNoError("A\\./B")); } -TEST(IncludesNormalize, Join) { - vector x; - EXPECT_EQ("", IncludesNormalize::Join(x, ':')); - x.push_back("alpha"); - EXPECT_EQ("alpha", IncludesNormalize::Join(x, ':')); - x.push_back("beta"); - x.push_back("gamma"); - EXPECT_EQ("alpha:beta:gamma", IncludesNormalize::Join(x, ':')); -} - -TEST(IncludesNormalize, Split) { - EXPECT_EQ("", IncludesNormalize::Join(IncludesNormalize::Split("", '/'), - ':')); - EXPECT_EQ("a", IncludesNormalize::Join(IncludesNormalize::Split("a", '/'), - ':')); - EXPECT_EQ("a:b:c", - IncludesNormalize::Join( - IncludesNormalize::Split("a/b/c", '/'), ':')); -} - -TEST(IncludesNormalize, ToLower) { - EXPECT_EQ("", IncludesNormalize::ToLower("")); - EXPECT_EQ("stuff", IncludesNormalize::ToLower("Stuff")); - EXPECT_EQ("stuff and things", IncludesNormalize::ToLower("Stuff AND thINGS")); - EXPECT_EQ("stuff 3and thin43gs", - IncludesNormalize::ToLower("Stuff 3AND thIN43GS")); -} - TEST(IncludesNormalize, DifferentDrive) { EXPECT_EQ("stuff.h", NormalizeRelativeAndCheckNoError("p:\\vs08\\stuff.h", "p:\\vs08")); @@ -129,8 +103,9 @@ TEST(IncludesNormalize, LongInvalidPath) { "instead of /Zi, but expect a similar error when you link your program."; // Too long, won't be canonicalized. Ensure doesn't crash. string result, err; + IncludesNormalize normalizer("."); EXPECT_FALSE( - IncludesNormalize::Normalize(kLongInputString, NULL, &result, &err)); + normalizer.Normalize(kLongInputString, &result, &err)); EXPECT_EQ("path too long", err); const char kExactlyMaxPath[] = diff --git a/src/string_piece.h b/src/string_piece.h index 353b24e..031bda4 100644 --- a/src/string_piece.h +++ b/src/string_piece.h @@ -56,6 +56,14 @@ struct StringPiece { return str_ + len_; } + char operator[](size_t pos) const { + return str_[pos]; + } + + size_t size() const { + return len_; + } + const char* str_; size_t len_; }; -- cgit v0.12