From 3b320023276f98b978054c14c65d3888b989ff4a Mon Sep 17 00:00:00 2001
From: tikuta <tikuta@google.com>
Date: Wed, 12 Apr 2017 14:40:40 +0900
Subject: Make clparser faster

This patch improves perfromance of clparser.

 * Reduce the number of calling GetFullPathName.
 * Use StringPiece for Split and Join.
 * Add EqualsCaseInsensitive for StringPiece not to generate new string
   instance.
 * Add some utility member in StringPiece class.
---
 src/clparser.cc                 |  11 +++-
 src/clparser_perftest.cc        |   2 +-
 src/includes_normalize-win32.cc | 127 ++++++++++++++++++++++++++++------------
 src/includes_normalize.h        |  18 +++---
 src/includes_normalize_test.cc  |  43 +++-----------
 src/string_piece.h              |   8 +++
 6 files changed, 130 insertions(+), 79 deletions(-)
diff --git a/src/clparser.cc b/src/clparser.cc
index c17150b..c993e36 100644
--- a/src/clparser.cc
+++ b/src/clparser.cc
@@ -18,8 +18,11 @@
 #include <assert.h>
 #include <string.h>
 
+#include "metrics.h"
+
 #ifdef _WIN32
 #include "includes_normalize.h"
+#include "string_piece.h"
 #else
 #include "util.h"
 #endif
@@ -72,9 +75,15 @@ bool CLParser::FilterInputFilename(string line) {
 // static
 bool CLParser::Parse(const string& output, const string& deps_prefix,
                      string* filtered_output, string* err) {
+  METRIC_RECORD("CLParser::Parse");
+
   // Loop over all lines in the output to process them.
   assert(&output != filtered_output);
   size_t start = 0;
+#ifdef _WIN32
+  IncludesNormalize normalizer(".");
+#endif
+
   while (start < output.size()) {
     size_t end = output.find_first_of("\r\n", start);
     if (end == string::npos)
@@ -85,7 +94,7 @@ bool CLParser::Parse(const string& output, const string& deps_prefix,
     if (!include.empty()) {
       string normalized;
 #ifdef _WIN32
-      if (!IncludesNormalize::Normalize(include, NULL, &normalized, err))
+      if (!normalizer.Normalize(include, &normalized, err))
         return false;
 #else
       // TODO: should this make the path relative to cwd?
diff --git a/src/clparser_perftest.cc b/src/clparser_perftest.cc
index 101a4e2..7ac5230 100644
--- a/src/clparser_perftest.cc
+++ b/src/clparser_perftest.cc
@@ -145,7 +145,7 @@ int main(int argc, char* argv[]) {
     }
     int64_t end = GetTimeMillis();
 
-    if (end - start > 100) {
+    if (end - start > 2000) {
       int delta_ms = (int)(end - start);
       printf("Parse %d times in %dms avg %.1fus\n",
              limit, delta_ms, float(delta_ms * 1000) / limit);
diff --git a/src/includes_normalize-win32.cc b/src/includes_normalize-win32.cc
index e8a3e0f..b60ad70 100644
--- a/src/includes_normalize-win32.cc
+++ b/src/includes_normalize-win32.cc
@@ -15,6 +15,7 @@
 #include "includes_normalize.h"
 
 #include "string_piece.h"
+#include "string_piece_util.h"
 #include "util.h"
 
 #include <algorithm>
@@ -25,8 +26,45 @@
 
 namespace {
 
-/// Return true if paths a and b are on the same Windows drive.
+bool IsPathSeparator(char c) {
+  return c == '/' ||  c == '\\';
+}
+
+// Return true if paths a and b are on the same windows drive.
+// Return false if this funcation cannot check
+// whether or not on the same windows drive.
+bool SameDriveFast(StringPiece a, StringPiece b) {
+  if (a.size() < 3 || b.size() < 3) {
+    return false;
+  }
+
+  if (!isalpha(a[0]) || !isalpha(b[0])) {
+    return false;
+  }
+
+  if (tolower(a[0]) != tolower(b[0])) {
+    return false;
+  }
+
+  if (a[1] != ':' || b[1] != ':') {
+    return false;
+  }
+
+  if (!IsPathSeparator(a[2]) ||
+      !IsPathSeparator(b[2])) {
+    return false;
+  }
+
+  return true;
+}
+
+// Return true if paths a and b are on the same Windows drive.
 bool SameDrive(StringPiece a, StringPiece b)  {
+  // Fast check.
+  if (SameDriveFast(a, b)) {
+    return true;
+  }
+
   char a_absolute[_MAX_PATH];
   char b_absolute[_MAX_PATH];
   GetFullPathName(a.AsString().c_str(), sizeof(a_absolute), a_absolute, NULL);
@@ -38,34 +76,54 @@ bool SameDrive(StringPiece a, StringPiece b)  {
   return _stricmp(a_drive, b_drive) == 0;
 }
 
-}  // anonymous namespace
+bool IsAbsPath(StringPiece s) {
+  if (s.size() < 3 ||
+      !isalpha(s[0]) ||
+      s[1] != ':' ||
+      !IsPathSeparator(s[2])) {
+    return false;
+  }
+
+  // Check "." or ".." is contained in path.
+  for (size_t i = 2; i < s.size(); ++i) {
+    if (!IsPathSeparator(s[i])) {
+      continue;
+    }
 
-string IncludesNormalize::Join(const vector<string>& list, char sep) {
-  string ret;
-  for (size_t i = 0; i < list.size(); ++i) {
-    ret += list[i];
-    if (i != list.size() - 1)
-      ret += sep;
+    // Check ".".
+    if (i + 1 < s.size() && s[i+1] == '.' &&
+        (i + 2 >= s.size() || IsPathSeparator(s[i+2]))) {
+      return false;
+    }
+
+    // Check "..".
+    if (i + 2 < s.size() && s[i+1] == '.' && s[i+2] == '.' &&
+        (i + 3 >= s.size() || IsPathSeparator(s[i+3]))) {
+      return false;
+    }
   }
-  return ret;
-}
 
-vector<string> IncludesNormalize::Split(const string& input, char sep) {
-  vector<string> elems;
-  stringstream ss(input);
-  string item;
-  while (getline(ss, item, sep))
-    elems.push_back(item);
-  return elems;
+  return true;
 }
 
-string IncludesNormalize::ToLower(const string& s) {
-  string ret;
-  transform(s.begin(), s.end(), back_inserter(ret), ::tolower);
-  return ret;
+}  // anonymous namespace
+
+IncludesNormalize::IncludesNormalize(const string& relative_to) {
+  relative_to_ = AbsPath(relative_to);
+  splitted_relative_to_ = SplitStringPiece(relative_to_, '/');
 }
 
 string IncludesNormalize::AbsPath(StringPiece s) {
+  if (IsAbsPath(s)) {
+    string result = s.AsString();
+    for (size_t i = 0; i < result.size(); ++i) {
+      if (result[i] == '\\') {
+        result[i] = '/';
+      }
+    }
+    return result;
+  }
+
   char result[_MAX_PATH];
   GetFullPathName(s.AsString().c_str(), sizeof(result), result, NULL);
   for (char* c = result; *c; ++c)
@@ -74,28 +132,30 @@ string IncludesNormalize::AbsPath(StringPiece s) {
   return result;
 }
 
-string IncludesNormalize::Relativize(StringPiece path, const string& start) {
-  vector<string> start_list = Split(AbsPath(start), '/');
-  vector<string> path_list = Split(AbsPath(path), '/');
+string IncludesNormalize::Relativize(StringPiece path, const vector<StringPiece>& start_list) {
+  string abs_path = AbsPath(path);
+  vector<StringPiece> path_list = SplitStringPiece(abs_path, '/');
   int i;
   for (i = 0; i < static_cast<int>(min(start_list.size(), path_list.size()));
        ++i) {
-    if (ToLower(start_list[i]) != ToLower(path_list[i]))
+    if (!EqualsCaseInsensitiveASCII(start_list[i], path_list[i])) {
       break;
+    }
   }
 
-  vector<string> rel_list;
+  vector<StringPiece> rel_list;
+  rel_list.reserve(start_list.size() - i + path_list.size() - i);
   for (int j = 0; j < static_cast<int>(start_list.size() - i); ++j)
     rel_list.push_back("..");
   for (int j = i; j < static_cast<int>(path_list.size()); ++j)
     rel_list.push_back(path_list[j]);
   if (rel_list.size() == 0)
     return ".";
-  return Join(rel_list, '/');
+  return JoinStringPiece(rel_list, '/');
 }
 
-bool IncludesNormalize::Normalize(const string& input, const char* relative_to,
-                                  string* result, string* err) {
+bool IncludesNormalize::Normalize(const string& input,
+                                  string* result, string* err) const {
   char copy[_MAX_PATH + 1];
   size_t len = input.size();
   if (len > _MAX_PATH) {
@@ -108,15 +168,10 @@ bool IncludesNormalize::Normalize(const string& input, const char* relative_to,
     return false;
   StringPiece partially_fixed(copy, len);
 
-  string curdir;
-  if (!relative_to) {
-    curdir = AbsPath(".");
-    relative_to = curdir.c_str();
-  }
-  if (!SameDrive(partially_fixed, relative_to)) {
+  if (!SameDrive(partially_fixed, relative_to_)) {
     *result = partially_fixed.AsString();
     return true;
   }
-  *result = Relativize(partially_fixed, relative_to);
+  *result = Relativize(partially_fixed, splitted_relative_to_);
   return true;
 }
diff --git a/src/includes_normalize.h b/src/includes_normalize.h
index 98e912f..2ec08ed 100644
--- a/src/includes_normalize.h
+++ b/src/includes_normalize.h
@@ -21,15 +21,19 @@ struct StringPiece;
 /// Utility functions for normalizing include paths on Windows.
 /// TODO: this likely duplicates functionality of CanonicalizePath; refactor.
 struct IncludesNormalize {
+  /// Normalize path relative to |relative_to|.
+  IncludesNormalize(const string& relative_to);
+
   // Internal utilities made available for testing, maybe useful otherwise.
-  static string Join(const vector<string>& list, char sep);
-  static vector<string> Split(const string& input, char sep);
-  static string ToLower(const string& s);
   static string AbsPath(StringPiece s);
-  static string Relativize(StringPiece path, const string& start);
+  static string Relativize(StringPiece path,
+                           const vector<StringPiece>& start_list);
 
   /// Normalize by fixing slashes style, fixing redundant .. and . and makes the
-  /// path relative to |relative_to|.
-  static bool Normalize(const string& input, const char* relative_to,
-                        string* result, string* err);
+  /// path relative to |relative_to_|.
+  bool Normalize(const string& input, string* result, string* err) const;
+
+ private:
+  string relative_to_;
+  vector<StringPiece> splitted_relative_to_;
 };
diff --git a/src/includes_normalize_test.cc b/src/includes_normalize_test.cc
index f18795c..0bb14ec 100644
--- a/src/includes_normalize_test.cc
+++ b/src/includes_normalize_test.cc
@@ -18,6 +18,7 @@
 
 #include <direct.h>
 
+#include "string_piece_util.h"
 #include "test.h"
 #include "util.h"
 
@@ -26,13 +27,14 @@ namespace {
 string GetCurDir() {
   char buf[_MAX_PATH];
   _getcwd(buf, sizeof(buf));
-  vector<string> parts = IncludesNormalize::Split(string(buf), '\\');
-  return parts[parts.size() - 1];
+  vector<StringPiece> parts = SplitStringPiece(buf, '\\');
+  return parts[parts.size() - 1].AsString();
 }
 
 string NormalizeAndCheckNoError(const string& input) {
   string result, err;
-  EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), NULL, &result, &err));
+  IncludesNormalize normalizer(".");
+  EXPECT_TRUE(normalizer.Normalize(input, &result, &err));
   EXPECT_EQ("", err);
   return result;
 }
@@ -40,8 +42,8 @@ string NormalizeAndCheckNoError(const string& input) {
 string NormalizeRelativeAndCheckNoError(const string& input,
                                         const string& relative_to) {
   string result, err;
-  EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), relative_to.c_str(),
-                                           &result, &err));
+  IncludesNormalize normalizer(relative_to);
+  EXPECT_TRUE(normalizer.Normalize(input, &result, &err));
   EXPECT_EQ("", err);
   return result;
 }
@@ -76,34 +78,6 @@ TEST(IncludesNormalize, Case) {
   EXPECT_EQ("A/B", NormalizeAndCheckNoError("A\\./B"));
 }
 
-TEST(IncludesNormalize, Join) {
-  vector<string> x;
-  EXPECT_EQ("", IncludesNormalize::Join(x, ':'));
-  x.push_back("alpha");
-  EXPECT_EQ("alpha", IncludesNormalize::Join(x, ':'));
-  x.push_back("beta");
-  x.push_back("gamma");
-  EXPECT_EQ("alpha:beta:gamma", IncludesNormalize::Join(x, ':'));
-}
-
-TEST(IncludesNormalize, Split) {
-  EXPECT_EQ("", IncludesNormalize::Join(IncludesNormalize::Split("", '/'),
-                                        ':'));
-  EXPECT_EQ("a", IncludesNormalize::Join(IncludesNormalize::Split("a", '/'),
-                                         ':'));
-  EXPECT_EQ("a:b:c",
-            IncludesNormalize::Join(
-                IncludesNormalize::Split("a/b/c", '/'), ':'));
-}
-
-TEST(IncludesNormalize, ToLower) {
-  EXPECT_EQ("", IncludesNormalize::ToLower(""));
-  EXPECT_EQ("stuff", IncludesNormalize::ToLower("Stuff"));
-  EXPECT_EQ("stuff and things", IncludesNormalize::ToLower("Stuff AND thINGS"));
-  EXPECT_EQ("stuff 3and thin43gs",
-            IncludesNormalize::ToLower("Stuff 3AND thIN43GS"));
-}
-
 TEST(IncludesNormalize, DifferentDrive) {
   EXPECT_EQ("stuff.h",
             NormalizeRelativeAndCheckNoError("p:\\vs08\\stuff.h", "p:\\vs08"));
@@ -129,8 +103,9 @@ TEST(IncludesNormalize, LongInvalidPath) {
       "instead of /Zi, but expect a similar error when you link your program.";
   // Too long, won't be canonicalized. Ensure doesn't crash.
   string result, err;
+  IncludesNormalize normalizer(".");
   EXPECT_FALSE(
-      IncludesNormalize::Normalize(kLongInputString, NULL, &result, &err));
+      normalizer.Normalize(kLongInputString, &result, &err));
   EXPECT_EQ("path too long", err);
 
   const char kExactlyMaxPath[] =
diff --git a/src/string_piece.h b/src/string_piece.h
index 353b24e..031bda4 100644
--- a/src/string_piece.h
+++ b/src/string_piece.h
@@ -56,6 +56,14 @@ struct StringPiece {
     return str_ + len_;
   }
 
+  char operator[](size_t pos) const {
+    return str_[pos];
+  }
+
+  size_t size() const {
+    return len_;
+  }
+
   const char* str_;
   size_t len_;
 };
-- 
cgit v0.12