diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/clparser.cc | 16 | ||||
-rw-r--r-- | src/clparser_perftest.cc | 2 | ||||
-rw-r--r-- | src/includes_normalize-win32.cc | 126 | ||||
-rw-r--r-- | src/includes_normalize.h | 18 | ||||
-rw-r--r-- | src/includes_normalize_test.cc | 43 | ||||
-rw-r--r-- | src/string_piece.h | 18 | ||||
-rw-r--r-- | src/string_piece_util.cc | 78 | ||||
-rw-r--r-- | src/string_piece_util.h | 34 | ||||
-rw-r--r-- | src/string_piece_util_test.cc | 129 | ||||
-rw-r--r-- | src/util.cc | 2 | ||||
-rw-r--r-- | src/util.h | 2 |
11 files changed, 386 insertions, 82 deletions
diff --git a/src/clparser.cc b/src/clparser.cc index c17150b..7994c06 100644 --- a/src/clparser.cc +++ b/src/clparser.cc @@ -18,8 +18,12 @@ #include <assert.h> #include <string.h> +#include "metrics.h" +#include "string_piece_util.h" + #ifdef _WIN32 #include "includes_normalize.h" +#include "string_piece.h" #else #include "util.h" #endif @@ -53,7 +57,7 @@ string CLParser::FilterShowIncludes(const string& line, // static bool CLParser::IsSystemInclude(string path) { - transform(path.begin(), path.end(), path.begin(), ::tolower); + transform(path.begin(), path.end(), path.begin(), ToLowerASCII); // TODO: this is a heuristic, perhaps there's a better way? return (path.find("program files") != string::npos || path.find("microsoft visual studio") != string::npos); @@ -61,7 +65,7 @@ bool CLParser::IsSystemInclude(string path) { // static bool CLParser::FilterInputFilename(string line) { - transform(line.begin(), line.end(), line.begin(), ::tolower); + transform(line.begin(), line.end(), line.begin(), ToLowerASCII); // TODO: other extensions, like .asm? return EndsWith(line, ".c") || EndsWith(line, ".cc") || @@ -72,9 +76,15 @@ bool CLParser::FilterInputFilename(string line) { // static bool CLParser::Parse(const string& output, const string& deps_prefix, string* filtered_output, string* err) { + METRIC_RECORD("CLParser::Parse"); + // Loop over all lines in the output to process them. assert(&output != filtered_output); size_t start = 0; +#ifdef _WIN32 + IncludesNormalize normalizer("."); +#endif + while (start < output.size()) { size_t end = output.find_first_of("\r\n", start); if (end == string::npos) @@ -85,7 +95,7 @@ bool CLParser::Parse(const string& output, const string& deps_prefix, if (!include.empty()) { string normalized; #ifdef _WIN32 - if (!IncludesNormalize::Normalize(include, NULL, &normalized, err)) + if (!normalizer.Normalize(include, &normalized, err)) return false; #else // TODO: should this make the path relative to cwd? 
diff --git a/src/clparser_perftest.cc b/src/clparser_perftest.cc index 101a4e2..7ac5230 100644 --- a/src/clparser_perftest.cc +++ b/src/clparser_perftest.cc @@ -145,7 +145,7 @@ int main(int argc, char* argv[]) { } int64_t end = GetTimeMillis(); - if (end - start > 100) { + if (end - start > 2000) { int delta_ms = (int)(end - start); printf("Parse %d times in %dms avg %.1fus\n", limit, delta_ms, float(delta_ms * 1000) / limit); diff --git a/src/includes_normalize-win32.cc b/src/includes_normalize-win32.cc index e8a3e0f..459329b 100644 --- a/src/includes_normalize-win32.cc +++ b/src/includes_normalize-win32.cc @@ -15,6 +15,7 @@ #include "includes_normalize.h" #include "string_piece.h" +#include "string_piece_util.h" #include "util.h" #include <algorithm> @@ -25,8 +26,39 @@ namespace { -/// Return true if paths a and b are on the same Windows drive. +bool IsPathSeparator(char c) { + return c == '/' || c == '\\'; +} + +// Return true if paths a and b are on the same Windows drive. +// Return false if this function cannot determine +// whether the paths are on the same Windows drive. +bool SameDriveFast(StringPiece a, StringPiece b) { + if (a.size() < 3 || b.size() < 3) { + return false; + } + + if (!islatinalpha(a[0]) || !islatinalpha(b[0])) { + return false; + } + + if (ToLowerASCII(a[0]) != ToLowerASCII(b[0])) { + return false; + } + + if (a[1] != ':' || b[1] != ':') { + return false; + } + + return IsPathSeparator(a[2]) && IsPathSeparator(b[2]); +} + +// Return true if paths a and b are on the same Windows drive. bool SameDrive(StringPiece a, StringPiece b) { + if (SameDriveFast(a, b)) { + return true; + } + char a_absolute[_MAX_PATH]; char b_absolute[_MAX_PATH]; GetFullPathName(a.AsString().c_str(), sizeof(a_absolute), a_absolute, NULL); @@ -38,34 +70,57 @@ bool SameDrive(StringPiece a, StringPiece b) { return _stricmp(a_drive, b_drive) == 0; } -} // anonymous namespace +// Check path |s| is FullPath style returned by GetFullPathName. 
+// This ignores difference of path separator. +// This is used not to call very slow GetFullPathName API. +bool IsFullPathName(StringPiece s) { + if (s.size() < 3 || + !islatinalpha(s[0]) || + s[1] != ':' || + !IsPathSeparator(s[2])) { + return false; + } + + // Check "." or ".." is contained in path. + for (size_t i = 2; i < s.size(); ++i) { + if (!IsPathSeparator(s[i])) { + continue; + } + + // Check ".". + if (i + 1 < s.size() && s[i+1] == '.' && + (i + 2 >= s.size() || IsPathSeparator(s[i+2]))) { + return false; + } -string IncludesNormalize::Join(const vector<string>& list, char sep) { - string ret; - for (size_t i = 0; i < list.size(); ++i) { - ret += list[i]; - if (i != list.size() - 1) - ret += sep; + // Check "..". + if (i + 2 < s.size() && s[i+1] == '.' && s[i+2] == '.' && + (i + 3 >= s.size() || IsPathSeparator(s[i+3]))) { + return false; + } } - return ret; -} -vector<string> IncludesNormalize::Split(const string& input, char sep) { - vector<string> elems; - stringstream ss(input); - string item; - while (getline(ss, item, sep)) - elems.push_back(item); - return elems; + return true; } -string IncludesNormalize::ToLower(const string& s) { - string ret; - transform(s.begin(), s.end(), back_inserter(ret), ::tolower); - return ret; +} // anonymous namespace + +IncludesNormalize::IncludesNormalize(const string& relative_to) { + relative_to_ = AbsPath(relative_to); + split_relative_to_ = SplitStringPiece(relative_to_, '/'); } string IncludesNormalize::AbsPath(StringPiece s) { + if (IsFullPathName(s)) { + string result = s.AsString(); + for (size_t i = 0; i < result.size(); ++i) { + if (result[i] == '\\') { + result[i] = '/'; + } + } + return result; + } + char result[_MAX_PATH]; GetFullPathName(s.AsString().c_str(), sizeof(result), result, NULL); for (char* c = result; *c; ++c) @@ -74,28 +129,31 @@ string IncludesNormalize::AbsPath(StringPiece s) { return result; } -string IncludesNormalize::Relativize(StringPiece path, const string& start) { - 
vector<string> start_list = Split(AbsPath(start), '/'); - vector<string> path_list = Split(AbsPath(path), '/'); +string IncludesNormalize::Relativize( + StringPiece path, const vector<StringPiece>& start_list) { + string abs_path = AbsPath(path); + vector<StringPiece> path_list = SplitStringPiece(abs_path, '/'); int i; for (i = 0; i < static_cast<int>(min(start_list.size(), path_list.size())); ++i) { - if (ToLower(start_list[i]) != ToLower(path_list[i])) + if (!EqualsCaseInsensitiveASCII(start_list[i], path_list[i])) { break; + } } - vector<string> rel_list; + vector<StringPiece> rel_list; + rel_list.reserve(start_list.size() - i + path_list.size() - i); for (int j = 0; j < static_cast<int>(start_list.size() - i); ++j) rel_list.push_back(".."); for (int j = i; j < static_cast<int>(path_list.size()); ++j) rel_list.push_back(path_list[j]); if (rel_list.size() == 0) return "."; - return Join(rel_list, '/'); + return JoinStringPiece(rel_list, '/'); } -bool IncludesNormalize::Normalize(const string& input, const char* relative_to, - string* result, string* err) { +bool IncludesNormalize::Normalize(const string& input, + string* result, string* err) const { char copy[_MAX_PATH + 1]; size_t len = input.size(); if (len > _MAX_PATH) { @@ -107,16 +165,12 @@ bool IncludesNormalize::Normalize(const string& input, const char* relative_to, if (!CanonicalizePath(copy, &len, &slash_bits, err)) return false; StringPiece partially_fixed(copy, len); + string abs_input = AbsPath(partially_fixed); - string curdir; - if (!relative_to) { - curdir = AbsPath("."); - relative_to = curdir.c_str(); - } - if (!SameDrive(partially_fixed, relative_to)) { + if (!SameDrive(abs_input, relative_to_)) { *result = partially_fixed.AsString(); return true; } - *result = Relativize(partially_fixed, relative_to); + *result = Relativize(abs_input, split_relative_to_); return true; } diff --git a/src/includes_normalize.h b/src/includes_normalize.h index 98e912f..3811e53 100644 --- a/src/includes_normalize.h 
+++ b/src/includes_normalize.h @@ -21,15 +21,19 @@ struct StringPiece; /// Utility functions for normalizing include paths on Windows. /// TODO: this likely duplicates functionality of CanonicalizePath; refactor. struct IncludesNormalize { + /// Normalize path relative to |relative_to|. + IncludesNormalize(const string& relative_to); + // Internal utilities made available for testing, maybe useful otherwise. - static string Join(const vector<string>& list, char sep); - static vector<string> Split(const string& input, char sep); - static string ToLower(const string& s); static string AbsPath(StringPiece s); - static string Relativize(StringPiece path, const string& start); + static string Relativize(StringPiece path, + const vector<StringPiece>& start_list); /// Normalize by fixing slashes style, fixing redundant .. and . and makes the - /// path relative to |relative_to|. - static bool Normalize(const string& input, const char* relative_to, - string* result, string* err); + /// path |input| relative to |this->relative_to_| and store to |result|. 
+ bool Normalize(const string& input, string* result, string* err) const; + + private: + string relative_to_; + vector<StringPiece> split_relative_to_; }; diff --git a/src/includes_normalize_test.cc b/src/includes_normalize_test.cc index f18795c..0bb14ec 100644 --- a/src/includes_normalize_test.cc +++ b/src/includes_normalize_test.cc @@ -18,6 +18,7 @@ #include <direct.h> +#include "string_piece_util.h" #include "test.h" #include "util.h" @@ -26,13 +27,14 @@ namespace { string GetCurDir() { char buf[_MAX_PATH]; _getcwd(buf, sizeof(buf)); - vector<string> parts = IncludesNormalize::Split(string(buf), '\\'); - return parts[parts.size() - 1]; + vector<StringPiece> parts = SplitStringPiece(buf, '\\'); + return parts[parts.size() - 1].AsString(); } string NormalizeAndCheckNoError(const string& input) { string result, err; - EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), NULL, &result, &err)); + IncludesNormalize normalizer("."); + EXPECT_TRUE(normalizer.Normalize(input, &result, &err)); EXPECT_EQ("", err); return result; } @@ -40,8 +42,8 @@ string NormalizeAndCheckNoError(const string& input) { string NormalizeRelativeAndCheckNoError(const string& input, const string& relative_to) { string result, err; - EXPECT_TRUE(IncludesNormalize::Normalize(input.c_str(), relative_to.c_str(), - &result, &err)); + IncludesNormalize normalizer(relative_to); + EXPECT_TRUE(normalizer.Normalize(input, &result, &err)); EXPECT_EQ("", err); return result; } @@ -76,34 +78,6 @@ TEST(IncludesNormalize, Case) { EXPECT_EQ("A/B", NormalizeAndCheckNoError("A\\./B")); } -TEST(IncludesNormalize, Join) { - vector<string> x; - EXPECT_EQ("", IncludesNormalize::Join(x, ':')); - x.push_back("alpha"); - EXPECT_EQ("alpha", IncludesNormalize::Join(x, ':')); - x.push_back("beta"); - x.push_back("gamma"); - EXPECT_EQ("alpha:beta:gamma", IncludesNormalize::Join(x, ':')); -} - -TEST(IncludesNormalize, Split) { - EXPECT_EQ("", IncludesNormalize::Join(IncludesNormalize::Split("", '/'), - ':')); - 
EXPECT_EQ("a", IncludesNormalize::Join(IncludesNormalize::Split("a", '/'), - ':')); - EXPECT_EQ("a:b:c", - IncludesNormalize::Join( - IncludesNormalize::Split("a/b/c", '/'), ':')); -} - -TEST(IncludesNormalize, ToLower) { - EXPECT_EQ("", IncludesNormalize::ToLower("")); - EXPECT_EQ("stuff", IncludesNormalize::ToLower("Stuff")); - EXPECT_EQ("stuff and things", IncludesNormalize::ToLower("Stuff AND thINGS")); - EXPECT_EQ("stuff 3and thin43gs", - IncludesNormalize::ToLower("Stuff 3AND thIN43GS")); -} - TEST(IncludesNormalize, DifferentDrive) { EXPECT_EQ("stuff.h", NormalizeRelativeAndCheckNoError("p:\\vs08\\stuff.h", "p:\\vs08")); @@ -129,8 +103,9 @@ TEST(IncludesNormalize, LongInvalidPath) { "instead of /Zi, but expect a similar error when you link your program."; // Too long, won't be canonicalized. Ensure doesn't crash. string result, err; + IncludesNormalize normalizer("."); EXPECT_FALSE( - IncludesNormalize::Normalize(kLongInputString, NULL, &result, &err)); + normalizer.Normalize(kLongInputString, &result, &err)); EXPECT_EQ("path too long", err); const char kExactlyMaxPath[] = diff --git a/src/string_piece.h b/src/string_piece.h index b1bf105..031bda4 100644 --- a/src/string_piece.h +++ b/src/string_piece.h @@ -25,6 +25,8 @@ using namespace std; /// externally. It is useful for reducing the number of std::strings /// we need to allocate. struct StringPiece { + typedef const char* const_iterator; + StringPiece() : str_(NULL), len_(0) {} /// The constructors intentionally allow for implicit conversions. @@ -46,6 +48,22 @@ struct StringPiece { return len_ ? 
string(str_, len_) : string(); } + const_iterator begin() const { + return str_; + } + + const_iterator end() const { + return str_ + len_; + } + + char operator[](size_t pos) const { + return str_[pos]; + } + + size_t size() const { + return len_; + } + const char* str_; size_t len_; }; diff --git a/src/string_piece_util.cc b/src/string_piece_util.cc new file mode 100644 index 0000000..8e1ecfd --- /dev/null +++ b/src/string_piece_util.cc @@ -0,0 +1,78 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "string_piece_util.h" + +#include <algorithm> +#include <string> +#include <vector> +using namespace std; + +vector<StringPiece> SplitStringPiece(StringPiece input, char sep) { + vector<StringPiece> elems; + elems.reserve(count(input.begin(), input.end(), sep) + 1); + + StringPiece::const_iterator pos = input.begin(); + + for (;;) { + const char* next_pos = find(pos, input.end(), sep); + if (next_pos == input.end()) { + elems.push_back(StringPiece(pos, input.end() - pos)); + break; + } + elems.push_back(StringPiece(pos, next_pos - pos)); + pos = next_pos + 1; + } + + return elems; +} + +string JoinStringPiece(const vector<StringPiece>& list, char sep) { + if (list.size() == 0){ + return ""; + } + + string ret; + + { + size_t cap = list.size() - 1; + for (size_t i = 0; i < list.size(); ++i) { + cap += list[i].len_; + } + ret.reserve(cap); + } + + for (size_t i = 0; i < list.size(); ++i) { + if (i != 0) { + ret += sep; + } + ret.append(list[i].str_, list[i].len_); + } + + return ret; +} + +bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) { + if (a.len_ != b.len_) { + return false; + } + + for (size_t i = 0; i < a.len_; ++i) { + if (ToLowerASCII(a.str_[i]) != ToLowerASCII(b.str_[i])) { + return false; + } + } + + return true; +} diff --git a/src/string_piece_util.h b/src/string_piece_util.h new file mode 100644 index 0000000..2e40b9f --- /dev/null +++ b/src/string_piece_util.h @@ -0,0 +1,34 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NINJA_STRINGPIECE_UTIL_H_ +#define NINJA_STRINGPIECE_UTIL_H_ + +#include <string> +#include <vector> + +#include "string_piece.h" +using namespace std; + +vector<StringPiece> SplitStringPiece(StringPiece input, char sep); + +string JoinStringPiece(const vector<StringPiece>& list, char sep); + +inline char ToLowerASCII(char c) { + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; +} + +bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b); + +#endif // NINJA_STRINGPIECE_UTIL_H_ diff --git a/src/string_piece_util_test.cc b/src/string_piece_util_test.cc new file mode 100644 index 0000000..648c647 --- /dev/null +++ b/src/string_piece_util_test.cc @@ -0,0 +1,129 @@ +// Copyright 2017 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "string_piece_util.h" + +#include "test.h" + +TEST(StringPieceUtilTest, SplitStringPiece) { + { + string input("a:b:c"); + vector<StringPiece> list = SplitStringPiece(input, ':'); + + EXPECT_EQ(list.size(), 3); + + EXPECT_EQ(list[0], "a"); + EXPECT_EQ(list[1], "b"); + EXPECT_EQ(list[2], "c"); + } + + { + string empty(""); + vector<StringPiece> list = SplitStringPiece(empty, ':'); + + EXPECT_EQ(list.size(), 1); + + EXPECT_EQ(list[0], ""); + } + + { + string one("a"); + vector<StringPiece> list = SplitStringPiece(one, ':'); + + EXPECT_EQ(list.size(), 1); + + EXPECT_EQ(list[0], "a"); + } + + { + string sep_only(":"); + vector<StringPiece> list = SplitStringPiece(sep_only, ':'); + + EXPECT_EQ(list.size(), 2); + + EXPECT_EQ(list[0], ""); + EXPECT_EQ(list[1], ""); + } + + { + string sep(":a:b:c:"); + vector<StringPiece> list = SplitStringPiece(sep, ':'); + + EXPECT_EQ(list.size(), 5); + + EXPECT_EQ(list[0], ""); + EXPECT_EQ(list[1], "a"); + EXPECT_EQ(list[2], "b"); + EXPECT_EQ(list[3], "c"); + EXPECT_EQ(list[4], ""); + } +} + +TEST(StringPieceUtilTest, JoinStringPiece) { + { + string input("a:b:c"); + vector<StringPiece> list = SplitStringPiece(input, ':'); + + EXPECT_EQ("a:b:c", JoinStringPiece(list, ':')); + EXPECT_EQ("a/b/c", JoinStringPiece(list, '/')); + } + + { + string empty(""); + vector<StringPiece> list = SplitStringPiece(empty, ':'); + + EXPECT_EQ("", JoinStringPiece(list, ':')); + } + + { + vector<StringPiece> empty_list; + + EXPECT_EQ("", JoinStringPiece(empty_list, ':')); + } + + { + string one("a"); + vector<StringPiece> single_list = SplitStringPiece(one, ':'); + + EXPECT_EQ("a", JoinStringPiece(single_list, ':')); + } + + { + string sep(":a:b:c:"); + vector<StringPiece> list = SplitStringPiece(sep, ':'); + + EXPECT_EQ(":a:b:c:", JoinStringPiece(list, ':')); + } +} + +TEST(StringPieceUtilTest, ToLowerASCII) { + EXPECT_EQ('a', ToLowerASCII('A')); + EXPECT_EQ('z', ToLowerASCII('Z')); + EXPECT_EQ('a', ToLowerASCII('a')); + EXPECT_EQ('z', 
ToLowerASCII('z')); + EXPECT_EQ('/', ToLowerASCII('/')); + EXPECT_EQ('1', ToLowerASCII('1')); +} + +TEST(StringPieceUtilTest, EqualsCaseInsensitiveASCII) { + EXPECT_TRUE(EqualsCaseInsensitiveASCII("abc", "abc")); + EXPECT_TRUE(EqualsCaseInsensitiveASCII("abc", "ABC")); + EXPECT_TRUE(EqualsCaseInsensitiveASCII("abc", "aBc")); + EXPECT_TRUE(EqualsCaseInsensitiveASCII("AbC", "aBc")); + EXPECT_TRUE(EqualsCaseInsensitiveASCII("", "")); + + EXPECT_FALSE(EqualsCaseInsensitiveASCII("a", "ac")); + EXPECT_FALSE(EqualsCaseInsensitiveASCII("/", "\\")); + EXPECT_FALSE(EqualsCaseInsensitiveASCII("1", "10")); +} diff --git a/src/util.cc b/src/util.cc index ce4b192..84de879 100644 --- a/src/util.cc +++ b/src/util.cc @@ -471,7 +471,7 @@ void Win32Fatal(const char* function) { } #endif -static bool islatinalpha(int c) { +bool islatinalpha(int c) { // isalpha() is locale-dependent. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } @@ -70,6 +70,8 @@ const char* SpellcheckStringV(const string& text, /// Like SpellcheckStringV, but takes a NULL-terminated list. const char* SpellcheckString(const char* text, ...); +bool islatinalpha(int c); + /// Removes all Ansi escape codes (http://www.termsys.demon.co.uk/vtansi.htm). string StripAnsiEscapeCodes(const string& in); |