From 08a3220bc2fe12e7f05967b317d221e0bc620be9 Mon Sep 17 00:00:00 2001
From: Takuto Ikuta
Date: Wed, 26 Apr 2017 16:10:27 +0900
Subject: Add string_piece_util

Following functions are implemented for further performance optimization.
* JoinStringPiece
* SplitStringPiece
* EqualsCaseInsensitiveASCII
* ToLowerASCII

To improve performance of CLParser, I will introduce above functions into
include_normalize-win32.cc.
---
 configure.py                  |   2 +
 src/string_piece.h            |  10 ++++
 src/string_piece_util.cc      |  78 +++++++++++++++++++++++++
 src/string_piece_util.h       |  34 +++++++++++
 src/string_piece_util_test.cc | 129 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 253 insertions(+)
 create mode 100644 src/string_piece_util.cc
 create mode 100644 src/string_piece_util.h
 create mode 100644 src/string_piece_util_test.cc

diff --git a/configure.py b/configure.py
index edf85e2..643106c 100755
--- a/configure.py
+++ b/configure.py
@@ -489,6 +489,7 @@ for name in ['build',
              'manifest_parser',
              'metrics',
              'state',
+             'string_piece_util',
              'util',
              'version']:
     objs += cxx(name)
@@ -551,6 +552,7 @@ for name in ['build_log_test',
              'manifest_parser_test',
              'ninja_test',
              'state_test',
+             'string_piece_util_test',
              'subprocess_test',
              'test',
              'util_test']:
diff --git a/src/string_piece.h b/src/string_piece.h
index b1bf105..353b24e 100644
--- a/src/string_piece.h
+++ b/src/string_piece.h
@@ -25,6 +25,8 @@ using namespace std;
 /// externally. It is useful for reducing the number of std::strings
 /// we need to allocate.
 struct StringPiece {
+  typedef const char* const_iterator;
+
   StringPiece() : str_(NULL), len_(0) {}
 
   /// The constructors intentionally allow for implicit conversions.
@@ -46,6 +48,14 @@ struct StringPiece {
     return len_ ? string(str_, len_) : string();
   }
 
+  const_iterator begin() const {
+    return str_;
+  }
+
+  const_iterator end() const {
+    return str_ + len_;
+  }
+
   const char* str_;
   size_t len_;
 };
diff --git a/src/string_piece_util.cc b/src/string_piece_util.cc
new file mode 100644
index 0000000..8e1ecfd
--- /dev/null
+++ b/src/string_piece_util.cc
@@ -0,0 +1,78 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "string_piece_util.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+using namespace std;
+
+vector<StringPiece> SplitStringPiece(StringPiece input, char sep) {
+  vector<StringPiece> elems;
+  elems.reserve(count(input.begin(), input.end(), sep) + 1);
+
+  StringPiece::const_iterator pos = input.begin();
+
+  for (;;) {
+    const char* next_pos = find(pos, input.end(), sep);
+    if (next_pos == input.end()) {
+      elems.push_back(StringPiece(pos, input.end() - pos));
+      break;
+    }
+    elems.push_back(StringPiece(pos, next_pos - pos));
+    pos = next_pos + 1;
+  }
+
+  return elems;
+}
+
+string JoinStringPiece(const vector<StringPiece>& list, char sep) {
+  if (list.size() == 0){
+    return "";
+  }
+
+  string ret;
+
+  {
+    size_t cap = list.size() - 1;
+    for (size_t i = 0; i < list.size(); ++i) {
+      cap += list[i].len_;
+    }
+    ret.reserve(cap);
+  }
+
+  for (size_t i = 0; i < list.size(); ++i) {
+    if (i != 0) {
+      ret += sep;
+    }
+    ret.append(list[i].str_, list[i].len_);
+  }
+
+  return ret;
+}
+
+bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
+  if (a.len_ != b.len_) {
+    return false;
+  }
+
+  for (size_t i = 0; i < a.len_; ++i) {
+    if (ToLowerASCII(a.str_[i]) != ToLowerASCII(b.str_[i])) {
+      return false;
+    }
+  }
+
+  return true;
+}
diff --git a/src/string_piece_util.h b/src/string_piece_util.h
new file mode 100644
index 0000000..2e40b9f
--- /dev/null
+++ b/src/string_piece_util.h
@@ -0,0 +1,34 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef NINJA_STRINGPIECE_UTIL_H_
+#define NINJA_STRINGPIECE_UTIL_H_
+
+#include <string>
+#include <vector>
+
+#include "string_piece.h"
+using namespace std;
+
+vector<StringPiece> SplitStringPiece(StringPiece input, char sep);
+
+string JoinStringPiece(const vector<StringPiece>& list, char sep);
+
+inline char ToLowerASCII(char c) {
+  return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
+}
+
+bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
+
+#endif  // NINJA_STRINGPIECE_UTIL_H_
diff --git a/src/string_piece_util_test.cc b/src/string_piece_util_test.cc
new file mode 100644
index 0000000..648c647
--- /dev/null
+++ b/src/string_piece_util_test.cc
@@ -0,0 +1,129 @@
+// Copyright 2017 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "string_piece_util.h"
+
+#include "test.h"
+
+TEST(StringPieceUtilTest, SplitStringPiece) {
+  {
+    string input("a:b:c");
+    vector<StringPiece> list = SplitStringPiece(input, ':');
+
+    EXPECT_EQ(list.size(), 3);
+
+    EXPECT_EQ(list[0], "a");
+    EXPECT_EQ(list[1], "b");
+    EXPECT_EQ(list[2], "c");
+  }
+
+  {
+    string empty("");
+    vector<StringPiece> list = SplitStringPiece(empty, ':');
+
+    EXPECT_EQ(list.size(), 1);
+
+    EXPECT_EQ(list[0], "");
+  }
+
+  {
+    string one("a");
+    vector<StringPiece> list = SplitStringPiece(one, ':');
+
+    EXPECT_EQ(list.size(), 1);
+
+    EXPECT_EQ(list[0], "a");
+  }
+
+  {
+    string sep_only(":");
+    vector<StringPiece> list = SplitStringPiece(sep_only, ':');
+
+    EXPECT_EQ(list.size(), 2);
+
+    EXPECT_EQ(list[0], "");
+    EXPECT_EQ(list[1], "");
+  }
+
+  {
+    string sep(":a:b:c:");
+    vector<StringPiece> list = SplitStringPiece(sep, ':');
+
+    EXPECT_EQ(list.size(), 5);
+
+    EXPECT_EQ(list[0], "");
+    EXPECT_EQ(list[1], "a");
+    EXPECT_EQ(list[2], "b");
+    EXPECT_EQ(list[3], "c");
+    EXPECT_EQ(list[4], "");
+  }
+}
+
+TEST(StringPieceUtilTest, JoinStringPiece) {
+  {
+    string input("a:b:c");
+    vector<StringPiece> list = SplitStringPiece(input, ':');
+
+    EXPECT_EQ("a:b:c", JoinStringPiece(list, ':'));
+    EXPECT_EQ("a/b/c", JoinStringPiece(list, '/'));
+  }
+
+  {
+    string empty("");
+    vector<StringPiece> list = SplitStringPiece(empty, ':');
+
+    EXPECT_EQ("", JoinStringPiece(list, ':'));
+  }
+
+  {
+    vector<StringPiece> empty_list;
+
+    EXPECT_EQ("", JoinStringPiece(empty_list, ':'));
+  }
+
+  {
+    string one("a");
+    vector<StringPiece> single_list = SplitStringPiece(one, ':');
+
+    EXPECT_EQ("a", JoinStringPiece(single_list, ':'));
+  }
+
+  {
+    string sep(":a:b:c:");
+    vector<StringPiece> list = SplitStringPiece(sep, ':');
+
+    EXPECT_EQ(":a:b:c:", JoinStringPiece(list, ':'));
+  }
+}
+
+TEST(StringPieceUtilTest, ToLowerASCII) {
+  EXPECT_EQ('a', ToLowerASCII('A'));
+  EXPECT_EQ('z', ToLowerASCII('Z'));
+  EXPECT_EQ('a', ToLowerASCII('a'));
+  EXPECT_EQ('z', ToLowerASCII('z'));
+  EXPECT_EQ('/', ToLowerASCII('/'));
+  EXPECT_EQ('1', ToLowerASCII('1'));
+}
+
+TEST(StringPieceUtilTest, EqualsCaseInsensitiveASCII) {
+  EXPECT_TRUE(EqualsCaseInsensitiveASCII("abc", "abc"));
+  EXPECT_TRUE(EqualsCaseInsensitiveASCII("abc", "ABC"));
+  EXPECT_TRUE(EqualsCaseInsensitiveASCII("abc", "aBc"));
+  EXPECT_TRUE(EqualsCaseInsensitiveASCII("AbC", "aBc"));
+  EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));
+
+  EXPECT_FALSE(EqualsCaseInsensitiveASCII("a", "ac"));
+  EXPECT_FALSE(EqualsCaseInsensitiveASCII("/", "\\"));
+  EXPECT_FALSE(EqualsCaseInsensitiveASCII("1", "10"));
+}
-- 
cgit v0.12