From c946ae60194727ede9d3ef44754839f48541a981 Mon Sep 17 00:00:00 2001 From: "zhanyong.wan" Date: Thu, 29 Jan 2009 01:28:52 +0000 Subject: Implements a simple regex matcher (to be used by death tests on Windows). --- include/gtest/gtest-death-test.h | 51 ++++ include/gtest/internal/gtest-port.h | 41 ++- src/gtest-filepath.cc | 1 - src/gtest-internal-inl.h | 16 ++ src/gtest-port.cc | 271 ++++++++++++++++++- test/gtest-port_test.cc | 501 +++++++++++++++++++++++++++++++++++- 6 files changed, 854 insertions(+), 27 deletions(-) diff --git a/include/gtest/gtest-death-test.h b/include/gtest/gtest-death-test.h index f0e109a..1d4cf98 100644 --- a/include/gtest/gtest-death-test.h +++ b/include/gtest/gtest-death-test.h @@ -86,6 +86,57 @@ GTEST_DECLARE_string_(death_test_style); // // ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!"); // +// On the regular expressions used in death tests: +// +// On POSIX-compliant systems (*nix), we use the library, +// which uses the POSIX extended regex syntax. +// +// On other platforms (e.g. Windows), we only support a simple regex +// syntax implemented as part of Google Test. This limited +// implementation should be enough most of the time when writing +// death tests; though it lacks many features you can find in PCRE +// or POSIX extended regex syntax. For example, we don't support +// union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and +// repetition count ("x{5,7}"), among others. +// +// Below is the syntax that we do support. We chose it to be a +// subset of both PCRE and POSIX extended regex, so it's easy to +// learn wherever you come from. In the following: 'A' denotes a +// literal character, period (.), or a single \\ escape sequence; +// 'x' and 'y' denote regular expressions; 'm' and 'n' are for +// natural numbers. +// +// c matches any literal character c +// \\d matches any decimal digit +// \\D matches any character that's not a decimal digit +// \\f matches \f +// \\n matches \n +// \\r matches \r +// \\s matches any ASCII whitespace, including \n +// \\S matches any character that's not a whitespace +// \\t matches \t +// \\v matches \v +// \\w matches any letter, _, or decimal digit +// \\W matches any character that \\w doesn't match +// \\c matches any literal character c, which must be a punctuation +// . matches any single character except \n +// A? matches 0 or 1 occurrences of A +// A* matches 0 or many occurrences of A +// A+ matches 1 or many occurrences of A +// ^ matches the beginning of a string (not that of each line) +// $ matches the end of a string (not that of each line) +// xy matches x followed by y +// +// If you accidentally use PCRE or POSIX extended regex features +// not implemented by us, you will get a run-time failure. In that +// case, please try to rewrite your regular expression within the +// above syntax. +// +// This implementation is *not* meant to be as highly tuned or robust +// as a compiled regex library, but should perform well enough for a +// death test, which already incurs significant overhead by launching +// a child process. +// // Known caveats: // // A "threadsafe" style death test obtains the path to the test diff --git a/include/gtest/internal/gtest-port.h b/include/gtest/internal/gtest-port.h index b3ffc88..96eb0ab 100644 --- a/include/gtest/internal/gtest-port.h +++ b/include/gtest/internal/gtest-port.h @@ -97,6 +97,9 @@ // GTEST_HAS_TYPED_TEST - defined iff typed tests are supported. // GTEST_HAS_TYPED_TEST_P - defined iff type-parameterized tests are // supported. +// GTEST_USES_POSIX_RE - defined iff enhanced POSIX regex is used. +// GTEST_USES_SIMPLE_RE - defined iff our own simple regex is used; +// the above two are mutually exclusive. // // Macros for basic C++ coding: // GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning. @@ -187,6 +190,23 @@ #define GTEST_OS_SOLARIS #endif // _MSC_VER +#if defined(GTEST_OS_LINUX) + +// On some platforms, needs someone to define size_t, and +// won't compile otherwise. We can #include it here as we already +// included , which is guaranteed to define size_t through +// . +#include // NOLINT +#define GTEST_USES_POSIX_RE 1 + +#else + +// We are not on Linux, so may not be available. Use our +// own simple regex implementation instead. +#define GTEST_USES_SIMPLE_RE 1 + +#endif // GTEST_OS_LINUX + // Determines whether ::std::string and ::string are available. #ifndef GTEST_HAS_STD_STRING @@ -352,11 +372,6 @@ // Determines whether to support death tests. #if GTEST_HAS_STD_STRING && GTEST_HAS_CLONE #define GTEST_HAS_DEATH_TEST -// On some platforms, needs someone to define size_t, and -// won't compile otherwise. We can #include it here as we already -// included , which is guaranteed to define size_t through -// . -#include #include #include #include @@ -375,8 +390,8 @@ // Typed tests need and variadic macros, which gcc and VC // 8.0+ support. #if defined(__GNUC__) || (_MSC_VER >= 1400) -#define GTEST_HAS_TYPED_TEST -#define GTEST_HAS_TYPED_TEST_P +#define GTEST_HAS_TYPED_TEST 1 +#define GTEST_HAS_TYPED_TEST_P 1 #endif // defined(__GNUC__) || (_MSC_VER >= 1400) // Determines whether to support Combine(). This only makes sense when @@ -490,8 +505,6 @@ class scoped_ptr { GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr); }; -#ifdef GTEST_HAS_DEATH_TEST - // Defines RE. // A simple C++ wrapper for . It uses the POSIX Enxtended @@ -549,12 +562,16 @@ class RE { // String type here, in order to simplify dependencies between the // files. const char* pattern_; + bool is_valid_; +#if GTEST_USES_POSIX_RE regex_t full_regex_; // For FullMatch(). regex_t partial_regex_; // For PartialMatch(). - bool is_valid_; -}; +#else // GTEST_USES_SIMPLE_RE + const char* full_pattern_; // For FullMatch(); +#endif -#endif // GTEST_HAS_DEATH_TEST + GTEST_DISALLOW_COPY_AND_ASSIGN_(RE); +}; // Defines logging utilities: // GTEST_LOG_() - logs messages at the specified severity level. diff --git a/src/gtest-filepath.cc b/src/gtest-filepath.cc index b21b709..ebf7cf9 100644 --- a/src/gtest-filepath.cc +++ b/src/gtest-filepath.cc @@ -215,7 +215,6 @@ bool FilePath::DirectoryExists() const { // root directory per disk drive.) bool FilePath::IsRootDirectory() const { #ifdef GTEST_OS_WINDOWS - const char* const name = pathname_.c_str(); // TODO(wan@google.com): on Windows a network share like // \\server\share can be a root directory, although it cannot be the // current directory. Handle this properly. diff --git a/src/gtest-internal-inl.h b/src/gtest-internal-inl.h index 28006a2..caa0877 100644 --- a/src/gtest-internal-inl.h +++ b/src/gtest-internal-inl.h @@ -1266,6 +1266,22 @@ inline UnitTestImpl* GetUnitTestImpl() { return UnitTest::GetInstance()->impl(); } +// Internal helper functions for implementing the simple regular +// expression matcher. +bool IsInSet(char ch, const char* str); +bool IsDigit(char ch); +bool IsPunct(char ch); +bool IsRepeat(char ch); +bool IsWhiteSpace(char ch); +bool IsWordChar(char ch); +bool IsValidEscape(char ch); +bool AtomMatchesChar(bool escaped, char pattern, char ch); +bool ValidateRegex(const char* regex); +bool MatchRegexAtHead(const char* regex, const char* str); +bool MatchRepetitionAndRegexAtHead( + bool escaped, char ch, char repeat, const char* regex, const char* str); +bool MatchRegexAnywhere(const char* regex, const char* str); + // Parses the command line for Google Test flags, without initializing // other parts of Google Test. void ParseGoogleTestFlagsOnly(int* argc, char** argv); diff --git a/src/gtest-port.cc b/src/gtest-port.cc index 9878cae..3c9ec4b 100644 --- a/src/gtest-port.cc +++ b/src/gtest-port.cc @@ -39,6 +39,10 @@ #include #endif // GTEST_HAS_DEATH_TEST +#if GTEST_USES_SIMPLE_RE +#include +#endif + #ifdef _WIN32_WCE #include // For TerminateProcess() #endif // _WIN32_WCE @@ -47,11 +51,19 @@ #include #include +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION namespace testing { namespace internal { -#ifdef GTEST_HAS_DEATH_TEST +#if GTEST_USES_POSIX_RE // Implements RE. Currently only needed for death tests. @@ -101,7 +113,262 @@ void RE::Init(const char* regex) { delete[] full_pattern; } -#endif // GTEST_HAS_DEATH_TEST +#elif GTEST_USES_SIMPLE_RE + +// Returns true iff ch appears anywhere in str (excluding the +// terminating '\0' character). +bool IsInSet(char ch, const char* str) { + return ch != '\0' && strchr(str, ch) != NULL; +} + +// Returns true iff ch belongs to the given classification. Unlike +// similar functions in , these aren't affected by the +// current locale. +bool IsDigit(char ch) { return '0' <= ch && ch <= '9'; } +bool IsPunct(char ch) { + return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"); +} +bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); } +bool IsWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); } +bool IsWordChar(char ch) { + return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || + ('0' <= ch && ch <= '9') || ch == '_'; +} + +// Returns true iff "\\c" is a supported escape sequence. +bool IsValidEscape(char c) { + return (IsPunct(c) || IsInSet(c, "dDfnrsStvwW")); +} + +// Returns true iff the given atom (specified by escaped and pattern) +// matches ch. The result is undefined if the atom is invalid. +bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { + if (escaped) { // "\\p" where p is pattern_char. + switch (pattern_char) { + case 'd': return IsDigit(ch); + case 'D': return !IsDigit(ch); + case 'f': return ch == '\f'; + case 'n': return ch == '\n'; + case 'r': return ch == '\r'; + case 's': return IsWhiteSpace(ch); + case 'S': return !IsWhiteSpace(ch); + case 't': return ch == '\t'; + case 'v': return ch == '\v'; + case 'w': return IsWordChar(ch); + case 'W': return !IsWordChar(ch); + } + return IsPunct(pattern_char) && pattern_char == ch; + } + + return (pattern_char == '.' && ch != '\n') || pattern_char == ch; +} + +// Helper function used by ValidateRegex() to format error messages. +String FormatRegexSyntaxError(const char* regex, int index) { + return (Message() << "Syntax error at index " << index + << " in simple regular expression \"" << regex << "\": ").GetString(); +} + +// Generates non-fatal failures and returns false if regex is invalid; +// otherwise returns true. +bool ValidateRegex(const char* regex) { + if (regex == NULL) { + // TODO(wan@google.com): fix the source file location in the + // assertion failures to match where the regex is used in user + // code. + ADD_FAILURE() << "NULL is not a valid simple regular expression."; + return false; + } + + bool is_valid = true; + + // True iff ?, *, or + can follow the previous atom. + bool prev_repeatable = false; + for (int i = 0; regex[i]; i++) { + if (regex[i] == '\\') { // An escape sequence + i++; + if (regex[i] == '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "'\\' cannot appear at the end."; + return false; + } + + if (!IsValidEscape(regex[i])) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) + << "invalid escape sequence \"\\" << regex[i] << "\"."; + is_valid = false; + } + prev_repeatable = true; + } else { // Not an escape sequence. + const char ch = regex[i]; + + if (ch == '^' && i > 0) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'^' can only appear at the beginning."; + is_valid = false; + } else if (ch == '$' && regex[i + 1] != '\0') { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'$' can only appear at the end."; + is_valid = false; + } else if (IsInSet(ch, "()[]{}|")) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' is unsupported."; + is_valid = false; + } else if (IsRepeat(ch) && !prev_repeatable) { + ADD_FAILURE() << FormatRegexSyntaxError(regex, i) + << "'" << ch << "' can only follow a repeatable token."; + is_valid = false; + } + + prev_repeatable = !IsInSet(ch, "^$?*+"); + } + } + + return is_valid; +} + +// Matches a repeated regex atom followed by a valid simple regular +// expression. The regex atom is defined as c if escaped is false, +// or \c otherwise. repeat is the repetition meta character (?, *, +// or +). The behavior is undefined if str contains too many +// characters to be indexable by size_t, in which case the test will +// probably time out anyway. We are fine with this limitation as +// std::string has it too. +bool MatchRepetitionAndRegexAtHead( + bool escaped, char c, char repeat, const char* regex, + const char* str) { + const size_t min_count = (repeat == '+') ? 1 : 0; + const size_t max_count = (repeat == '?') ? 1 : + static_cast(-1) - 1; + // We cannot call numeric_limits::max() as it conflicts with the + // max() macro on Windows. + + for (size_t i = 0; i <= max_count; ++i) { + // We know that the atom matches each of the first i characters in str. + if (i >= min_count && MatchRegexAtHead(regex, str + i)) { + // We have enough matches at the head, and the tail matches too. + // Since we only care about *whether* the pattern matches str + // (as opposed to *how* it matches), there is no need to find a + // greedy match. + return true; + } + if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) + return false; + } + return false; +} + +// Returns true iff regex matches a prefix of str. regex must be a +// valid simple regular expression and not start with "^", or the +// result is undefined. +bool MatchRegexAtHead(const char* regex, const char* str) { + if (*regex == '\0') // An empty regex matches a prefix of anything. + return true; + + // "$" only matches the end of a string. Note that regex being + // valid guarantees that there's nothing after "$" in it. + if (*regex == '$') + return *str == '\0'; + + // Is the first thing in regex an escape sequence? + const bool escaped = *regex == '\\'; + if (escaped) + ++regex; + if (IsRepeat(regex[1])) { + // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so + // here's an indirect recursion. It terminates as the regex gets + // shorter in each recursion. + return MatchRepetitionAndRegexAtHead( + escaped, regex[0], regex[1], regex + 2, str); + } else { + // regex isn't empty, isn't "$", and doesn't start with a + // repetition. We match the first atom of regex with the first + // character of str and recurse. + return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) && + MatchRegexAtHead(regex + 1, str + 1); + } +} + +// Returns true iff regex matches any substring of str. regex must be +// a valid simple regular expression, or the result is undefined. +// +// The algorithm is recursive, but the recursion depth doesn't exceed +// the regex length, so we won't need to worry about running out of +// stack space normally. In rare cases the time complexity can be +// exponential with respect to the regex length + the string length, +// but usually it's must faster (often close to linear). +bool MatchRegexAnywhere(const char* regex, const char* str) { + if (regex == NULL || str == NULL) + return false; + + if (*regex == '^') + return MatchRegexAtHead(regex + 1, str); + + // A successful match can be anywhere in str. + do { + if (MatchRegexAtHead(regex, str)) + return true; + } while (*str++ != '\0'); + return false; +} + +// Implements the RE class. + +RE::~RE() { + free(const_cast(pattern_)); + free(const_cast(full_pattern_)); +} + +// Returns true iff regular expression re matches the entire str. +bool RE::FullMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str); +} + +// Returns true iff regular expression re matches a substring of str +// (including str itself). +bool RE::PartialMatch(const char* str, const RE& re) { + return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str); +} + +// Initializes an RE from its string representation. +void RE::Init(const char* regex) { + pattern_ = full_pattern_ = NULL; + if (regex != NULL) { +#ifdef GTEST_OS_WINDOWS + pattern_ = _strdup(regex); +#else + pattern_ = strdup(regex); +#endif + } + + is_valid_ = ValidateRegex(regex); + if (!is_valid_) { + // No need to calculate the full pattern when the regex is invalid. + return; + } + + const size_t len = strlen(regex); + // Reserves enough bytes to hold the regular expression used for a + // full match: we need space to prepend a '^', append a '$', and + // terminate the string with '\0'. + char* buffer = static_cast(malloc(len + 3)); + full_pattern_ = buffer; + + if (*regex != '^') + *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'. + + // We don't use snprintf or strncpy, as they trigger a warning when + // compiled with VC++ 8.0. + memcpy(buffer, regex, len); + buffer += len; + + if (len == 0 || regex[len - 1] != '$') + *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'. + + *buffer = '\0'; +} + +#endif // GTEST_USES_POSIX_RE // Logs a message at the given severity level. void GTestLog(GTestLogSeverity severity, const char* file, diff --git a/test/gtest-port_test.cc b/test/gtest-port_test.cc index d945c58..0041c91 100644 --- a/test/gtest-port_test.cc +++ b/test/gtest-port_test.cc @@ -27,7 +27,7 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // -// Author: vladl@google.com (Vlad Losev) +// Authors: vladl@google.com (Vlad Losev), wan@google.com (Zhanyong Wan) // // This file tests the internal cross-platform support utilities. @@ -35,6 +35,18 @@ #include #include +// Indicates that this translation unit is part of Google Test's +// implementation. It must come before gtest-internal-inl.h is +// included, or there will be a compiler error. This trick is to +// prevent a user from accidentally including gtest-internal-inl.h in +// his code. +#define GTEST_IMPLEMENTATION +#include "src/gtest-internal-inl.h" +#undef GTEST_IMPLEMENTATION + +namespace testing { +namespace internal { + TEST(GtestCheckSyntaxTest, BehavesLikeASingleStatement) { if (false) GTEST_CHECK_(false) << "This should never be executed; " @@ -87,9 +99,7 @@ TEST(GtestCheckDeathTest, LivesSilentlyOnSuccess) { #endif // GTEST_HAS_DEATH_TEST -#ifdef GTEST_USES_POSIX_RE - -using ::testing::internal::RE; +#if GTEST_USES_POSIX_RE template class RETest : public ::testing::Test {}; @@ -109,30 +119,30 @@ TYPED_TEST_CASE(RETest, StringTypes); // Tests RE's implicit constructors. TYPED_TEST(RETest, ImplicitConstructorWorks) { - const RE empty = TypeParam(""); + const RE empty(TypeParam("")); EXPECT_STREQ("", empty.pattern()); - const RE simple = TypeParam("hello"); + const RE simple(TypeParam("hello")); EXPECT_STREQ("hello", simple.pattern()); - const RE normal = TypeParam(".*(\\w+)"); + const RE normal(TypeParam(".*(\\w+)")); EXPECT_STREQ(".*(\\w+)", normal.pattern()); } // Tests that RE's constructors reject invalid regular expressions. TYPED_TEST(RETest, RejectsInvalidRegex) { EXPECT_NONFATAL_FAILURE({ - const RE invalid = TypeParam("?"); + const RE invalid(TypeParam("?")); }, "\"?\" is not a valid POSIX Extended regular expression."); } // Tests RE::FullMatch(). TYPED_TEST(RETest, FullMatchWorks) { - const RE empty = TypeParam(""); + const RE empty(TypeParam("")); EXPECT_TRUE(RE::FullMatch(TypeParam(""), empty)); EXPECT_FALSE(RE::FullMatch(TypeParam("a"), empty)); - const RE re = TypeParam("a.*z"); + const RE re(TypeParam("a.*z")); EXPECT_TRUE(RE::FullMatch(TypeParam("az"), re)); EXPECT_TRUE(RE::FullMatch(TypeParam("axyz"), re)); EXPECT_FALSE(RE::FullMatch(TypeParam("baz"), re)); @@ -141,11 +151,11 @@ TYPED_TEST(RETest, FullMatchWorks) { // Tests RE::PartialMatch(). TYPED_TEST(RETest, PartialMatchWorks) { - const RE empty = TypeParam(""); + const RE empty(TypeParam("")); EXPECT_TRUE(RE::PartialMatch(TypeParam(""), empty)); EXPECT_TRUE(RE::PartialMatch(TypeParam("a"), empty)); - const RE re = TypeParam("a.*z"); + const RE re(TypeParam("a.*z")); EXPECT_TRUE(RE::PartialMatch(TypeParam("az"), re)); EXPECT_TRUE(RE::PartialMatch(TypeParam("axyz"), re)); EXPECT_TRUE(RE::PartialMatch(TypeParam("baz"), re)); @@ -153,4 +163,471 @@ TYPED_TEST(RETest, PartialMatchWorks) { EXPECT_FALSE(RE::PartialMatch(TypeParam("zza"), re)); } +#elif GTEST_USES_SIMPLE_RE + +TEST(IsInSetTest, NulCharIsNotInAnySet) { + EXPECT_FALSE(IsInSet('\0', "")); + EXPECT_FALSE(IsInSet('\0', "\0")); + EXPECT_FALSE(IsInSet('\0', "a")); +} + +TEST(IsInSetTest, WorksForNonNulChars) { + EXPECT_FALSE(IsInSet('a', "Ab")); + EXPECT_FALSE(IsInSet('c', "")); + + EXPECT_TRUE(IsInSet('b', "bcd")); + EXPECT_TRUE(IsInSet('b', "ab")); +} + +TEST(IsDigitTest, IsFalseForNonDigit) { + EXPECT_FALSE(IsDigit('\0')); + EXPECT_FALSE(IsDigit(' ')); + EXPECT_FALSE(IsDigit('+')); + EXPECT_FALSE(IsDigit('-')); + EXPECT_FALSE(IsDigit('.')); + EXPECT_FALSE(IsDigit('a')); +} + +TEST(IsDigitTest, IsTrueForDigit) { + EXPECT_TRUE(IsDigit('0')); + EXPECT_TRUE(IsDigit('1')); + EXPECT_TRUE(IsDigit('5')); + EXPECT_TRUE(IsDigit('9')); +} + +TEST(IsPunctTest, IsFalseForNonPunct) { + EXPECT_FALSE(IsPunct('\0')); + EXPECT_FALSE(IsPunct(' ')); + EXPECT_FALSE(IsPunct('\n')); + EXPECT_FALSE(IsPunct('a')); + EXPECT_FALSE(IsPunct('0')); +} + +TEST(IsPunctTest, IsTrueForPunct) { + for (const char* p = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"; *p; p++) { + EXPECT_PRED1(IsPunct, *p); + } +} + +TEST(IsRepeatTest, IsFalseForNonRepeatChar) { + EXPECT_FALSE(IsRepeat('\0')); + EXPECT_FALSE(IsRepeat(' ')); + EXPECT_FALSE(IsRepeat('a')); + EXPECT_FALSE(IsRepeat('1')); + EXPECT_FALSE(IsRepeat('-')); +} + +TEST(IsRepeatTest, IsTrueForRepeatChar) { + EXPECT_TRUE(IsRepeat('?')); + EXPECT_TRUE(IsRepeat('*')); + EXPECT_TRUE(IsRepeat('+')); +} + +TEST(IsWhiteSpaceTest, IsFalseForNonWhiteSpace) { + EXPECT_FALSE(IsWhiteSpace('\0')); + EXPECT_FALSE(IsWhiteSpace('a')); + EXPECT_FALSE(IsWhiteSpace('1')); + EXPECT_FALSE(IsWhiteSpace('+')); + EXPECT_FALSE(IsWhiteSpace('_')); +} + +TEST(IsWhiteSpaceTest, IsTrueForWhiteSpace) { + EXPECT_TRUE(IsWhiteSpace(' ')); + EXPECT_TRUE(IsWhiteSpace('\n')); + EXPECT_TRUE(IsWhiteSpace('\r')); + EXPECT_TRUE(IsWhiteSpace('\t')); + EXPECT_TRUE(IsWhiteSpace('\v')); + EXPECT_TRUE(IsWhiteSpace('\f')); +} + +TEST(IsWordCharTest, IsFalseForNonWordChar) { + EXPECT_FALSE(IsWordChar('\0')); + EXPECT_FALSE(IsWordChar('+')); + EXPECT_FALSE(IsWordChar('.')); + EXPECT_FALSE(IsWordChar(' ')); + EXPECT_FALSE(IsWordChar('\n')); +} + +TEST(IsWordCharTest, IsTrueForLetter) { + EXPECT_TRUE(IsWordChar('a')); + EXPECT_TRUE(IsWordChar('b')); + EXPECT_TRUE(IsWordChar('A')); + EXPECT_TRUE(IsWordChar('Z')); +} + +TEST(IsWordCharTest, IsTrueForDigit) { + EXPECT_TRUE(IsWordChar('0')); + EXPECT_TRUE(IsWordChar('1')); + EXPECT_TRUE(IsWordChar('7')); + EXPECT_TRUE(IsWordChar('9')); +} + +TEST(IsWordCharTest, IsTrueForUnderscore) { + EXPECT_TRUE(IsWordChar('_')); +} + +TEST(IsValidEscapeTest, IsFalseForNonPrintable) { + EXPECT_FALSE(IsValidEscape('\0')); + EXPECT_FALSE(IsValidEscape('\007')); +} + +TEST(IsValidEscapeTest, IsFalseForDigit) { + EXPECT_FALSE(IsValidEscape('0')); + EXPECT_FALSE(IsValidEscape('9')); +} + +TEST(IsValidEscapeTest, IsFalseForWhiteSpace) { + EXPECT_FALSE(IsValidEscape(' ')); + EXPECT_FALSE(IsValidEscape('\n')); +} + +TEST(IsValidEscapeTest, IsFalseForSomeLetter) { + EXPECT_FALSE(IsValidEscape('a')); + EXPECT_FALSE(IsValidEscape('Z')); +} + +TEST(IsValidEscapeTest, IsTrueForPunct) { + EXPECT_TRUE(IsValidEscape('.')); + EXPECT_TRUE(IsValidEscape('-')); + EXPECT_TRUE(IsValidEscape('^')); + EXPECT_TRUE(IsValidEscape('$')); + EXPECT_TRUE(IsValidEscape('(')); + EXPECT_TRUE(IsValidEscape(']')); + EXPECT_TRUE(IsValidEscape('{')); + EXPECT_TRUE(IsValidEscape('|')); +} + +TEST(IsValidEscapeTest, IsTrueForSomeLetter) { + EXPECT_TRUE(IsValidEscape('d')); + EXPECT_TRUE(IsValidEscape('D')); + EXPECT_TRUE(IsValidEscape('s')); + EXPECT_TRUE(IsValidEscape('S')); + EXPECT_TRUE(IsValidEscape('w')); + EXPECT_TRUE(IsValidEscape('W')); +} + +TEST(AtomMatchesCharTest, EscapedPunct) { + EXPECT_FALSE(AtomMatchesChar(true, '\\', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, '\\', ' ')); + EXPECT_FALSE(AtomMatchesChar(true, '_', '.')); + EXPECT_FALSE(AtomMatchesChar(true, '.', 'a')); + + EXPECT_TRUE(AtomMatchesChar(true, '\\', '\\')); + EXPECT_TRUE(AtomMatchesChar(true, '_', '_')); + EXPECT_TRUE(AtomMatchesChar(true, '+', '+')); + EXPECT_TRUE(AtomMatchesChar(true, '.', '.')); +} + +TEST(AtomMatchesCharTest, Escaped_d) { + EXPECT_FALSE(AtomMatchesChar(true, 'd', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'd', 'a')); + EXPECT_FALSE(AtomMatchesChar(true, 'd', '.')); + + EXPECT_TRUE(AtomMatchesChar(true, 'd', '0')); + EXPECT_TRUE(AtomMatchesChar(true, 'd', '9')); +} + +TEST(AtomMatchesCharTest, Escaped_D) { + EXPECT_FALSE(AtomMatchesChar(true, 'D', '0')); + EXPECT_FALSE(AtomMatchesChar(true, 'D', '9')); + + EXPECT_TRUE(AtomMatchesChar(true, 'D', '\0')); + EXPECT_TRUE(AtomMatchesChar(true, 'D', 'a')); + EXPECT_TRUE(AtomMatchesChar(true, 'D', '-')); +} + +TEST(AtomMatchesCharTest, Escaped_s) { + EXPECT_FALSE(AtomMatchesChar(true, 's', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 's', 'a')); + EXPECT_FALSE(AtomMatchesChar(true, 's', '.')); + EXPECT_FALSE(AtomMatchesChar(true, 's', '9')); + + EXPECT_TRUE(AtomMatchesChar(true, 's', ' ')); + EXPECT_TRUE(AtomMatchesChar(true, 's', '\n')); + EXPECT_TRUE(AtomMatchesChar(true, 's', '\t')); +} + +TEST(AtomMatchesCharTest, Escaped_S) { + EXPECT_FALSE(AtomMatchesChar(true, 'S', ' ')); + EXPECT_FALSE(AtomMatchesChar(true, 'S', '\r')); + + EXPECT_TRUE(AtomMatchesChar(true, 'S', '\0')); + EXPECT_TRUE(AtomMatchesChar(true, 'S', 'a')); + EXPECT_TRUE(AtomMatchesChar(true, 'S', '9')); +} + +TEST(AtomMatchesCharTest, Escaped_w) { + EXPECT_FALSE(AtomMatchesChar(true, 'w', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'w', '+')); + EXPECT_FALSE(AtomMatchesChar(true, 'w', ' ')); + EXPECT_FALSE(AtomMatchesChar(true, 'w', '\n')); + + EXPECT_TRUE(AtomMatchesChar(true, 'w', '0')); + EXPECT_TRUE(AtomMatchesChar(true, 'w', 'b')); + EXPECT_TRUE(AtomMatchesChar(true, 'w', 'C')); + EXPECT_TRUE(AtomMatchesChar(true, 'w', '_')); +} + +TEST(AtomMatchesCharTest, Escaped_W) { + EXPECT_FALSE(AtomMatchesChar(true, 'W', 'A')); + EXPECT_FALSE(AtomMatchesChar(true, 'W', 'b')); + EXPECT_FALSE(AtomMatchesChar(true, 'W', '9')); + EXPECT_FALSE(AtomMatchesChar(true, 'W', '_')); + + EXPECT_TRUE(AtomMatchesChar(true, 'W', '\0')); + EXPECT_TRUE(AtomMatchesChar(true, 'W', '*')); + EXPECT_TRUE(AtomMatchesChar(true, 'W', '\n')); +} + +TEST(AtomMatchesCharTest, EscapedWhiteSpace) { + EXPECT_FALSE(AtomMatchesChar(true, 'f', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'f', '\n')); + EXPECT_FALSE(AtomMatchesChar(true, 'n', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'n', '\r')); + EXPECT_FALSE(AtomMatchesChar(true, 'r', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'r', 'a')); + EXPECT_FALSE(AtomMatchesChar(true, 't', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 't', 't')); + EXPECT_FALSE(AtomMatchesChar(true, 'v', '\0')); + EXPECT_FALSE(AtomMatchesChar(true, 'v', '\f')); + + EXPECT_TRUE(AtomMatchesChar(true, 'f', '\f')); + EXPECT_TRUE(AtomMatchesChar(true, 'n', '\n')); + EXPECT_TRUE(AtomMatchesChar(true, 'r', '\r')); + EXPECT_TRUE(AtomMatchesChar(true, 't', '\t')); + EXPECT_TRUE(AtomMatchesChar(true, 'v', '\v')); +} + +TEST(AtomMatchesCharTest, UnescapedDot) { + EXPECT_FALSE(AtomMatchesChar(false, '.', '\n')); + + EXPECT_TRUE(AtomMatchesChar(false, '.', '\0')); + EXPECT_TRUE(AtomMatchesChar(false, '.', '.')); + EXPECT_TRUE(AtomMatchesChar(false, '.', 'a')); + EXPECT_TRUE(AtomMatchesChar(false, '.', ' ')); +} + +TEST(AtomMatchesCharTest, UnescapedChar) { + EXPECT_FALSE(AtomMatchesChar(false, 'a', '\0')); + EXPECT_FALSE(AtomMatchesChar(false, 'a', 'b')); + EXPECT_FALSE(AtomMatchesChar(false, '$', 'a')); + + EXPECT_TRUE(AtomMatchesChar(false, '$', '$')); + EXPECT_TRUE(AtomMatchesChar(false, '5', '5')); + EXPECT_TRUE(AtomMatchesChar(false, 'Z', 'Z')); +} + +TEST(ValidateRegexTest, GeneratesFailureAndReturnsFalseForInvalid) { + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(NULL)), + "NULL is not a valid simple regular expression"); + EXPECT_NONFATAL_FAILURE( + ASSERT_FALSE(ValidateRegex("a\\")), + "Syntax error at index 1 in simple regular expression \"a\\\": "); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a\\")), + "'\\' cannot appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\n\\")), + "'\\' cannot appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("\\s\\hb")), + "invalid escape sequence \"\\h\""); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^^")), + "'^' can only appear at the beginning"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex(".*^b")), + "'^' can only appear at the beginning"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("$$")), + "'$' can only appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^$a")), + "'$' can only appear at the end"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a(b")), + "'(' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("ab)")), + "')' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("[ab")), + "'[' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("a{2")), + "'{' is unsupported"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("?")), + "'?' can only follow a repeatable token"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("^*")), + "'*' can only follow a repeatable token"); + EXPECT_NONFATAL_FAILURE(ASSERT_FALSE(ValidateRegex("5*+")), + "'+' can only follow a repeatable token"); +} + +TEST(ValidateRegexTest, ReturnsTrueForValid) { + EXPECT_TRUE(ValidateRegex("")); + EXPECT_TRUE(ValidateRegex("a")); + EXPECT_TRUE(ValidateRegex(".*")); + EXPECT_TRUE(ValidateRegex("^a_+")); + EXPECT_TRUE(ValidateRegex("^a\\t\\&?")); + EXPECT_TRUE(ValidateRegex("09*$")); + EXPECT_TRUE(ValidateRegex("^Z$")); + EXPECT_TRUE(ValidateRegex("a\\^Z\\$\\(\\)\\|\\[\\]\\{\\}")); +} + +TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrOne) { + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "a", "ba")); + // Repeating more than once. + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "aab")); + + // Repeating zero times. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ba")); + // Repeating once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, 'a', '?', "b", "ab")); + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '#', '?', ".", "##")); +} + +TEST(MatchRepetitionAndRegexAtHeadTest, WorksForZeroOrMany) { + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '*', "a$", "baab")); + + // Repeating zero times. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "bc")); + // Repeating once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '*', "b", "abc")); + // Repeating more than once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '*', "-", "ab_1-g")); +} + +TEST(MatchRepetitionAndRegexAtHeadTest, WorksForOneOrMany) { + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "a$", "baab")); + // Repeating zero times. + EXPECT_FALSE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "bc")); + + // Repeating once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(false, '.', '+', "b", "abc")); + // Repeating more than once. + EXPECT_TRUE(MatchRepetitionAndRegexAtHead(true, 'w', '+', "-", "ab_1-g")); +} + +TEST(MatchRegexAtHeadTest, ReturnsTrueForEmptyRegex) { + EXPECT_TRUE(MatchRegexAtHead("", "")); + EXPECT_TRUE(MatchRegexAtHead("", "ab")); +} + +TEST(MatchRegexAtHeadTest, WorksWhenDollarIsInRegex) { + EXPECT_FALSE(MatchRegexAtHead("$", "a")); + + EXPECT_TRUE(MatchRegexAtHead("$", "")); + EXPECT_TRUE(MatchRegexAtHead("a$", "a")); +} + +TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithEscapeSequence) { + EXPECT_FALSE(MatchRegexAtHead("\\w", "+")); + EXPECT_FALSE(MatchRegexAtHead("\\W", "ab")); + + EXPECT_TRUE(MatchRegexAtHead("\\sa", "\nab")); + EXPECT_TRUE(MatchRegexAtHead("\\d", "1a")); +} + +TEST(MatchRegexAtHeadTest, WorksWhenRegexStartsWithRepetition) { + EXPECT_FALSE(MatchRegexAtHead(".+a", "abc")); + EXPECT_FALSE(MatchRegexAtHead("a?b", "aab")); + + EXPECT_TRUE(MatchRegexAtHead(".*a", "bc12-ab")); + EXPECT_TRUE(MatchRegexAtHead("a?b", "b")); + EXPECT_TRUE(MatchRegexAtHead("a?b", "ab")); +} + +TEST(MatchRegexAtHeadTest, + WorksWhenRegexStartsWithRepetionOfEscapeSequence) { + EXPECT_FALSE(MatchRegexAtHead("\\.+a", "abc")); + EXPECT_FALSE(MatchRegexAtHead("\\s?b", " b")); + + EXPECT_TRUE(MatchRegexAtHead("\\(*a", "((((ab")); + EXPECT_TRUE(MatchRegexAtHead("\\^?b", "^b")); + EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "b")); + EXPECT_TRUE(MatchRegexAtHead("\\\\?b", "\\b")); +} + +TEST(MatchRegexAtHeadTest, MatchesSequentially) { + EXPECT_FALSE(MatchRegexAtHead("ab.*c", "acabc")); + + EXPECT_TRUE(MatchRegexAtHead("ab.*c", "ab-fsc")); +} + +TEST(MatchRegexAnywhereTest, ReturnsFalseWhenStringIsNull) { + EXPECT_FALSE(MatchRegexAnywhere("", NULL)); +} + +TEST(MatchRegexAnywhereTest, WorksWhenRegexStartsWithCaret) { + EXPECT_FALSE(MatchRegexAnywhere("^a", "ba")); + EXPECT_FALSE(MatchRegexAnywhere("^$", "a")); + + EXPECT_TRUE(MatchRegexAnywhere("^a", "ab")); + EXPECT_TRUE(MatchRegexAnywhere("^", "ab")); + EXPECT_TRUE(MatchRegexAnywhere("^$", "")); +} + +TEST(MatchRegexAnywhereTest, ReturnsFalseWhenNoMatch) { + EXPECT_FALSE(MatchRegexAnywhere("a", "bcde123")); + EXPECT_FALSE(MatchRegexAnywhere("a.+a", "--aa88888888")); +} + +TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingPrefix) { + EXPECT_TRUE(MatchRegexAnywhere("\\w+", "ab1_ - 5")); + EXPECT_TRUE(MatchRegexAnywhere(".*=", "=")); + EXPECT_TRUE(MatchRegexAnywhere("x.*ab?.*bc", "xaaabc")); +} + +TEST(MatchRegexAnywhereTest, ReturnsTrueWhenMatchingNonPrefix) { + EXPECT_TRUE(MatchRegexAnywhere("\\w+", "$$$ ab1_ - 5")); + EXPECT_TRUE(MatchRegexAnywhere("\\.+=", "= ...=")); +} + +// Tests RE's implicit constructors. +TEST(RETest, ImplicitConstructorWorks) { + const RE empty = ""; + EXPECT_STREQ("", empty.pattern()); + + const RE simple = "hello"; + EXPECT_STREQ("hello", simple.pattern()); +} + +// Tests that RE's constructors reject invalid regular expressions. +TEST(RETest, RejectsInvalidRegex) { + EXPECT_NONFATAL_FAILURE({ + const RE normal = NULL; + }, "NULL is not a valid simple regular expression"); + + EXPECT_NONFATAL_FAILURE({ + const RE normal = ".*(\\w+"; + }, "'(' is unsupported"); + + EXPECT_NONFATAL_FAILURE({ + const RE invalid = "^?"; + }, "'?' can only follow a repeatable token"); +} + +// Tests RE::FullMatch(). +TEST(RETest, FullMatchWorks) { + const RE empty(""); + EXPECT_TRUE(RE::FullMatch("", empty)); + EXPECT_FALSE(RE::FullMatch("a", empty)); + + const RE re1 = "a"; + EXPECT_TRUE(RE::FullMatch("a", re1)); + + const RE re = "a.*z"; + EXPECT_TRUE(RE::FullMatch("az", re)); + EXPECT_TRUE(RE::FullMatch("axyz", re)); + EXPECT_FALSE(RE::FullMatch("baz", re)); + EXPECT_FALSE(RE::FullMatch("azy", re)); +} + +// Tests RE::PartialMatch(). +TEST(RETest, PartialMatchWorks) { + const RE empty = ""; + EXPECT_TRUE(RE::PartialMatch("", empty)); + EXPECT_TRUE(RE::PartialMatch("a", empty)); + + const RE re = "a.*z"; + EXPECT_TRUE(RE::PartialMatch("az", re)); + EXPECT_TRUE(RE::PartialMatch("axyz", re)); + EXPECT_TRUE(RE::PartialMatch("baz", re)); + EXPECT_TRUE(RE::PartialMatch("azy", re)); + EXPECT_FALSE(RE::PartialMatch("zza", re)); +} + #endif // GTEST_USES_POSIX_RE + +} // namespace internal +} // namespace testing -- cgit v0.12