From e7c36fc2e420f1cee4370020b9f50bb5a6dfe92a Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Wed, 16 Sep 2009 20:46:10 +0200 Subject: Add a new wildcard mode similar to bash in QRegExp It is not possible to escape a wildcard character in the Wildcard mode of QRegExp. This follows the kind of wildcard of the CLI of Windows. The new WildcardUnix follows the escaping of a Unix bash shell. Task-number: 241346 Reviewed-by: Olivier Goffart Reviewed-by: Matthew Cattell --- src/corelib/tools/qregexp.cpp | 95 +++++++++++++++++++++++++++++++------- src/corelib/tools/qregexp.h | 8 +++- tests/auto/qregexp/tst_qregexp.cpp | 75 +++++++++++++++++++++++++++++- 3 files changed, 159 insertions(+), 19 deletions(-) diff --git a/src/corelib/tools/qregexp.cpp b/src/corelib/tools/qregexp.cpp index 8d7a75d..1f23211 100644 --- a/src/corelib/tools/qregexp.cpp +++ b/src/corelib/tools/qregexp.cpp @@ -522,6 +522,10 @@ int qFindString(const QChar *haystack, int haystackLen, int from, outside, backslash has no special meaning. \endtable + In the mode Wildcard, the wildcard characters cannot be + escaped. In the mode WildcardUnix, the character '\' escapes the + wildcard. + For example if we are in wildcard mode and have strings which contain filenames we could identify HTML files with \bold{*.html}. This will match zero or more characters followed by a dot followed @@ -751,50 +755,100 @@ static void mergeInto(QVector *a, const QVector &b) /* Translates a wildcard pattern to an equivalent regular expression pattern (e.g., *.cpp to .*\.cpp). 
+ + If enableEscaping is true, it is possible to escape the wildcard + characters with \ */ -static QString wc2rx(const QString &wc_str) +static QString wc2rx(const QString &wc_str, const bool enableEscaping) { - int wclen = wc_str.length(); + const int wclen = wc_str.length(); QString rx; int i = 0; + bool isEscaping = false; // the previous character is '\' const QChar *wc = wc_str.unicode(); + while (i < wclen) { - QChar c = wc[i++]; + const QChar c = wc[i++]; switch (c.unicode()) { + case '\\': + if (enableEscaping) { + if (isEscaping) { + rx += QLatin1String("\\\\"); + } // we insert the \\ later if necessary + if (i+1 == wclen) { // the end + rx += QLatin1String("\\\\"); + } + } else { + rx += QLatin1String("\\\\"); + } + isEscaping = true; + break; case '*': - rx += QLatin1String(".*"); + if (isEscaping) { + rx += QLatin1String("\\*"); + isEscaping = false; + } else { + rx += QLatin1String(".*"); + } break; case '?': - rx += QLatin1Char('.'); + if (isEscaping) { + rx += QLatin1String("\\?"); + isEscaping = false; + } else { + rx += QLatin1Char('.'); + } + break; case '$': case '(': case ')': case '+': case '.': - case '\\': case '^': case '{': case '|': case '}': + if (isEscaping) { + isEscaping = false; + rx += QLatin1String("\\\\"); + } rx += QLatin1Char('\\'); rx += c; break; - case '[': - rx += c; - if (wc[i] == QLatin1Char('^')) - rx += wc[i++]; - if (i < wclen) { - if (rx[i] == QLatin1Char(']')) - rx += wc[i++]; - while (i < wclen && wc[i] != QLatin1Char(']')) { - if (wc[i] == QLatin1Char('\\')) - rx += QLatin1Char('\\'); + case '[': + if (isEscaping) { + isEscaping = false; + rx += QLatin1String("\\["); + } else { + rx += c; + if (wc[i] == QLatin1Char('^')) rx += wc[i++]; + if (i < wclen) { + if (rx[i] == QLatin1Char(']')) + rx += wc[i++]; + while (i < wclen && wc[i] != QLatin1Char(']')) { + if (wc[i] == QLatin1Char('\\')) + rx += QLatin1Char('\\'); + rx += wc[i++]; + } } } + break; + + case ']': + if(isEscaping){ + isEscaping = false; + rx += 
QLatin1String("\\"); + } + rx += c; break; + default: + if(isEscaping){ + isEscaping = false; + rx += QLatin1String("\\\\"); + } rx += c; } } @@ -1272,7 +1326,10 @@ Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::Pat switch (patternSyntax) { #ifndef QT_NO_REGEXP_WILDCARD case QRegExp::Wildcard: - return wc2rx(pattern); + return wc2rx(pattern, false); + break; + case QRegExp::WildcardUnix: + return wc2rx(pattern, true); break; #endif case QRegExp::FixedString: @@ -3715,6 +3772,10 @@ static void invalidateEngine(QRegExpPrivate *priv) similar to that used by shells (command interpreters) for "file globbing". See \l{Wildcard Matching}. + \value WildcardUnix This is similar to Wildcard but with the + behavior of a Unix shell. The wildcard characters can be escaped + with the character "\". + \value FixedString The pattern is a fixed string. This is equivalent to using the RegExp pattern on a string in which all metacharacters are escaped using escape(). diff --git a/src/corelib/tools/qregexp.h b/src/corelib/tools/qregexp.h index c03e9e4..1a7cf53 100644 --- a/src/corelib/tools/qregexp.h +++ b/src/corelib/tools/qregexp.h @@ -61,7 +61,13 @@ class QStringList; class Q_CORE_EXPORT QRegExp { public: - enum PatternSyntax { RegExp, Wildcard, FixedString, RegExp2, W3CXmlSchema11 }; + enum PatternSyntax { + RegExp, + Wildcard, + FixedString, + RegExp2, + WildcardUnix, + W3CXmlSchema11 }; enum CaretMode { CaretAtZero, CaretAtOffset, CaretWontMatch }; QRegExp(); diff --git a/tests/auto/qregexp/tst_qregexp.cpp b/tests/auto/qregexp/tst_qregexp.cpp index 7496ec6..86d831e 100644 --- a/tests/auto/qregexp/tst_qregexp.cpp +++ b/tests/auto/qregexp/tst_qregexp.cpp @@ -70,6 +70,10 @@ private slots: void matchedLength(); void wildcard_data(); void wildcard(); + void testEscapingWildcard_data(); + void testEscapingWildcard(); + void testInvalidWildcard_data(); + void testInvalidWildcard(); void caretAnchoredOptimization(); void isEmpty(); void 
prepareEngineOptimization(); @@ -909,10 +913,79 @@ void tst_QRegExp::wildcard() QFETCH( int, foundIndex ); QRegExp r( rxp ); - r.setPatternSyntax(QRegExp::Wildcard); + r.setPatternSyntax(QRegExp::WildcardUnix); QCOMPARE( r.indexIn( string ), foundIndex ); } +void tst_QRegExp::testEscapingWildcard_data(){ + QTest::addColumn("pattern"); + QTest::addColumn("teststring"); + QTest::addColumn("isMatching"); + + QTest::newRow("[ Not escaped") << "[Qt;" << "[Qt;" << false; + QTest::newRow("[ Escaped") << "\\[Qt;" << "[Qt;" << true; + + QTest::newRow("] Not escaped") << "]Ik;" << "]Ik;" << false; + QTest::newRow("] Escaped") << "\\]Ip;" << "]Ip;" << true; + + QTest::newRow("? Not escaped valid") << "?Ou:" << ".Ou:" << true; + QTest::newRow("? Not escaped invalid") << "?Tr;" << "Tr;" << false; + QTest::newRow("? Escaped") << "\\?O;" << "?O;" << true; + + QTest::newRow("[] not escaped") << "[lL]" << "l" << true; + QTest::newRow("case [[]") << "[[abc]" << "[" << true; + QTest::newRow("case []abc] match ]") << "[]abc]" << "]" << true; + QTest::newRow("case []abc] match a") << "[]abc]" << "a" << true; + QTest::newRow("case [abc] match a") << "[abc]" << "a" << true; + QTest::newRow("case []] don't match [") << "[]abc]" << "[" << false; + QTest::newRow("case [^]abc] match d") << "[^]abc]" << "d" << true; + QTest::newRow("case [^]abc] don't match ]") << "[^]abc]" << "]" << false; + + QTest::newRow("* Not escaped with char") << "*Te;" << "12345Te;" << true; + QTest::newRow("* Not escaped without char") << "*Ch;" << "Ch;" << true; + QTest::newRow("* Not escaped invalid") << "*Ro;" << "o;" << false; + QTest::newRow("* Escaped") << "\\[Cks;" << "[Cks;" << true; + + QTest::newRow("a true '\\' in input") << "\\Qt;" << "\\Qt;" << true; + QTest::newRow("two true '\\' in input") << "\\\\Qt;" << "\\\\Qt;" << true; + QTest::newRow("a '\\' at the end") << "\\\\Qt;" << "\\\\Qt;" << true; + +} +void tst_QRegExp::testEscapingWildcard(){ + QFETCH(QString, pattern); + + QRegExp re(pattern); + 
re.setPatternSyntax(QRegExp::WildcardUnix); + + QFETCH(QString, teststring); + QFETCH(bool, isMatching); + QCOMPARE(re.exactMatch(teststring), isMatching); +} + +void tst_QRegExp::testInvalidWildcard_data(){ + QTest::addColumn("pattern"); + QTest::addColumn("isValid"); + + QTest::newRow("valid []") << "[abc]" << true; + QTest::newRow("invalid [") << "[abc" << false; + QTest::newRow("ending [") << "abc[" << false; + QTest::newRow("ending ]") << "abc]" << false; + QTest::newRow("ending [^") << "abc[^" << false; + QTest::newRow("ending [\\") << "abc[\\" << false; + QTest::newRow("ending []") << "abc[]" << false; + QTest::newRow("ending [[") << "abc[[" << false; + +} +void tst_QRegExp::testInvalidWildcard(){ + QFETCH(QString, pattern); + + QRegExp re(pattern); + re.setPatternSyntax(QRegExp::Wildcard); + + QFETCH(bool, isValid); + QCOMPARE(re.isValid(), isValid); +} + void tst_QRegExp::caretAnchoredOptimization() { QString s = "---babnana----"; -- cgit v0.12