summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2009-09-16 18:46:10 (GMT)
committerBenjamin Poulain <benjamin.poulain@nokia.com>2009-09-17 07:37:17 (GMT)
commite7c36fc2e420f1cee4370020b9f50bb5a6dfe92a (patch)
treec3053cb018572650bf839195c5cc19092034dd71
parentdfcf988a3f0c88f96e202482e5d363d880b9d6d2 (diff)
downloadQt-e7c36fc2e420f1cee4370020b9f50bb5a6dfe92a.zip
Qt-e7c36fc2e420f1cee4370020b9f50bb5a6dfe92a.tar.gz
Qt-e7c36fc2e420f1cee4370020b9f50bb5a6dfe92a.tar.bz2
Add a new wildcard mode similar to bash in QRegExp
It is not possible to escape a wildcard character in the Wildcard mode of QRegExp; that mode follows the wildcard conventions of the Windows command line. The new WildcardUnix mode follows the escaping rules of a Unix shell such as bash. Task-number: 241346 Reviewed-by: Olivier Goffart Reviewed-by: Matthew Cattell
-rw-r--r--src/corelib/tools/qregexp.cpp95
-rw-r--r--src/corelib/tools/qregexp.h8
-rw-r--r--tests/auto/qregexp/tst_qregexp.cpp75
3 files changed, 159 insertions, 19 deletions
diff --git a/src/corelib/tools/qregexp.cpp b/src/corelib/tools/qregexp.cpp
index 8d7a75d..1f23211 100644
--- a/src/corelib/tools/qregexp.cpp
+++ b/src/corelib/tools/qregexp.cpp
@@ -522,6 +522,10 @@ int qFindString(const QChar *haystack, int haystackLen, int from,
outside, backslash has no special meaning.
\endtable
+ In the mode Wildcard, the wildcard characters cannot be
+ escaped. In the mode WildcardUnix, the character '\' escapes the
+ wildcard.
+
For example if we are in wildcard mode and have strings which
contain filenames we could identify HTML files with \bold{*.html}.
This will match zero or more characters followed by a dot followed
@@ -751,50 +755,100 @@ static void mergeInto(QVector<int> *a, const QVector<int> &b)
/*
Translates a wildcard pattern to an equivalent regular expression
pattern (e.g., *.cpp to .*\.cpp).
+
+ If enableEscaping is true, it is possible to escape the wildcard
+ characters with \
*/
-static QString wc2rx(const QString &wc_str)
+static QString wc2rx(const QString &wc_str, const bool enableEscaping)
{
- int wclen = wc_str.length();
+ const int wclen = wc_str.length();
QString rx;
int i = 0;
+ bool isEscaping = false; // the previous character is '\'
const QChar *wc = wc_str.unicode();
+
while (i < wclen) {
- QChar c = wc[i++];
+ const QChar c = wc[i++];
switch (c.unicode()) {
+ case '\\':
+ if (enableEscaping) {
+ if (isEscaping) {
+ rx += QLatin1String("\\\\");
+ } // we insert the \\ later if necessary
+ if (i+1 == wclen) { // the end
+ rx += QLatin1String("\\\\");
+ }
+ } else {
+ rx += QLatin1String("\\\\");
+ }
+ isEscaping = true;
+ break;
case '*':
- rx += QLatin1String(".*");
+ if (isEscaping) {
+ rx += QLatin1String("\\*");
+ isEscaping = false;
+ } else {
+ rx += QLatin1String(".*");
+ }
break;
case '?':
- rx += QLatin1Char('.');
+ if (isEscaping) {
+ rx += QLatin1String("\\?");
+ isEscaping = false;
+ } else {
+ rx += QLatin1Char('.');
+ }
+
break;
case '$':
case '(':
case ')':
case '+':
case '.':
- case '\\':
case '^':
case '{':
case '|':
case '}':
+ if (isEscaping) {
+ isEscaping = false;
+ rx += QLatin1String("\\\\");
+ }
rx += QLatin1Char('\\');
rx += c;
break;
- case '[':
- rx += c;
- if (wc[i] == QLatin1Char('^'))
- rx += wc[i++];
- if (i < wclen) {
- if (rx[i] == QLatin1Char(']'))
- rx += wc[i++];
- while (i < wclen && wc[i] != QLatin1Char(']')) {
- if (wc[i] == QLatin1Char('\\'))
- rx += QLatin1Char('\\');
+ case '[':
+ if (isEscaping) {
+ isEscaping = false;
+ rx += QLatin1String("\\[");
+ } else {
+ rx += c;
+ if (wc[i] == QLatin1Char('^'))
rx += wc[i++];
+ if (i < wclen) {
+ if (rx[i] == QLatin1Char(']'))
+ rx += wc[i++];
+ while (i < wclen && wc[i] != QLatin1Char(']')) {
+ if (wc[i] == QLatin1Char('\\'))
+ rx += QLatin1Char('\\');
+ rx += wc[i++];
+ }
}
}
+ break;
+
+ case ']':
+ if(isEscaping){
+ isEscaping = false;
+ rx += QLatin1String("\\");
+ }
+ rx += c;
break;
+
default:
+ if(isEscaping){
+ isEscaping = false;
+ rx += QLatin1String("\\\\");
+ }
rx += c;
}
}
@@ -1272,7 +1326,10 @@ Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::Pat
switch (patternSyntax) {
#ifndef QT_NO_REGEXP_WILDCARD
case QRegExp::Wildcard:
- return wc2rx(pattern);
+ return wc2rx(pattern, false);
+ break;
+ case QRegExp::WildcardUnix:
+ return wc2rx(pattern, true);
break;
#endif
case QRegExp::FixedString:
@@ -3715,6 +3772,10 @@ static void invalidateEngine(QRegExpPrivate *priv)
similar to that used by shells (command interpreters) for "file
globbing". See \l{Wildcard Matching}.
+ \value WildcardUnix This is similar to Wildcard but with the
+ behavior of a Unix shell. The wildcard characters can be escaped
+ with the character "\".
+
\value FixedString The pattern is a fixed string. This is
equivalent to using the RegExp pattern on a string in
which all metacharacters are escaped using escape().
diff --git a/src/corelib/tools/qregexp.h b/src/corelib/tools/qregexp.h
index c03e9e4..1a7cf53 100644
--- a/src/corelib/tools/qregexp.h
+++ b/src/corelib/tools/qregexp.h
@@ -61,7 +61,13 @@ class QStringList;
class Q_CORE_EXPORT QRegExp
{
public:
- enum PatternSyntax { RegExp, Wildcard, FixedString, RegExp2, W3CXmlSchema11 };
+ enum PatternSyntax {
+ RegExp,
+ Wildcard,
+ FixedString,
+ RegExp2,
+ WildcardUnix,
+ W3CXmlSchema11 };
enum CaretMode { CaretAtZero, CaretAtOffset, CaretWontMatch };
QRegExp();
diff --git a/tests/auto/qregexp/tst_qregexp.cpp b/tests/auto/qregexp/tst_qregexp.cpp
index 7496ec6..86d831e 100644
--- a/tests/auto/qregexp/tst_qregexp.cpp
+++ b/tests/auto/qregexp/tst_qregexp.cpp
@@ -70,6 +70,10 @@ private slots:
void matchedLength();
void wildcard_data();
void wildcard();
+ void testEscapingWildcard_data();
+ void testEscapingWildcard();
+ void testInvalidWildcard_data();
+ void testInvalidWildcard();
void caretAnchoredOptimization();
void isEmpty();
void prepareEngineOptimization();
@@ -909,10 +913,79 @@ void tst_QRegExp::wildcard()
QFETCH( int, foundIndex );
QRegExp r( rxp );
- r.setPatternSyntax(QRegExp::Wildcard);
+ r.setPatternSyntax(QRegExp::WildcardUnix);
QCOMPARE( r.indexIn( string ), foundIndex );
}
+void tst_QRegExp::testEscapingWildcard_data(){
+ QTest::addColumn<QString>("pattern");
+ QTest::addColumn<QString>("teststring");
+ QTest::addColumn<bool>("isMatching");
+
+ QTest::newRow("[ Not escaped") << "[Qt;" << "[Qt;" << false;
+ QTest::newRow("[ Escaped") << "\\[Qt;" << "[Qt;" << true;
+
+ QTest::newRow("] Not escaped") << "]Ik;" << "]Ik;" << false;
+ QTest::newRow("] Escaped") << "\\]Ip;" << "]Ip;" << true;
+
+ QTest::newRow("? Not escaped valid") << "?Ou:" << ".Ou:" << true;
+ QTest::newRow("? Not escaped invalid") << "?Tr;" << "Tr;" << false;
+ QTest::newRow("? Escaped") << "\\?O;" << "?O;" << true;
+
+ QTest::newRow("[] not escaped") << "[lL]" << "l" << true;
+ QTest::newRow("case [[]") << "[[abc]" << "[" << true;
+ QTest::newRow("case []abc] match ]") << "[]abc]" << "]" << true;
+ QTest::newRow("case []abc] match a") << "[]abc]" << "a" << true;
+ QTest::newRow("case [abc] match a") << "[abc]" << "a" << true;
+ QTest::newRow("case []] don't match [") << "[]abc]" << "[" << false;
+ QTest::newRow("case [^]abc] match d") << "[^]abc]" << "d" << true;
+ QTest::newRow("case [^]abc] don't match ]") << "[^]abc]" << "]" << false;
+
+ QTest::newRow("* Not escaped with char") << "*Te;" << "12345Te;" << true;
+ QTest::newRow("* Not escaped without char") << "*Ch;" << "Ch;" << true;
+ QTest::newRow("* Not escaped invalid") << "*Ro;" << "o;" << false;
+ QTest::newRow("* Escaped") << "\\[Cks;" << "[Cks;" << true;
+
+ QTest::newRow("a true '\\' in input") << "\\Qt;" << "\\Qt;" << true;
+ QTest::newRow("two true '\\' in input") << "\\\\Qt;" << "\\\\Qt;" << true;
+ QTest::newRow("a '\\' at the end") << "\\\\Qt;" << "\\\\Qt;" << true;
+
+}
+void tst_QRegExp::testEscapingWildcard(){
+ QFETCH(QString, pattern);
+
+ QRegExp re(pattern);
+ re.setPatternSyntax(QRegExp::WildcardUnix);
+
+ QFETCH(QString, teststring);
+ QFETCH(bool, isMatching);
+ QCOMPARE(re.exactMatch(teststring), isMatching);
+}
+
+void tst_QRegExp::testInvalidWildcard_data(){
+ QTest::addColumn<QString>("pattern");
+ QTest::addColumn<bool>("isValid");
+
+ QTest::newRow("valid []") << "[abc]" << true;
+ QTest::newRow("invalid [") << "[abc" << false;
+ QTest::newRow("ending [") << "abc[" << false;
+ QTest::newRow("ending ]") << "abc]" << false;
+ QTest::newRow("ending [^") << "abc[^" << false;
+ QTest::newRow("ending [\\") << "abc[\\" << false;
+ QTest::newRow("ending []") << "abc[]" << false;
+ QTest::newRow("ending [[") << "abc[[" << false;
+
+}
+void tst_QRegExp::testInvalidWildcard(){
+ QFETCH(QString, pattern);
+
+ QRegExp re(pattern);
+ re.setPatternSyntax(QRegExp::Wildcard);
+
+ QFETCH(bool, isValid);
+ QCOMPARE(re.isValid(), isValid);
+}
+
void tst_QRegExp::caretAnchoredOptimization()
{
QString s = "---babnana----";