From 2792e2570b1eef7652890958dd2b48209aa1a37b Mon Sep 17 00:00:00 2001 From: Olivier Goffart Date: Thu, 23 Jul 2009 13:14:23 +0200 Subject: Try best to convert a regexp to a ECMAScript expression --- src/corelib/tools/qregexp.cpp | 32 ++++++++----- src/script/api/qscriptengine.cpp | 41 ++++++++++++++++- tests/auto/qscriptengine/tst_qscriptengine.cpp | 64 ++++++++++++++++++++++++-- 3 files changed, 120 insertions(+), 17 deletions(-) diff --git a/src/corelib/tools/qregexp.cpp b/src/corelib/tools/qregexp.cpp index 3108f31..b0f2054 100644 --- a/src/corelib/tools/qregexp.cpp +++ b/src/corelib/tools/qregexp.cpp @@ -1252,25 +1252,33 @@ struct QRegExpLookahead }; #endif -QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key) - : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2) -{ - setup(); - - QString rx; +/*! \internal + convert the pattern string to the RegExp syntax. - switch (key.patternSyntax) { - case QRegExp::Wildcard: + This is also used by QScriptEngine::newRegExp to convert to a pattern that JavaScriptCore can understand + */ +Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax) +{ + switch (patternSyntax) { #ifndef QT_NO_REGEXP_WILDCARD - rx = wc2rx(key.pattern); -#endif + case QRegExp::Wildcard: + return wc2rx(pattern); break; +#endif case QRegExp::FixedString: - rx = QRegExp::escape(key.pattern); + return QRegExp::escape(pattern); break; default: - rx = key.pattern; + return pattern; } +} + +QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key) + : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2) +{ + setup(); + + QString rx = qt_regexp_toCanonical(key.pattern, key.patternSyntax); valid = (parse(rx.unicode(), rx.length()) == rx.length()); if (!valid) { diff --git a/src/script/api/qscriptengine.cpp b/src/script/api/qscriptengine.cpp index 0a3fbf1..43f14fa 100644 --- a/src/script/api/qscriptengine.cpp +++ b/src/script/api/qscriptengine.cpp @@ -1435,7 +1435,46 @@ 
QScriptValue QScriptEngine::newRegExp(const QRegExp &regexp) JSC::ExecState* exec = d->currentFrame; JSC::JSValue buf[2]; JSC::ArgList args(buf, sizeof(buf)); - JSC::UString jscPattern = QScript::qtStringToJSCUString(regexp.pattern()); + + //convert the pattern to a ECMAScript pattern + extern QString qt_regexp_toCanonical(const QString &, QRegExp::PatternSyntax); + QString pattern = qt_regexp_toCanonical(regexp.pattern(), regexp.patternSyntax()); + if (regexp.isMinimal()) { + QString ecmaPattern; + int len = pattern.length(); + ecmaPattern.reserve(len); + int i = 0; + const QChar *wc = pattern.unicode(); + bool inBracket = false; + while (i < len) { + QChar c = wc[i++]; + ecmaPattern += c; + switch (c.unicode()) { + case '?': + case '+': + case '*': + case '}': + if (!inBracket) + ecmaPattern += QLatin1Char('?'); + break; + case '\\': + if (i < len) + ecmaPattern += wc[i++]; + break; + case '[': + inBracket = true; + break; + case ']': + inBracket = false; + break; + default: + break; + } + } + pattern = ecmaPattern; + } + + JSC::UString jscPattern = QScript::qtStringToJSCUString(pattern); QString flags; if (regexp.caseSensitivity() == Qt::CaseInsensitive) flags.append(QLatin1Char('i')); diff --git a/tests/auto/qscriptengine/tst_qscriptengine.cpp b/tests/auto/qscriptengine/tst_qscriptengine.cpp index 0d44a14..618dc04 100644 --- a/tests/auto/qscriptengine/tst_qscriptengine.cpp +++ b/tests/auto/qscriptengine/tst_qscriptengine.cpp @@ -123,6 +123,9 @@ private slots: void reentrancy(); void incDecNonObjectProperty(); void installTranslatorFunctions(); + + void qRegExpInport_data(); + void qRegExpInport(); }; tst_QScriptEngine::tst_QScriptEngine() @@ -229,7 +232,7 @@ void tst_QScriptEngine::newFunction() QCOMPARE(fun.prototype().isValid(), true); QCOMPARE(fun.prototype().isFunction(), true); QCOMPARE(fun.prototype().strictlyEquals(eng.evaluate("Function.prototype")), true); - + QCOMPARE(fun.call().isNull(), true); QCOMPARE(fun.construct().isObject(), true); } @@ -251,7 
+254,7 @@ void tst_QScriptEngine::newFunction() QCOMPARE(fun.prototype().isValid(), true); QCOMPARE(fun.prototype().isFunction(), true); QCOMPARE(fun.prototype().strictlyEquals(eng.evaluate("Function.prototype")), true); - + QCOMPARE(fun.call().isNull(), true); QCOMPARE(fun.construct().isObject(), true); } @@ -2759,7 +2762,7 @@ void tst_QScriptEngine::numberClass() QCOMPARE(ctor.propertyFlags("MIN_VALUE"), flags); QVERIFY(ctor.property("NaN").isNumber()); QCOMPARE(ctor.propertyFlags("NaN"), flags); - QVERIFY(ctor.property("NEGATIVE_INFINITY").isNumber()); + QVERIFY(ctor.property("NEGATIVE_INFINITY").isNumber()); QCOMPARE(ctor.propertyFlags("NEGATIVE_INFINITY"), flags); QVERIFY(ctor.property("POSITIVE_INFINITY").isNumber()); QCOMPARE(ctor.propertyFlags("POSITIVE_INFINITY"), flags); @@ -2982,7 +2985,7 @@ void tst_QScriptEngine::functionExpression() " else\n" " function baz() { return 'baz'; }\n" " return (arg == 'bar') ? bar : baz;\n" - "}"); + "}"); QVERIFY(!eng.globalObject().property("bar").isValid()); QVERIFY(!eng.globalObject().property("baz").isValid()); QVERIFY(eng.evaluate("foo").isFunction()); @@ -3771,5 +3774,58 @@ void tst_QScriptEngine::installTranslatorFunctions() } } +static QRegExp minimal(QRegExp r) { r.setMinimal(true); return r; } + +void tst_QScriptEngine::qRegExpInport_data() +{ + QTest::addColumn("rx"); + QTest::addColumn("string"); + QTest::addColumn("matched"); + + QTest::newRow("normal") << QRegExp("(test|foo)") << "test _ foo _ test _ Foo"; + QTest::newRow("normal2") << QRegExp("(Test|Foo)") << "test _ foo _ test _ Foo"; + QTest::newRow("case insensitive)") << QRegExp("(test|foo)", Qt::CaseInsensitive) << "test _ foo _ test _ Foo"; + QTest::newRow("case insensitive2)") << QRegExp("(Test|Foo)", Qt::CaseInsensitive) << "test _ foo _ test _ Foo"; + QTest::newRow("b(a*)(b*)") << QRegExp("b(a*)(b*)", Qt::CaseInsensitive) << "aaabbBbaAabaAaababaaabbaaab"; + QTest::newRow("greedy") << QRegExp("a*(a*)", Qt::CaseInsensitive, QRegExp::RegExp2) << 
"aaaabaaba"; + // this one will fail because we do not support the QRegExp::RegExp in JSC + //QTest::newRow("not_greedy") << QRegExp("a*(a*)", Qt::CaseInsensitive, QRegExp::RegExp) << "aaaabaaba"; + QTest::newRow("willcard") << QRegExp("*.txt", Qt::CaseSensitive, QRegExp::Wildcard) << "file.txt"; + QTest::newRow("willcard 2") << QRegExp("a?b.txt", Qt::CaseSensitive, QRegExp::Wildcard) << "ab.txt abb.rtc acb.txt"; + QTest::newRow("slash") << QRegExp("g/.*/s", Qt::CaseInsensitive, QRegExp::RegExp2) << "string/string/string"; + QTest::newRow("slash2") << QRegExp("g / .* / s", Qt::CaseInsensitive, QRegExp::RegExp2) << "string / string / string"; + QTest::newRow("fixed") << QRegExp("a*aa.a(ba)*a\\ba", Qt::CaseInsensitive, QRegExp::FixedString) << "aa*aa.a(ba)*a\\ba"; + QTest::newRow("fixed insensitive") << QRegExp("A*A", Qt::CaseInsensitive, QRegExp::FixedString) << "a*A A*a A*A a*a"; + QTest::newRow("fixed sensitive") << QRegExp("A*A", Qt::CaseSensitive, QRegExp::FixedString) << "a*A A*a A*A a*a"; + QTest::newRow("html") << QRegExp("(.*)", Qt::CaseSensitive, QRegExp::RegExp2) << "bolditalicbold"; + QTest::newRow("html minimal") << minimal(QRegExp("(.*)", Qt::CaseSensitive, QRegExp::RegExp2)) << "bolditalicbold"; + QTest::newRow("aaa") << QRegExp("a{2,5}") << "aAaAaaaaaAa"; + QTest::newRow("aaa minimal") << minimal(QRegExp("a{2,5}")) << "aAaAaaaaaAa"; + QTest::newRow("minimal") << minimal(QRegExp(".*\\} [*8]")) << "}?} ?} *"; +} + +void tst_QScriptEngine::qRegExpInport() +{ + QFETCH(QRegExp, rx); + QFETCH(QString, string); + + QScriptEngine eng; + QScriptValue rexp; + rexp = eng.newRegExp(rx); + + QCOMPARE(rexp.isValid(), true); + QCOMPARE(rexp.isRegExp(), true); + QVERIFY(rexp.isFunction()); + + QScriptValue func = eng.evaluate("(function(string, regexp) { return string.match(regexp); })"); + QScriptValue result = func.call(QScriptValue(), QScriptValueList() << string << rexp); + + rx.indexIn(string); + for (int i = 0; i <= rx.numCaptures(); i++) { + 
QCOMPARE(result.property(i).toString(), rx.cap(i)); + } +} + + QTEST_MAIN(tst_QScriptEngine) #include "tst_qscriptengine.moc" -- cgit v0.12