From dbe294cdf383d8c8c0da21730be155c5291541d8 Mon Sep 17 00:00:00 2001 From: Jocelyn Turcotte Date: Tue, 29 Sep 2009 18:00:40 +0200 Subject: Adds QUrl::fromUserInput, gathered from QWebView::guessUrlFromString. Reviewed-by: Thiago Macieira --- src/corelib/io/qurl.cpp | 75 ++++++++++++++++++++++++++++++++++++++++++++ src/corelib/io/qurl.h | 2 ++ tests/auto/qurl/tst_qurl.cpp | 65 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index face923..c9a4cf1 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -172,6 +172,8 @@ #include "private/qunicodetables_p.h" #include "qatomic.h" #include "qbytearray.h" +#include "qdir.h" +#include "qfile.h" #include "qlist.h" #ifndef QT_NO_REGEXP #include "qregexp.h" @@ -5547,6 +5549,79 @@ QUrl QUrl::fromEncoded(const QByteArray &input, ParsingMode parsingMode) } /*! + Returns a valid URL from a user supplied \a userInput string if one can be + deducted. In the case that is not possible, an invalid QUrl() is returned. + + \since 4.6 + + Most applications that can browse the web, allow the user to input a URL + in the form of a plain string. This string can be manually typed into + a location bar, obtained from the clipboard, or passed in via command + line arguments. + + When the string is not already a valid URL, a best guess is performed, + making various web related assumptions. + + In the case the string corresponds to a valid file path on the system, + a file:// URL is constructed, using QUrl::fromLocalFile(). + + If that is not the case, an attempt is made to turn the string into a + http:// or ftp:// URL. The latter in the case the string starts with + 'ftp'. The result is then passed through QUrl's tolerant parser, and + in the case or success, a valid QUrl is returned, or else a QUrl(). + + \section1 Examples: + + \list + \o qt.nokia.com becomes http://qt.nokia.com + \o ftp.qt.nokia.com becomes ftp://ftp.qt.nokia.com + \o localhost becomes http://localhost + \o /home/user/test.html becomes file:///home/user/test.html (if exists) + \endlist + + \section2 Tips to avoid erroneous character conversion when dealing with + URLs and strings: + + \list + \o When creating an URL QString from a QByteArray or a char*, always use + QString::fromUtf8(). + \o Favor the use of QUrl::fromEncoded() and QUrl::toEncoded() instead of + QUrl(string) and QUrl::toString() when converting QUrl to/from string. + \endlist +*/ +QUrl QUrl::fromUserInput(const QString &userInput) +{ + QString trimmedString = userInput.trimmed(); + + // Absolute files + if (QDir::isAbsolutePath(trimmedString)) + return QUrl::fromLocalFile(trimmedString); + + // Check the most common case of a valid url with scheme and host first + QUrl url = QUrl::fromEncoded(trimmedString.toUtf8(), QUrl::TolerantMode); + if (url.isValid() && !url.scheme().isEmpty() && !url.host().isEmpty()) + return url; + + // If the string is missing the scheme or the scheme is not valid, prepend a scheme + QString scheme = url.scheme(); + if (scheme.isEmpty() || scheme.contains(QLatin1Char('.')) || scheme == QLatin1String("localhost")) { + // Do not do anything for strings such as "foo", only "foo.com" + int dotIndex = trimmedString.indexOf(QLatin1Char('.')); + if (dotIndex != -1 || trimmedString.startsWith(QLatin1String("localhost"))) { + const QString hostscheme = trimmedString.left(dotIndex).toLower(); + QByteArray scheme = (hostscheme == QLatin1String("ftp")) ? "ftp" : "http"; + trimmedString = QLatin1String(scheme) + QLatin1String("://") + trimmedString; + } + url = QUrl::fromEncoded(trimmedString.toUtf8(), QUrl::TolerantMode); + } + + if (url.isValid()) + return url; + + return QUrl(); +} + +/*! Returns a decoded copy of \a input. \a input is first decoded from percent encoding, then converted from UTF-8 to unicode. */ diff --git a/src/corelib/io/qurl.h b/src/corelib/io/qurl.h index b00074a..f76d345 100644 --- a/src/corelib/io/qurl.h +++ b/src/corelib/io/qurl.h @@ -189,6 +189,8 @@ public: static QUrl fromEncoded(const QByteArray &url, ParsingMode mode); // ### Qt 5: merge the two fromEncoded() functions, with mode = TolerantMode + static QUrl fromUserInput(const QString &userInput); + void detach(); bool isDetached() const; diff --git a/tests/auto/qurl/tst_qurl.cpp b/tests/auto/qurl/tst_qurl.cpp index 8899176..fb3cf0e 100644 --- a/tests/auto/qurl/tst_qurl.cpp +++ b/tests/auto/qurl/tst_qurl.cpp @@ -186,6 +186,8 @@ private slots: void resolvedWithAbsoluteSchemes_data() const; void binaryData_data(); void binaryData(); + void fromUserInput_data(); + void fromUserInput(); void task_199967(); void task_240612(); @@ -3637,6 +3639,69 @@ void tst_QUrl::binaryData() QCOMPARE(url2, url); } +void tst_QUrl::fromUserInput_data() +{ + QTest::addColumn("string"); + QTest::addColumn("url"); + + // Null + QTest::newRow("null") << QString() << QUrl(); + + // File + QDirIterator it(QDir::homePath()); + QString fileString; + int c = 0; + while (it.hasNext()) { + it.next(); + QTest::newRow(QString("file-%1").arg(c++).toLatin1()) << it.filePath() << QUrl::fromLocalFile(it.filePath()); + } + + // basic latin1 + QTest::newRow("unicode-0") << QString::fromUtf8("\xC3\xA5.com/") << QUrl::fromEncoded(QString::fromUtf8("http://\xC3\xA5.com/").toUtf8(), QUrl::TolerantMode); + // unicode + QTest::newRow("unicode-1") << QString::fromUtf8("\xCE\xBB.com/") << QUrl::fromEncoded(QString::fromUtf8("http://\xCE\xBB.com/").toUtf8(), QUrl::TolerantMode); + + // no scheme + QTest::newRow("add scheme-0") << "webkit.org" << QUrl("http://webkit.org"); + QTest::newRow("add scheme-1") << "www.webkit.org" << QUrl("http://www.webkit.org"); + QTest::newRow("add scheme-2") << "ftp.webkit.org" << QUrl("ftp://ftp.webkit.org"); + QTest::newRow("add scheme-3") << "webkit" << QUrl("webkit"); + + // QUrl's tolerant parser should already handle this + QTest::newRow("not-encoded-0") << "http://webkit.org/test page.html" << QUrl("http://webkit.org/test%20page.html"); + + // Make sure the :80, i.e. port doesn't screw anything up + QUrl portUrl("http://webkit.org"); + portUrl.setPort(80); + QTest::newRow("port-0") << "webkit.org:80" << portUrl; + QTest::newRow("port-1") << "http://webkit.org:80" << portUrl; + + // mailto doesn't have a ://, but is valid + QUrl mailto("somebody@somewhere.net"); + mailto.setScheme("mailto"); + QTest::newRow("mailto") << "mailto:somebody@somewhere.net" << mailto; + + // misc + QTest::newRow("localhost-0") << "localhost" << QUrl("http://localhost"); + QTest::newRow("localhost-1") << "localhost:80" << QUrl("http://localhost:80"); + QTest::newRow("spaces-0") << " http://webkit.org/test page.html " << QUrl("http://webkit.org/test%20page.html"); + QTest::newRow("trash-0") << "webkit.org/test?someData=42%&someOtherData=abcde#anchor" << QUrl::fromEncoded("http://webkit.org/test?someData=42%25&someOtherData=abcde#anchor"); + + // FYI: The scheme in the resulting url user + QUrl authUrl("user:pass@domain.com"); + QTest::newRow("misc-1") << "user:pass@domain.com" << authUrl; +} + +// public static QUrl guessUrlFromString(QString const& string) +void tst_QUrl::fromUserInput() +{ + QFETCH(QString, string); + QFETCH(QUrl, url); + + QUrl guessedUrl = QUrl::fromUserInput(string); + QCOMPARE(guessedUrl, url); +} + void tst_QUrl::task_199967() { { -- cgit v0.12