summaryrefslogtreecommitdiffstats
path: root/util/corelib/qurl-generateTLDs
diff options
context:
space:
mode:
authorRobert Hogan <robert@roberthogan.net>2011-05-24 09:04:27 (GMT)
committerPeter Hartmann <peter.hartmann@nokia.com>2011-05-24 09:47:16 (GMT)
commit154402f56dcf8303a6ce601a52215226af8d31ba (patch)
treea3a84d06e6c80812b55304031ba605d6aea46c63 /util/corelib/qurl-generateTLDs
parent83c37059df7f23be482d4ecb2c54603a3665a33d (diff)
downloadQt-154402f56dcf8303a6ce601a52215226af8d31ba.zip
Qt-154402f56dcf8303a6ce601a52215226af8d31ba.tar.gz
Qt-154402f56dcf8303a6ce601a52215226af8d31ba.tar.bz2
Add QUrl::topLevelDomain() and move TLD table from QtNetwork to QtCore
Move Qt's copy of the Mozilla public suffix list from QtNetwork to QtCore and use it to expose a new API function QUrl::topLevelDomain(). This function returns the section of the url that is a registrar-controlled top level domain. QtCore now exports a couple of functions to the other Qt modules: qTopLevelDomain, a helper function for QUrl::topLevelDomain(); and qIsEffectiveTLD(), a helper function for QNetworkCookieJar. The motivation for this new API is to allow QtWebKit to implement a Third-Party Cookie blocking policy. For this QtWebKit needs to know the element of the url that is the registry-controlled TLD. Without this knowledge it would end up blocking third-party cookies per host rather than per registry-controlled domain. See also https://bugs.webkit.org/show_bug.cgi?id=45455 Merge-request: 1205 Task-number: QTBUG-13601 Reviewed-by: Peter Hartmann <peter.hartmann@nokia.com>
Diffstat (limited to 'util/corelib/qurl-generateTLDs')
-rw-r--r--util/corelib/qurl-generateTLDs/main.cpp161
-rw-r--r--util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro9
2 files changed, 170 insertions, 0 deletions
diff --git a/util/corelib/qurl-generateTLDs/main.cpp b/util/corelib/qurl-generateTLDs/main.cpp
new file mode 100644
index 0000000..baaf256
--- /dev/null
+++ b/util/corelib/qurl-generateTLDs/main.cpp
@@ -0,0 +1,161 @@
+/****************************************************************************
+**
+** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QtCore>
+
+// Returns 'array' as text for a C string literal: control characters, 0x7f and non-ASCII
+// bytes become "\xHH" (e.g. tranøy.no -> tran\xc3\xb8y.no); other bytes are copied as is.
+static QString utf8encode(const QByteArray &array)
+{
+    const QRegExp hexEscape("\\\\x[a-fA-F0-9][a-fA-F0-9]$"); // exactly one "\xHH" escape
+    QString result;
+    result.reserve(array.length() + array.length() / 3);
+    for (int i = 0; i < array.length(); ++i) {
+        const char c = array.at(i);
+        if (c < 0x20 || uchar(c) >= 0x7f) {
+            // always two hex digits ("\x09", never "\x9") so every escape is four characters
+            result += QString::fromLatin1("\\x%1").arg(uint(uchar(c)), 2, 16, QLatin1Char('0'));
+            continue;
+        }
+        // a hex escape absorbs hex digits that follow it ("äc.com" -> "\xabc.com"), so restart
+        // the literal: "\xab""c.com". Only the last four characters can hold the escape.
+        const bool isHexChar = (c >= '0' && c <= '9') ||
+                               (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+        if (isHexChar && hexEscape.exactMatch(result.right(4)))
+            result += "\"\"";
+        result += c;
+    }
+    return result;
+}
+
+// Reads the effective-TLD list argv[1] (one entry per line) and writes C source to argv[2]:
+// tldCount, tldIndices[] (the offset in tldData where each hash bucket starts, then the
+// total size) and tldData[] (each bucket's entries, every entry followed by \0).
+// Returns 1 on wrong usage or when a file cannot be opened, 0 otherwise.
+int main(int argc, char **argv) {
+    QCoreApplication app(argc, argv);
+    if (argc < 3) {
+        printf("\nusage: %s inputFile outputFile\n\n", argv[0]);
+        printf("'inputFile' should be a list of effective TLDs, one per line,\n");
+        printf("as obtained from http://publicsuffix.org . To create indices and data file\n");
+        printf("file, do the following:\n\n");
+        printf(" wget http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1 -O effective_tld_names.dat\n");
+        printf(" grep '^[^\\/\\/]' effective_tld_names.dat > effective_tld_names.dat.trimmed\n");
+        printf(" %s effective_tld_names.dat.trimmed effective_tld_names.dat.qt\n\n", argv[0]);
+        printf("Now copy the data from effective_tld_names.dat.qt to the file src/corelib/io/qurltlds_p.h in your Qt repo\n\n");
+        return 1;
+    }
+    QFile file(argv[1]);
+    QFile outFile(argv[2]);
+    // an input that failed to open used to produce empty tables and a success message
+    if (!file.open(QIODevice::ReadOnly)) {
+        fprintf(stderr, "cannot open %s for reading\n", argv[1]);
+        return 1;
+    }
+    if (!outFile.open(QIODevice::WriteOnly)) {
+        fprintf(stderr, "cannot open %s for writing\n", argv[2]);
+        return 1;
+    }
+
+    QByteArray outIndicesBufferBA;
+    QBuffer outIndicesBuffer(&outIndicesBufferBA);
+    outIndicesBuffer.open(QIODevice::WriteOnly);
+    QByteArray outDataBufferBA;
+    QBuffer outDataBuffer(&outDataBufferBA);
+    outDataBuffer.open(QIODevice::WriteOnly);
+
+    // first pass: count the lines; the count is also the number of hash buckets
+    int lineCount = 0;
+    while (!file.atEnd()) {
+        file.readLine();
+        lineCount++;
+    }
+    file.reset();
+    // second pass: append every entry to the bucket selected by its hash
+    const QRegExp regexpOctalEscape(QLatin1String("^[0-9]"));
+    QVector<QString> strings(lineCount);
+    while (!file.atEnd()) {
+        QString st = QString::fromUtf8(file.readLine()).trimmed();
+        int num = qHash(st) % lineCount;
+        // for domain 1.com, we could get something like
+        // a.com\01.com, which would be interpreted as octal 01,
+        // so we need to separate those strings with quotes
+        if (!strings.at(num).isEmpty() && st.contains(regexpOctalEscape))
+            strings[num].append("\"\"");
+        strings[num].append(utf8encode(st.toUtf8()));
+        strings[num].append("\\0");
+    }
+
+    outIndicesBuffer.write("static const quint16 tldCount = ");
+    outIndicesBuffer.write(QByteArray::number(lineCount));
+    outIndicesBuffer.write(";\n");
+    outIndicesBuffer.write("static const quint16 tldIndices[] = {\n");
+
+    int utf8Size = 0;
+    for (int a = 0; a < lineCount; a++) {
+        bool lineIsEmpty = strings.at(a).isEmpty();
+        if (!lineIsEmpty) {
+            strings[a].prepend("\"");
+            strings[a].append("\"");
+        }
+        int zeroCount = strings.at(a).count(QLatin1String("\\0"));
+        int utf8CharsCount = strings.at(a).count(QLatin1String("\\x"));
+        int quoteCount = strings.at(a).count('"');
+        outDataBuffer.write(strings.at(a).toUtf8());
+        if (!lineIsEmpty)
+            outDataBuffer.write("\n");
+        outIndicesBuffer.write(QByteArray::number(utf8Size));
+        outIndicesBuffer.write(",\n");
+        // size once compiled: \0 is 2 characters for 1 byte, \xHH is 4 for 1, quotes are 0
+        utf8Size += strings.at(a).count() - (zeroCount + quoteCount + utf8CharsCount * 3);
+    }
+    outIndicesBuffer.write(QByteArray::number(utf8Size));
+    outIndicesBuffer.write("};\n");
+    outIndicesBuffer.close();
+    outFile.write(outIndicesBufferBA);
+    outDataBuffer.close();
+    outFile.write("\nstatic const char tldData[] = {\n");
+    outFile.write(outDataBufferBA);
+    outFile.write("};\n");
+    outFile.close();
+    printf("data generated to %s . Now copy the data from this file to src/corelib/io/qurltlds_p.h in your Qt repo\n", argv[2]);
+    return 0;
+}
diff --git a/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro b/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro
new file mode 100644
index 0000000..9d5f1cf
--- /dev/null
+++ b/util/corelib/qurl-generateTLDs/qurl-generateTLDs.pro
@@ -0,0 +1,9 @@
+TEMPLATE = app # the generator is built as an application
+TARGET =
+DEPENDPATH += .
+INCLUDEPATH += .
+
+QT = core # assigned with '=', so core is the only module listed
+
+# Input: the generator's only source file
+SOURCES += main.cpp