From 512a1ce0698d370c313bb561bbf078935fa0342e Mon Sep 17 00:00:00 2001 From: Mitch Curtis Date: Thu, 7 Nov 2013 09:36:29 +0100 Subject: Disallow deep or widely nested entity references. Nested references with a depth of 2 or greater will fail. References that partially expand to greater than 1024 characters will also fail. This is a backport of 46a8885ae486e238a39efa5119c2714f328b08e4. Change-Id: I0c2e1fa13d6ccb5f88641dae2ed3f28bfdeaf609 Reviewed-by: Richard J. Moore Reviewed-by: Lars Knoll --- src/xml/sax/qxml.cpp | 51 +++++++++++++++++++ .../auto/qxmlsimplereader/tst_qxmlsimplereader.cpp | 58 ++++++++++++++++++++++ .../xmldocs/1-levels-nested-dtd.xml | 12 +++++ .../xmldocs/2-levels-nested-dtd.xml | 13 +++++ .../internal-entity-polynomial-attribute.xml | 13 +++++ 5 files changed, 147 insertions(+) create mode 100644 tests/auto/qxmlsimplereader/xmldocs/1-levels-nested-dtd.xml create mode 100644 tests/auto/qxmlsimplereader/xmldocs/2-levels-nested-dtd.xml create mode 100644 tests/auto/qxmlsimplereader/xmldocs/internal-entity-polynomial-attribute.xml diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp index a1777c5..3904632 100644 --- a/src/xml/sax/qxml.cpp +++ b/src/xml/sax/qxml.cpp @@ -424,6 +424,10 @@ private: int stringValueLen; QString emptyStr; + // The limit to the amount of times the DTD parsing functions can be called + // for the DTD currently being parsed. + int dtdRecursionLimit; + const QString &string(); void stringClear(); void stringAddC(QChar); @@ -492,6 +496,7 @@ private: void unexpectedEof(ParseFunction where, int state); void parseFailed(ParseFunction where, int state); void pushParseState(ParseFunction function, int state); + bool isPartiallyExpandedEntityValueTooLarge(QString *errorMessage); Q_DECLARE_PUBLIC(QXmlSimpleReader) QXmlSimpleReader *q_ptr; @@ -2759,6 +2764,7 @@ QXmlSimpleReaderPrivate::QXmlSimpleReaderPrivate(QXmlSimpleReader *reader) useNamespacePrefixes = false; reportWhitespaceCharData = true; reportEntities = false; + dtdRecursionLimit = 2; } QXmlSimpleReaderPrivate::~QXmlSimpleReaderPrivate() @@ -5018,6 +5024,11 @@ bool QXmlSimpleReaderPrivate::parseDoctype() } break; case Mup: + if (dtdRecursionLimit > 0 && parameterEntities.size() > dtdRecursionLimit) { + reportParseError(QString::fromLatin1( + "DTD parsing exceeded recursion limit of %1.").arg(dtdRecursionLimit)); + return false; + } if (!parseMarkupdecl()) { parseFailed(&QXmlSimpleReaderPrivate::parseDoctype, state); return false; @@ -6627,6 +6638,37 @@ bool QXmlSimpleReaderPrivate::parseChoiceSeq() return false; } +bool QXmlSimpleReaderPrivate::isPartiallyExpandedEntityValueTooLarge(QString *errorMessage) +{ + const QString value = string(); + QMap referencedEntityCounts; + foreach (QString entityName, entities.keys()) { + for (int i = 0; i < value.size() && i != -1; ) { + i = value.indexOf(entityName, i); + if (i != -1) { + // The entityName we're currently trying to find + // was matched in this string; increase our count. + ++referencedEntityCounts[entityName]; + i += entityName.size(); + } + } + } + + foreach (QString entityName, referencedEntityCounts.keys()) { + const int timesReferenced = referencedEntityCounts[entityName]; + const QString entityValue = entities[entityName]; + if (entityValue.size() * timesReferenced > 1024) { + if (errorMessage) { + *errorMessage = QString::fromLatin1("The XML entity \"%1\"" + "expands too a string that is too large to process when " + "referencing \"%2\" %3 times.").arg(entityName).arg(entityName).arg(timesReferenced); + } + return true; + } + } + return false; +} + /* Parse a EntityDecl [70]. @@ -6721,6 +6763,15 @@ bool QXmlSimpleReaderPrivate::parseEntityDecl() switch (state) { case EValue: if ( !entityExist(name())) { + QString errorMessage; + if (isPartiallyExpandedEntityValueTooLarge(&errorMessage)) { + // The entity at entityName is entityValue.size() characters + // long in its unexpanded form, and was mentioned timesReferenced times, + // resulting in a string that would be greater than 1024 characters. + reportParseError(errorMessage); + return false; + } + entities.insert(name(), string()); if (declHnd) { if (!declHnd->internalEntityDecl(name(), string())) { diff --git a/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp b/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp index 942d9ea..2c04d8e 100644 --- a/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp +++ b/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp @@ -163,6 +163,7 @@ class tst_QXmlSimpleReader : public QObject void reportNamespace() const; void reportNamespace_data() const; void roundtripWithNamespaces() const; + void dtdRecursionLimit(); private: static QDomDocument fromByteArray(const QString &title, const QByteArray &ba, bool *ok); @@ -771,5 +772,62 @@ void tst_QXmlSimpleReader::roundtripWithNamespaces() const } } +class TestHandler : public QXmlDefaultHandler +{ +public: + TestHandler() : + recursionCount(0) + { + } + + bool internalEntityDecl(const QString &name, const QString &value) + { + ++recursionCount; + return QXmlDefaultHandler::internalEntityDecl(name, value); + } + + int recursionCount; +}; + +void tst_QXmlSimpleReader::dtdRecursionLimit() +{ + QFile file("xmldocs/2-levels-nested-dtd.xml"); + QVERIFY(file.open(QIODevice::ReadOnly)); + QXmlSimpleReader xmlReader; + { + QXmlInputSource *source = new QXmlInputSource(&file); + TestHandler handler; + xmlReader.setDeclHandler(&handler); + xmlReader.setErrorHandler(&handler); + QVERIFY(!xmlReader.parse(source)); + } + + file.close(); + file.setFileName("xmldocs/1-levels-nested-dtd.xml"); + QVERIFY(file.open(QIODevice::ReadOnly)); + { + QXmlInputSource *source = new QXmlInputSource(&file); + TestHandler handler; + xmlReader.setDeclHandler(&handler); + xmlReader.setErrorHandler(&handler); + QVERIFY(!xmlReader.parse(source)); + // The error wasn't because of the recursion limit being reached, + // it was because the document is not valid. + QVERIFY(handler.recursionCount < 2); + } + + file.close(); + file.setFileName("xmldocs/internal-entity-polynomial-attribute.xml"); + QVERIFY(file.open(QIODevice::ReadOnly)); + { + QXmlInputSource *source = new QXmlInputSource(&file); + TestHandler handler; + xmlReader.setDeclHandler(&handler); + xmlReader.setErrorHandler(&handler); + QVERIFY(!xmlReader.parse(source)); + QVERIFY(handler.recursionCount == 1); + } +} + QTEST_MAIN(tst_QXmlSimpleReader) #include "tst_qxmlsimplereader.moc" diff --git a/tests/auto/qxmlsimplereader/xmldocs/1-levels-nested-dtd.xml b/tests/auto/qxmlsimplereader/xmldocs/1-levels-nested-dtd.xml new file mode 100644 index 0000000..0dfc15b --- /dev/null +++ b/tests/auto/qxmlsimplereader/xmldocs/1-levels-nested-dtd.xml @@ -0,0 +1,12 @@ + + + + + +]> + \ No newline at end of file diff --git a/tests/auto/qxmlsimplereader/xmldocs/2-levels-nested-dtd.xml b/tests/auto/qxmlsimplereader/xmldocs/2-levels-nested-dtd.xml new file mode 100644 index 0000000..7ec06db --- /dev/null +++ b/tests/auto/qxmlsimplereader/xmldocs/2-levels-nested-dtd.xml @@ -0,0 +1,13 @@ + + + + + + +]> + diff --git a/tests/auto/qxmlsimplereader/xmldocs/internal-entity-polynomial-attribute.xml b/tests/auto/qxmlsimplereader/xmldocs/internal-entity-polynomial-attribute.xml new file mode 100644 index 0000000..bbb88f3 --- /dev/null +++ b/tests/auto/qxmlsimplereader/xmldocs/internal-entity-polynomial-attribute.xml @@ -0,0 +1,13 @@ + + + + + + + + +]> + + -- cgit v0.12