diff options
5 files changed, 147 insertions, 0 deletions
diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp index a1777c5..3904632 100644 --- a/src/xml/sax/qxml.cpp +++ b/src/xml/sax/qxml.cpp @@ -424,6 +424,10 @@ private: int stringValueLen; QString emptyStr; + // The limit to the amount of times the DTD parsing functions can be called + // for the DTD currently being parsed. + int dtdRecursionLimit; + const QString &string(); void stringClear(); void stringAddC(QChar); @@ -492,6 +496,7 @@ private: void unexpectedEof(ParseFunction where, int state); void parseFailed(ParseFunction where, int state); void pushParseState(ParseFunction function, int state); + bool isPartiallyExpandedEntityValueTooLarge(QString *errorMessage); Q_DECLARE_PUBLIC(QXmlSimpleReader) QXmlSimpleReader *q_ptr; @@ -2759,6 +2764,7 @@ QXmlSimpleReaderPrivate::QXmlSimpleReaderPrivate(QXmlSimpleReader *reader) useNamespacePrefixes = false; reportWhitespaceCharData = true; reportEntities = false; + dtdRecursionLimit = 2; } QXmlSimpleReaderPrivate::~QXmlSimpleReaderPrivate() @@ -5018,6 +5024,11 @@ bool QXmlSimpleReaderPrivate::parseDoctype() } break; case Mup: + if (dtdRecursionLimit > 0 && parameterEntities.size() > dtdRecursionLimit) { + reportParseError(QString::fromLatin1( + "DTD parsing exceeded recursion limit of %1.").arg(dtdRecursionLimit)); + return false; + } if (!parseMarkupdecl()) { parseFailed(&QXmlSimpleReaderPrivate::parseDoctype, state); return false; @@ -6627,6 +6638,37 @@ bool QXmlSimpleReaderPrivate::parseChoiceSeq() return false; } +bool QXmlSimpleReaderPrivate::isPartiallyExpandedEntityValueTooLarge(QString *errorMessage) +{ + const QString value = string(); + QMap<QString, int> referencedEntityCounts; + foreach (QString entityName, entities.keys()) { + for (int i = 0; i < value.size() && i != -1; ) { + i = value.indexOf(entityName, i); + if (i != -1) { + // The entityName we're currently trying to find + // was matched in this string; increase our count. + ++referencedEntityCounts[entityName]; + i += entityName.size(); + } + } + } + + foreach (QString entityName, referencedEntityCounts.keys()) { + const int timesReferenced = referencedEntityCounts[entityName]; + const QString entityValue = entities[entityName]; + if (entityValue.size() * timesReferenced > 1024) { + if (errorMessage) { + *errorMessage = QString::fromLatin1("The XML entity \"%1\"" + "expands too a string that is too large to process when " + "referencing \"%2\" %3 times.").arg(entityName).arg(entityName).arg(timesReferenced); + } + return true; + } + } + return false; +} + /* Parse a EntityDecl [70]. @@ -6721,6 +6763,15 @@ bool QXmlSimpleReaderPrivate::parseEntityDecl() switch (state) { case EValue: if ( !entityExist(name())) { + QString errorMessage; + if (isPartiallyExpandedEntityValueTooLarge(&errorMessage)) { + // The entity at entityName is entityValue.size() characters + // long in its unexpanded form, and was mentioned timesReferenced times, + // resulting in a string that would be greater than 1024 characters. + reportParseError(errorMessage); + return false; + } + entities.insert(name(), string()); if (declHnd) { if (!declHnd->internalEntityDecl(name(), string())) { diff --git a/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp b/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp index 942d9ea..2c04d8e 100644 --- a/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp +++ b/tests/auto/qxmlsimplereader/tst_qxmlsimplereader.cpp @@ -163,6 +163,7 @@ class tst_QXmlSimpleReader : public QObject void reportNamespace() const; void reportNamespace_data() const; void roundtripWithNamespaces() const; + void dtdRecursionLimit(); private: static QDomDocument fromByteArray(const QString &title, const QByteArray &ba, bool *ok); @@ -771,5 +772,62 @@ void tst_QXmlSimpleReader::roundtripWithNamespaces() const } } +class TestHandler : public QXmlDefaultHandler +{ +public: + TestHandler() : + recursionCount(0) + { + } + + bool internalEntityDecl(const QString &name, const QString &value) + { + ++recursionCount; + return QXmlDefaultHandler::internalEntityDecl(name, value); + } + + int recursionCount; +}; + +void tst_QXmlSimpleReader::dtdRecursionLimit() +{ + QFile file("xmldocs/2-levels-nested-dtd.xml"); + QVERIFY(file.open(QIODevice::ReadOnly)); + QXmlSimpleReader xmlReader; + { + QXmlInputSource *source = new QXmlInputSource(&file); + TestHandler handler; + xmlReader.setDeclHandler(&handler); + xmlReader.setErrorHandler(&handler); + QVERIFY(!xmlReader.parse(source)); + } + + file.close(); + file.setFileName("xmldocs/1-levels-nested-dtd.xml"); + QVERIFY(file.open(QIODevice::ReadOnly)); + { + QXmlInputSource *source = new QXmlInputSource(&file); + TestHandler handler; + xmlReader.setDeclHandler(&handler); + xmlReader.setErrorHandler(&handler); + QVERIFY(!xmlReader.parse(source)); + // The error wasn't because of the recursion limit being reached, + // it was because the document is not valid. + QVERIFY(handler.recursionCount < 2); + } + + file.close(); + file.setFileName("xmldocs/internal-entity-polynomial-attribute.xml"); + QVERIFY(file.open(QIODevice::ReadOnly)); + { + QXmlInputSource *source = new QXmlInputSource(&file); + TestHandler handler; + xmlReader.setDeclHandler(&handler); + xmlReader.setErrorHandler(&handler); + QVERIFY(!xmlReader.parse(source)); + QVERIFY(handler.recursionCount == 1); + } +} + QTEST_MAIN(tst_QXmlSimpleReader) #include "tst_qxmlsimplereader.moc" diff --git a/tests/auto/qxmlsimplereader/xmldocs/1-levels-nested-dtd.xml b/tests/auto/qxmlsimplereader/xmldocs/1-levels-nested-dtd.xml new file mode 100644 index 0000000..0dfc15b --- /dev/null +++ b/tests/auto/qxmlsimplereader/xmldocs/1-levels-nested-dtd.xml @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<!-- Test non-deterministic content model matching. + +Entity references are not part of the internal DTD subset (for good reason). + +--> +<!DOCTYPE root [ +<!ELEMENT e0 EMPTY> +<!ENTITY % e1 "(e0,e0)"> +<!ELEMENT root (%e1;)?> +]> +<root/>
\ No newline at end of file diff --git a/tests/auto/qxmlsimplereader/xmldocs/2-levels-nested-dtd.xml b/tests/auto/qxmlsimplereader/xmldocs/2-levels-nested-dtd.xml new file mode 100644 index 0000000..7ec06db --- /dev/null +++ b/tests/auto/qxmlsimplereader/xmldocs/2-levels-nested-dtd.xml @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<!-- Test non-deterministic content model matching. + +Entity references are not part of the internal DTD subset (for good reason). + +--> +<!DOCTYPE root [ +<!ELEMENT e0 EMPTY> +<!ENTITY % e1 "(e0,e0)"> +<!ENTITY % e2 "(%e1;,%e1;,%e1;,%e1;,%e1;,%e1;,%e1;,%e1;,%e1;,%e1;,%e1;,%e1;,%e1;)"> +<!ELEMENT root (%e2;)?> +]> +<root/> diff --git a/tests/auto/qxmlsimplereader/xmldocs/internal-entity-polynomial-attribute.xml b/tests/auto/qxmlsimplereader/xmldocs/internal-entity-polynomial-attribute.xml new file mode 100644 index 0000000..bbb88f3 --- /dev/null +++ b/tests/auto/qxmlsimplereader/xmldocs/internal-entity-polynomial-attribute.xml @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<!-- Test polynomial growth of expanded XML. + Expansion happens in an attribute. --> +<!DOCTYPE root [ +<!ELEMENT root EMPTY> +<!ATTLIST root id CDATA #IMPLIED> +<!ENTITY e1 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"> +<!ENTITY e2 "&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;&e1;"> +<!ENTITY e3 "&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;&e2;"> +<!ENTITY e4 "&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;&e3;"> +]> +<root id="&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;&e4;"/> + |