diff options
author | Mitch Curtis <mitch.curtis@digia.com> | 2014-01-10 10:19:08 (GMT) |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2014-01-10 23:03:01 (GMT) |
commit | 718fae50a4509c2cfc07552e6b1187ed2502f9b9 (patch) | |
tree | 4fe5fa8cda37355bd0027998200f791af3637ef7 /src/xml/sax | |
parent | f6002697ece87a99c52b32872194fc8217579e17 (diff) | |
download | Qt-718fae50a4509c2cfc07552e6b1187ed2502f9b9.zip Qt-718fae50a4509c2cfc07552e6b1187ed2502f9b9.tar.gz Qt-718fae50a4509c2cfc07552e6b1187ed2502f9b9.tar.bz2 |
Mitigate performance regression in isExpandedEntityValueTooLarge().
512a1ce0698d370c313bb561bbf078935fa0342e fixed a security issue [1],
but also caused a large performance regression. This patch improves
the performance from ~196 seconds to ~.23 seconds for 1000 entities,
using the benchmark in the bug report:
"0": 0 msecs per iteration (total: 0, iterations: 1)
"250": 2,952 msecs per iteration (total: 2,952, iterations: 1)
"500": 23,418 msecs per iteration (total: 23,418, iterations: 1)
"750": 79,913 msecs per iteration (total: 79,913, iterations: 1)
"1000": 196,335 msecs per iteration (total: 196,335, iterations: 1)
"0": 0 msecs per iteration (total: 0, iterations: 1)
"250": 16 msecs per iteration (total: 16, iterations: 1)
"500": 59 msecs per iteration (total: 59, iterations: 1)
"750": 132 msecs per iteration (total: 132, iterations: 1)
"1000": 232 msecs per iteration (total: 232, iterations: 1)
This is a cherry-pick of 62c4e288a11769bde45c9c74d731ed8628303f19.
[1] http://lists.qt-project.org/pipermail/announce/2013-December/000036.html
Task-number: QTBUG-35919
Change-Id: Ibc2fc94dc0d88b227b9510e15bac9c07f4600591
Reviewed-by: Richard J. Moore <rich@kde.org>
Diffstat (limited to 'src/xml/sax')
-rw-r--r-- | src/xml/sax/qxml.cpp | 85 |
1 files changed, 57 insertions, 28 deletions
diff --git a/src/xml/sax/qxml.cpp b/src/xml/sax/qxml.cpp index befa801..ffc07b5 100644 --- a/src/xml/sax/qxml.cpp +++ b/src/xml/sax/qxml.cpp @@ -44,6 +44,7 @@ #include "qbuffer.h" #include "qregexp.h" #include "qmap.h" +#include "qhash.h" #include "qstack.h" #include <qdebug.h> @@ -424,6 +425,10 @@ private: int stringValueLen; QString emptyStr; + QHash<QString, int> literalEntitySizes; + // The entity at (QMap<QString,) referenced the entities at (QMap<QString,) (int>) times. + QHash<QString, QHash<QString, int> > referencesToOtherEntities; + QHash<QString, int> expandedSizes; // The limit to the amount of times the DTD parsing functions can be called // for the DTD currently being parsed. static const int dtdRecursionLimit = 2; @@ -3426,6 +3431,10 @@ bool QXmlSimpleReader::parse(const QXmlInputSource *input, bool incremental) { Q_D(QXmlSimpleReader); + d->literalEntitySizes.clear(); + d->referencesToOtherEntities.clear(); + d->expandedSizes.clear(); + if (incremental) { d->initIncrementalParsing(); } else { @@ -6641,43 +6650,63 @@ bool QXmlSimpleReaderPrivate::parseChoiceSeq() bool QXmlSimpleReaderPrivate::isExpandedEntityValueTooLarge(QString *errorMessage) { - QMap<QString, int> literalEntitySizes; - // The entity at (QMap<QString,) referenced the entities at (QMap<QString,) (int>) times. - QMap<QString, QMap<QString, int> > referencesToOtherEntities; - QMap<QString, int> expandedSizes; + QString entityNameBuffer; // For every entity, check how many times all entity names were referenced in its value. - foreach (QString toSearch, entities.keys()) { - // The amount of characters that weren't entity names, but literals, like 'X'. - QString leftOvers = entities.value(toSearch); - // How many times was entityName referenced by toSearch? - foreach (QString entityName, entities.keys()) { - for (int i = 0; i < leftOvers.size() && i != -1; ) { - i = leftOvers.indexOf(QString::fromLatin1("&%1;").arg(entityName), i); - if (i != -1) { - leftOvers.remove(i, entityName.size() + 2); - // The entityName we're currently trying to find was matched in this string; increase our count. - ++referencesToOtherEntities[toSearch][entityName]; + for (QMap<QString,QString>::const_iterator toSearchIt = entities.constBegin(); + toSearchIt != entities.constEnd(); + ++toSearchIt) { + const QString &toSearch = toSearchIt.key(); + + // Don't check the same entities twice. + if (!literalEntitySizes.contains(toSearch)) { + // The amount of characters that weren't entity names, but literals, like 'X'. + QString leftOvers = entities.value(toSearch); + // How many times was entityName referenced by toSearch? + for (QMap<QString,QString>::const_iterator referencedIt = entities.constBegin(); + referencedIt != entities.constEnd(); + ++referencedIt) { + const QString &entityName = referencedIt.key(); + + for (int i = 0; i < leftOvers.size() && i != -1; ) { + entityNameBuffer = QLatin1Char('&') + entityName + QLatin1Char(';'); + + i = leftOvers.indexOf(entityNameBuffer, i); + if (i != -1) { + leftOvers.remove(i, entityName.size() + 2); + // The entityName we're currently trying to find was matched in this string; increase our count. + ++referencesToOtherEntities[toSearch][entityName]; + } } } + literalEntitySizes[toSearch] = leftOvers.size(); } - literalEntitySizes[toSearch] = leftOvers.size(); } - foreach (QString entity, referencesToOtherEntities.keys()) { - expandedSizes[entity] = literalEntitySizes[entity]; - foreach (QString referenceTo, referencesToOtherEntities.value(entity).keys()) { - const int references = referencesToOtherEntities.value(entity).value(referenceTo); - // The total size of an entity's value is the expanded size of all of its referenced entities, plus its literal size. - expandedSizes[entity] += expandedSizes[referenceTo] * references + literalEntitySizes[referenceTo] * references; - } + for (QHash<QString, QHash<QString, int> >::const_iterator entityIt = referencesToOtherEntities.constBegin(); + entityIt != referencesToOtherEntities.constEnd(); + ++entityIt) { + const QString &entity = entityIt.key(); + + QHash<QString, int>::iterator expandedIt = expandedSizes.find(entity); + if (expandedIt == expandedSizes.end()) { + expandedIt = expandedSizes.insert(entity, literalEntitySizes.value(entity)); + for (QHash<QString, int>::const_iterator referenceIt = entityIt->constBegin(); + referenceIt != entityIt->constEnd(); + ++referenceIt) { + const QString &referenceTo = referenceIt.key(); + const int references = referencesToOtherEntities.value(entity).value(referenceTo); + // The total size of an entity's value is the expanded size of all of its referenced entities, plus its literal size. + *expandedIt += expandedSizes.value(referenceTo) * references + literalEntitySizes.value(referenceTo) * references; + } - if (expandedSizes[entity] > entityCharacterLimit) { - if (errorMessage) { - *errorMessage = QString::fromLatin1("The XML entity \"%1\" expands too a string that is too large to process (%2 characters > %3)."); - *errorMessage = (*errorMessage).arg(entity).arg(expandedSizes[entity]).arg(entityCharacterLimit); + if (*expandedIt > entityCharacterLimit) { + if (errorMessage) { + *errorMessage = QString::fromLatin1("The XML entity \"%1\" expands to a string that is too large to process (%2 characters > %3).") + .arg(entity, *expandedIt, entityCharacterLimit); + } + return true; } - return true; } } return false; |