summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorSeth Michael Larson <seth@python.org>2024-02-14 16:29:06 (GMT)
committerGitHub <noreply@github.com>2024-02-14 16:29:06 (GMT)
commit4b2d1786ccf913bc80ff571c32b196be1543ca54 (patch)
tree9d1dba38a63d0866eadf6b2bea645286282a0169 /Modules
parent671360161f0b7a5ff4c1d062e570962e851b4bde (diff)
downloadcpython-4b2d1786ccf913bc80ff571c32b196be1543ca54.zip
cpython-4b2d1786ccf913bc80ff571c32b196be1543ca54.tar.gz
cpython-4b2d1786ccf913bc80ff571c32b196be1543ca54.tar.bz2
gh-115399: Upgrade bundled libexpat to 2.6.0 (#115431)
Diffstat (limited to 'Modules')
-rw-r--r--Modules/expat/expat.h28
-rw-r--r--Modules/expat/expat_config.h1
-rw-r--r--Modules/expat/internal.h8
-rw-r--r--Modules/expat/siphash.h10
-rw-r--r--Modules/expat/winconfig.h7
-rw-r--r--Modules/expat/xmlparse.c558
-rw-r--r--Modules/expat/xmlrole.c6
-rw-r--r--Modules/expat/xmlrole.h6
-rw-r--r--Modules/expat/xmltok.c29
-rw-r--r--Modules/expat/xmltok.h8
-rw-r--r--Modules/expat/xmltok_impl.c2
11 files changed, 401 insertions, 262 deletions
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
index 1c83563..95464b0 100644
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -11,11 +11,13 @@
Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net>
Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
- Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org>
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
+ Copyright (c) 2023 Hanno Böck <hanno@gentoo.org>
+ Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -269,7 +271,7 @@ XML_ParserCreate_MM(const XML_Char *encoding,
const XML_Memory_Handling_Suite *memsuite,
const XML_Char *namespaceSeparator);
-/* Prepare a parser object to be re-used. This is particularly
+/* Prepare a parser object to be reused. This is particularly
valuable when memory allocation overhead is disproportionately high,
such as when a large number of small documnents need to be parsed.
All handlers are cleared from the parser, except for the
@@ -951,7 +953,7 @@ XMLPARSEAPI(XML_Index) XML_GetCurrentByteIndex(XML_Parser parser);
XMLPARSEAPI(int)
XML_GetCurrentByteCount(XML_Parser parser);
-/* If XML_CONTEXT_BYTES is defined, returns the input buffer, sets
+/* If XML_CONTEXT_BYTES is >=1, returns the input buffer, sets
the integer pointed to by offset to the offset within this buffer
of the current parse position, and sets the integer pointed to by size
to the size of this buffer (the number of input bytes). Otherwise
@@ -1025,7 +1027,9 @@ enum XML_FeatureEnum {
XML_FEATURE_ATTR_INFO,
/* Added in Expat 2.4.0. */
XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
- XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
+ XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
+ /* Added in Expat 2.6.0. */
+ XML_FEATURE_GE
/* Additional features must be added to the end of this enum. */
};
@@ -1038,23 +1042,29 @@ typedef struct {
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
-#ifdef XML_DTD
-/* Added in Expat 2.4.0. */
+#if XML_GE == 1
+/* Added in Expat 2.4.0 for XML_DTD defined and
+ * added in Expat 2.6.0 for XML_GE == 1. */
XMLPARSEAPI(XML_Bool)
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
XML_Parser parser, float maximumAmplificationFactor);
-/* Added in Expat 2.4.0. */
+/* Added in Expat 2.4.0 for XML_DTD defined and
+ * added in Expat 2.6.0 for XML_GE == 1. */
XMLPARSEAPI(XML_Bool)
XML_SetBillionLaughsAttackProtectionActivationThreshold(
XML_Parser parser, unsigned long long activationThresholdBytes);
#endif
+/* Added in Expat 2.6.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+
/* Expat follows the semantic versioning convention.
- See http://semver.org.
+ See https://semver.org
*/
#define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 5
+#define XML_MINOR_VERSION 6
#define XML_MICRO_VERSION 0
#ifdef __cplusplus
diff --git a/Modules/expat/expat_config.h b/Modules/expat/expat_config.h
index 6671f7b..e7d9499 100644
--- a/Modules/expat/expat_config.h
+++ b/Modules/expat/expat_config.h
@@ -16,6 +16,7 @@
#define XML_NS 1
#define XML_DTD 1
+#define XML_GE 1
#define XML_CONTEXT_BYTES 1024
// bpo-30947: Python uses best available entropy sources to
diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h
index e09f533..cce71e4 100644
--- a/Modules/expat/internal.h
+++ b/Modules/expat/internal.h
@@ -28,9 +28,10 @@
Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
- Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com>
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
+ Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -154,12 +155,15 @@ extern "C" {
void _INTERNAL_trim_to_complete_utf8_characters(const char *from,
const char **fromLimRef);
-#if defined(XML_DTD)
+#if XML_GE == 1
unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser);
unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
const char *unsignedCharToPrintable(unsigned char c);
#endif
+extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+extern unsigned int g_parseAttempts; // used for testing only
+
#ifdef __cplusplus
}
#endif
diff --git a/Modules/expat/siphash.h b/Modules/expat/siphash.h
index 303283a..a1ed99e 100644
--- a/Modules/expat/siphash.h
+++ b/Modules/expat/siphash.h
@@ -106,7 +106,7 @@
* if this code is included and compiled as C++; related GCC warning is:
* warning: use of C++11 long long integer constant [-Wlong-long]
*/
-#define _SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low))
+#define SIP_ULL(high, low) ((((uint64_t)high) << 32) | (low))
#define SIP_ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
@@ -190,10 +190,10 @@ sip_round(struct siphash *H, const int rounds) {
static struct siphash *
sip24_init(struct siphash *H, const struct sipkey *key) {
- H->v0 = _SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
- H->v1 = _SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
- H->v2 = _SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
- H->v3 = _SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];
+ H->v0 = SIP_ULL(0x736f6d65U, 0x70736575U) ^ key->k[0];
+ H->v1 = SIP_ULL(0x646f7261U, 0x6e646f6dU) ^ key->k[1];
+ H->v2 = SIP_ULL(0x6c796765U, 0x6e657261U) ^ key->k[0];
+ H->v3 = SIP_ULL(0x74656462U, 0x79746573U) ^ key->k[1];
H->p = H->buf;
H->c = 0;
diff --git a/Modules/expat/winconfig.h b/Modules/expat/winconfig.h
index 2ecd61b..0580551 100644
--- a/Modules/expat/winconfig.h
+++ b/Modules/expat/winconfig.h
@@ -9,7 +9,8 @@
Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
Copyright (c) 2005 Karl Waclawek <karl@waclawek.net>
- Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2017-2023 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2023 Orgad Shaneh <orgad.shaneh@audiocodes.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -35,7 +36,9 @@
#ifndef WINCONFIG_H
#define WINCONFIG_H
-#define WIN32_LEAN_AND_MEAN
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
index b6c2eca..aaf0fa9 100644
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -1,4 +1,4 @@
-/* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+)
+/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -13,7 +13,7 @@
Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
Copyright (c) 2016 Eric Rahm <erahm@mozilla.com>
- Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016 Gaurav <g.gupta@samsung.com>
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr>
@@ -32,10 +32,13 @@
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Copyright (c) 2019-2020 Ben Wagner <bungeman@chromium.org>
Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org>
- Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
+ Copyright (c) 2021 Donghee Na <donghee.na@python.org>
Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
Copyright (c) 2022 Jann Horn <jannh@google.com>
+ Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
+ Copyright (c) 2023 Owain Davies <owaind@bath.edu>
+ Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -60,10 +63,25 @@
#define XML_BUILDING_EXPAT 1
-#include <expat_config.h>
+#include "expat_config.h"
-#if ! defined(_GNU_SOURCE)
-# define _GNU_SOURCE 1 /* syscall prototype */
+#if ! defined(XML_GE) || (1 - XML_GE - 1 == 2) || (XML_GE < 0) || (XML_GE > 1)
+# error XML_GE (for general entities) must be defined, non-empty, either 1 or 0 (0 to disable, 1 to enable; 1 is a common default)
+#endif
+
+#if defined(XML_DTD) && XML_GE == 0
+# error Either undefine XML_DTD or define XML_GE to 1.
+#endif
+
+#if ! defined(XML_CONTEXT_BYTES) || (1 - XML_CONTEXT_BYTES - 1 == 2) \
+ || (XML_CONTEXT_BYTES + 0 < 0)
+# error XML_CONTEXT_BYTES must be defined, non-empty and >=0 (0 to disable, >=1 to enable; 1024 is a common default)
+#endif
+
+#if defined(HAVE_SYSCALL_GETRANDOM)
+# if ! defined(_GNU_SOURCE)
+# define _GNU_SOURCE 1 /* syscall prototype */
+# endif
#endif
#ifdef _WIN32
@@ -73,6 +91,7 @@
# endif
#endif
+#include <stdbool.h>
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
@@ -131,8 +150,8 @@
Your options include: \
* Linux >=3.17 + glibc >=2.25 (getrandom): HAVE_GETRANDOM, \
* Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \
- * BSD / macOS >=10.7 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
- * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
+ * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \
+ * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
* Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
@@ -196,6 +215,8 @@ typedef char ICHAR;
/* Do safe (NULL-aware) pointer arithmetic */
#define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0)
+#define EXPAT_MIN(a, b) (((a) < (b)) ? (a) : (b))
+
#include "internal.h"
#include "xmltok.h"
#include "xmlrole.h"
@@ -279,7 +300,7 @@ typedef struct {
XML_Parse()/XML_ParseBuffer(), the buffer is re-allocated to
contain the 'raw' name as well.
- A parser re-uses these structures, maintaining a list of allocated
+ A parser reuses these structures, maintaining a list of allocated
TAG objects in a free list.
*/
typedef struct tag {
@@ -408,12 +429,12 @@ enum XML_Account {
XML_ACCOUNT_NONE /* i.e. do not account, was accounted already */
};
-#ifdef XML_DTD
+#if XML_GE == 1
typedef unsigned long long XmlBigCount;
typedef struct accounting {
XmlBigCount countBytesDirect;
XmlBigCount countBytesIndirect;
- int debugLevel;
+ unsigned long debugLevel;
float maximumAmplificationFactor; // >=1.0
unsigned long long activationThresholdBytes;
} ACCOUNTING;
@@ -422,9 +443,9 @@ typedef struct entity_stats {
unsigned int countEverOpened;
unsigned int currentDepth;
unsigned int maximumDepthSeen;
- int debugLevel;
+ unsigned long debugLevel;
} ENTITY_STATS;
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start,
const char *end, const char **endPtr);
@@ -464,41 +485,47 @@ static enum XML_Error doContent(XML_Parser parser, int startTagLevel,
const ENCODING *enc, const char *start,
const char *end, const char **endPtr,
XML_Bool haveMore, enum XML_Account account);
-static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *,
+static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *enc,
const char **startPtr, const char *end,
const char **nextPtr, XML_Bool haveMore,
enum XML_Account account);
#ifdef XML_DTD
-static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *,
+static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *enc,
const char **startPtr, const char *end,
const char **nextPtr, XML_Bool haveMore);
#endif /* XML_DTD */
static void freeBindings(XML_Parser parser, BINDING *bindings);
-static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *,
- const char *s, TAG_NAME *tagNamePtr,
+static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
+ const char *attStr, TAG_NAME *tagNamePtr,
BINDING **bindingsPtr,
enum XML_Account account);
static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix,
const ATTRIBUTE_ID *attId, const XML_Char *uri,
BINDING **bindingsPtr);
-static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
- XML_Bool isId, const XML_Char *dfltValue,
- XML_Parser parser);
-static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *,
- XML_Bool isCdata, const char *,
- const char *, STRING_POOL *,
+static int defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId,
+ XML_Bool isCdata, XML_Bool isId,
+ const XML_Char *value, XML_Parser parser);
+static enum XML_Error storeAttributeValue(XML_Parser parser,
+ const ENCODING *enc, XML_Bool isCdata,
+ const char *ptr, const char *end,
+ STRING_POOL *pool,
enum XML_Account account);
-static enum XML_Error appendAttributeValue(XML_Parser parser, const ENCODING *,
- XML_Bool isCdata, const char *,
- const char *, STRING_POOL *,
+static enum XML_Error appendAttributeValue(XML_Parser parser,
+ const ENCODING *enc,
+ XML_Bool isCdata, const char *ptr,
+ const char *end, STRING_POOL *pool,
enum XML_Account account);
static ATTRIBUTE_ID *getAttributeId(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
-static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
+static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType);
+#if XML_GE == 1
static enum XML_Error storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end,
enum XML_Account account);
+#else
+static enum XML_Error storeSelfEntityValue(XML_Parser parser, ENTITY *entity);
+#endif
static int reportProcessingInstruction(XML_Parser parser, const ENCODING *enc,
const char *start, const char *end);
static int reportComment(XML_Parser parser, const ENCODING *enc,
@@ -518,21 +545,22 @@ static void dtdDestroy(DTD *p, XML_Bool isDocEntity,
const XML_Memory_Handling_Suite *ms);
static int dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd,
const XML_Memory_Handling_Suite *ms);
-static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *, STRING_POOL *,
- const HASH_TABLE *);
+static int copyEntityTable(XML_Parser oldParser, HASH_TABLE *newTable,
+ STRING_POOL *newPool, const HASH_TABLE *oldTable);
static NAMED *lookup(XML_Parser parser, HASH_TABLE *table, KEY name,
size_t createSize);
-static void FASTCALL hashTableInit(HASH_TABLE *,
+static void FASTCALL hashTableInit(HASH_TABLE *table,
const XML_Memory_Handling_Suite *ms);
-static void FASTCALL hashTableClear(HASH_TABLE *);
-static void FASTCALL hashTableDestroy(HASH_TABLE *);
-static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *, const HASH_TABLE *);
-static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *);
+static void FASTCALL hashTableClear(HASH_TABLE *table);
+static void FASTCALL hashTableDestroy(HASH_TABLE *table);
+static void FASTCALL hashTableIterInit(HASH_TABLE_ITER *iter,
+ const HASH_TABLE *table);
+static NAMED *FASTCALL hashTableIterNext(HASH_TABLE_ITER *iter);
-static void FASTCALL poolInit(STRING_POOL *,
+static void FASTCALL poolInit(STRING_POOL *pool,
const XML_Memory_Handling_Suite *ms);
-static void FASTCALL poolClear(STRING_POOL *);
-static void FASTCALL poolDestroy(STRING_POOL *);
+static void FASTCALL poolClear(STRING_POOL *pool);
+static void FASTCALL poolDestroy(STRING_POOL *pool);
static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
const char *ptr, const char *end);
static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
@@ -562,7 +590,7 @@ static XML_Parser parserCreate(const XML_Char *encodingName,
static void parserInit(XML_Parser parser, const XML_Char *encodingName);
-#ifdef XML_DTD
+#if XML_GE == 1
static float accountingGetCurrentAmplification(XML_Parser rootParser);
static void accountingReportStats(XML_Parser originParser, const char *epilog);
static void accountingOnAbort(XML_Parser originParser);
@@ -585,13 +613,12 @@ static void entityTrackingOnClose(XML_Parser parser, ENTITY *entity,
static XML_Parser getRootParserOf(XML_Parser parser,
unsigned int *outLevelDiff);
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
static unsigned long getDebugLevel(const char *variableName,
unsigned long defaultDebugLevel);
#define poolStart(pool) ((pool)->start)
-#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--(pool->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
@@ -602,21 +629,35 @@ static unsigned long getDebugLevel(const char *variableName,
? 0 \
: ((*((pool)->ptr)++ = c), 1))
+XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
+unsigned int g_parseAttempts = 0; // used for testing only
+
struct XML_ParserStruct {
/* The first member must be m_userData so that the XML_GetUserData
macro works. */
void *m_userData;
void *m_handlerArg;
- char *m_buffer;
+
+ // How the four parse buffer pointers below relate in time and space:
+ //
+ // m_buffer <= m_bufferPtr <= m_bufferEnd <= m_bufferLim
+ // | | | |
+ // <--parsed-->| | |
+ // <---parsing--->| |
+ // <--unoccupied-->|
+ // <---------total-malloced/realloced-------->|
+
+ char *m_buffer; // malloc/realloc base pointer of parse buffer
const XML_Memory_Handling_Suite m_mem;
- /* first character to be parsed */
- const char *m_bufferPtr;
- /* past last character to be parsed */
- char *m_bufferEnd;
- /* allocated end of m_buffer */
- const char *m_bufferLim;
+ const char *m_bufferPtr; // first character to be parsed
+ char *m_bufferEnd; // past last character to be parsed
+ const char *m_bufferLim; // allocated end of m_buffer
+
XML_Index m_parseEndByteIndex;
const char *m_parseEndPtr;
+ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
+ XML_Bool m_reparseDeferralEnabled;
+ int m_lastBufferRequestSize;
XML_Char *m_dataBuf;
XML_Char *m_dataBufEnd;
XML_StartElementHandler m_startElementHandler;
@@ -703,7 +744,7 @@ struct XML_ParserStruct {
enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
unsigned long m_hash_secret_salt;
-#ifdef XML_DTD
+#if XML_GE == 1
ACCOUNTING m_accounting;
ENTITY_STATS m_entity_stats;
#endif
@@ -948,6 +989,47 @@ get_hash_secret_salt(XML_Parser parser) {
return parser->m_hash_secret_salt;
}
+static enum XML_Error
+callProcessor(XML_Parser parser, const char *start, const char *end,
+ const char **endPtr) {
+ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
+
+ if (parser->m_reparseDeferralEnabled
+ && ! parser->m_parsingStatus.finalBuffer) {
+ // Heuristic: don't try to parse a partial token again until the amount of
+ // available data has increased significantly.
+ const size_t had_before = parser->m_partialTokenBytesBefore;
+ // ...but *do* try anyway if we're close to causing a reallocation.
+ size_t available_buffer
+ = EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
+#if XML_CONTEXT_BYTES > 0
+ available_buffer -= EXPAT_MIN(available_buffer, XML_CONTEXT_BYTES);
+#endif
+ available_buffer
+ += EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd);
+ // m_lastBufferRequestSize is never assigned a value < 0, so the cast is ok
+ const bool enough
+ = (have_now >= 2 * had_before)
+ || ((size_t)parser->m_lastBufferRequestSize > available_buffer);
+
+ if (! enough) {
+ *endPtr = start; // callers may expect this to be set
+ return XML_ERROR_NONE;
+ }
+ }
+ g_parseAttempts += 1;
+ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+ if (ret == XML_ERROR_NONE) {
+ // if we consumed nothing, remember what we had on this parse attempt.
+ if (*endPtr == start) {
+ parser->m_partialTokenBytesBefore = have_now;
+ } else {
+ parser->m_partialTokenBytesBefore = 0;
+ }
+ }
+ return ret;
+}
+
static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser) {
/* hash functions must be initialized before setContext() is called */
@@ -1129,6 +1211,9 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_bufferEnd = parser->m_buffer;
parser->m_parseEndByteIndex = 0;
parser->m_parseEndPtr = NULL;
+ parser->m_partialTokenBytesBefore = 0;
+ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
+ parser->m_lastBufferRequestSize = 0;
parser->m_declElementType = NULL;
parser->m_declAttributeId = NULL;
parser->m_declEntity = NULL;
@@ -1163,7 +1248,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
#endif
parser->m_hash_secret_salt = 0;
-#ifdef XML_DTD
+#if XML_GE == 1
memset(&parser->m_accounting, 0, sizeof(ACCOUNTING));
parser->m_accounting.debugLevel = getDebugLevel("EXPAT_ACCOUNTING_DEBUG", 0u);
parser->m_accounting.maximumAmplificationFactor
@@ -1298,6 +1383,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
to worry which hash secrets each table has.
*/
unsigned long oldhash_secret_salt;
+ XML_Bool oldReparseDeferralEnabled;
/* Validate the oldParser parameter before we pull everything out of it */
if (oldParser == NULL)
@@ -1342,6 +1428,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
to worry which hash secrets each table has.
*/
oldhash_secret_salt = parser->m_hash_secret_salt;
+ oldReparseDeferralEnabled = parser->m_reparseDeferralEnabled;
#ifdef XML_DTD
if (! context)
@@ -1394,6 +1481,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context,
parser->m_defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
parser->m_ns_triplets = oldns_triplets;
parser->m_hash_secret_salt = oldhash_secret_salt;
+ parser->m_reparseDeferralEnabled = oldReparseDeferralEnabled;
parser->m_parentParser = oldParser;
#ifdef XML_DTD
parser->m_paramEntityParsing = oldParamEntityParsing;
@@ -1848,55 +1936,8 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_parsingStatus.parsing = XML_PARSING;
}
- if (len == 0) {
- parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
- if (! isFinal)
- return XML_STATUS_OK;
- parser->m_positionPtr = parser->m_bufferPtr;
- parser->m_parseEndPtr = parser->m_bufferEnd;
-
- /* If data are left over from last buffer, and we now know that these
- data are the final chunk of input, then we have to check them again
- to detect errors based on that fact.
- */
- parser->m_errorCode
- = parser->m_processor(parser, parser->m_bufferPtr,
- parser->m_parseEndPtr, &parser->m_bufferPtr);
-
- if (parser->m_errorCode == XML_ERROR_NONE) {
- switch (parser->m_parsingStatus.parsing) {
- case XML_SUSPENDED:
- /* It is hard to be certain, but it seems that this case
- * cannot occur. This code is cleaning up a previous parse
- * with no new data (since len == 0). Changing the parsing
- * state requires getting to execute a handler function, and
- * there doesn't seem to be an opportunity for that while in
- * this circumstance.
- *
- * Given the uncertainty, we retain the code but exclude it
- * from coverage tests.
- *
- * LCOV_EXCL_START
- */
- XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
- parser->m_bufferPtr, &parser->m_position);
- parser->m_positionPtr = parser->m_bufferPtr;
- return XML_STATUS_SUSPENDED;
- /* LCOV_EXCL_STOP */
- case XML_INITIALIZED:
- case XML_PARSING:
- parser->m_parsingStatus.parsing = XML_FINISHED;
- /* fall through */
- default:
- return XML_STATUS_OK;
- }
- }
- parser->m_eventEndPtr = parser->m_eventPtr;
- parser->m_processor = errorProcessor;
- return XML_STATUS_ERROR;
- }
-#ifndef XML_CONTEXT_BYTES
- else if (parser->m_bufferPtr == parser->m_bufferEnd) {
+#if XML_CONTEXT_BYTES == 0
+ if (parser->m_bufferPtr == parser->m_bufferEnd) {
const char *end;
int nLeftOver;
enum XML_Status result;
@@ -1907,12 +1948,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_processor = errorProcessor;
return XML_STATUS_ERROR;
}
+ // though this isn't a buffer request, we assume that `len` is the app's
+ // preferred buffer fill size, and therefore save it here.
+ parser->m_lastBufferRequestSize = len;
parser->m_parseEndByteIndex += len;
parser->m_positionPtr = s;
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
parser->m_errorCode
- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
+ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
if (parser->m_errorCode != XML_ERROR_NONE) {
parser->m_eventEndPtr = parser->m_eventPtr;
@@ -1939,23 +1983,25 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
&parser->m_position);
nLeftOver = s + len - end;
if (nLeftOver) {
- if (parser->m_buffer == NULL
- || nLeftOver > parser->m_bufferLim - parser->m_buffer) {
- /* avoid _signed_ integer overflow */
- char *temp = NULL;
- const int bytesToAllocate = (int)((unsigned)len * 2U);
- if (bytesToAllocate > 0) {
- temp = (char *)REALLOC(parser, parser->m_buffer, bytesToAllocate);
- }
- if (temp == NULL) {
- parser->m_errorCode = XML_ERROR_NO_MEMORY;
- parser->m_eventPtr = parser->m_eventEndPtr = NULL;
- parser->m_processor = errorProcessor;
- return XML_STATUS_ERROR;
- }
- parser->m_buffer = temp;
- parser->m_bufferLim = parser->m_buffer + bytesToAllocate;
+ // Back up and restore the parsing status to avoid XML_ERROR_SUSPENDED
+ // (and XML_ERROR_FINISHED) from XML_GetBuffer.
+ const enum XML_Parsing originalStatus = parser->m_parsingStatus.parsing;
+ parser->m_parsingStatus.parsing = XML_PARSING;
+ void *const temp = XML_GetBuffer(parser, nLeftOver);
+ parser->m_parsingStatus.parsing = originalStatus;
+ // GetBuffer may have overwritten this, but we want to remember what the
+ // app requested, not how many bytes were left over after parsing.
+ parser->m_lastBufferRequestSize = len;
+ if (temp == NULL) {
+ // NOTE: parser->m_errorCode has already been set by XML_GetBuffer().
+ parser->m_eventPtr = parser->m_eventEndPtr = NULL;
+ parser->m_processor = errorProcessor;
+ return XML_STATUS_ERROR;
}
+ // Since we know that the buffer was empty and XML_CONTEXT_BYTES is 0, we
+ // don't have any data to preserve, and can copy straight into the start
+ // of the buffer rather than the GetBuffer return pointer (which may be
+ // pointing further into the allocated buffer).
memcpy(parser->m_buffer, end, nLeftOver);
}
parser->m_bufferPtr = parser->m_buffer;
@@ -1966,16 +2012,15 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_eventEndPtr = parser->m_bufferPtr;
return result;
}
-#endif /* not defined XML_CONTEXT_BYTES */
- else {
- void *buff = XML_GetBuffer(parser, len);
- if (buff == NULL)
- return XML_STATUS_ERROR;
- else {
- memcpy(buff, s, len);
- return XML_ParseBuffer(parser, len, isFinal);
- }
+#endif /* XML_CONTEXT_BYTES == 0 */
+ void *buff = XML_GetBuffer(parser, len);
+ if (buff == NULL)
+ return XML_STATUS_ERROR;
+ if (len > 0) {
+ assert(s != NULL); // make sure s==NULL && len!=0 was rejected above
+ memcpy(buff, s, len);
}
+ return XML_ParseBuffer(parser, len, isFinal);
}
enum XML_Status XMLCALL
@@ -2015,8 +2060,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
parser->m_parseEndByteIndex += len;
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
- parser->m_errorCode = parser->m_processor(
- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
+ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
+ &parser->m_bufferPtr);
if (parser->m_errorCode != XML_ERROR_NONE) {
parser->m_eventEndPtr = parser->m_eventPtr;
@@ -2061,10 +2106,14 @@ XML_GetBuffer(XML_Parser parser, int len) {
default:;
}
- if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)) {
-#ifdef XML_CONTEXT_BYTES
+ // whether or not the request succeeds, `len` seems to be the app's preferred
+ // buffer fill size; remember it.
+ parser->m_lastBufferRequestSize = len;
+ if (len > EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferEnd)
+ || parser->m_buffer == NULL) {
+#if XML_CONTEXT_BYTES > 0
int keep;
-#endif /* defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
/* Do not invoke signed arithmetic overflow: */
int neededSize = (int)((unsigned)len
+ (unsigned)EXPAT_SAFE_PTR_DIFF(
@@ -2073,7 +2122,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_errorCode = XML_ERROR_NO_MEMORY;
return NULL;
}
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
keep = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer);
if (keep > XML_CONTEXT_BYTES)
keep = XML_CONTEXT_BYTES;
@@ -2083,10 +2132,11 @@ XML_GetBuffer(XML_Parser parser, int len) {
return NULL;
}
neededSize += keep;
-#endif /* defined XML_CONTEXT_BYTES */
- if (neededSize
- <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
-#ifdef XML_CONTEXT_BYTES
+#endif /* XML_CONTEXT_BYTES > 0 */
+ if (parser->m_buffer && parser->m_bufferPtr
+ && neededSize
+ <= EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer)) {
+#if XML_CONTEXT_BYTES > 0
if (keep < EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)) {
int offset
= (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferPtr, parser->m_buffer)
@@ -2099,19 +2149,17 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_bufferPtr -= offset;
}
#else
- if (parser->m_buffer && parser->m_bufferPtr) {
- memmove(parser->m_buffer, parser->m_bufferPtr,
- EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
- parser->m_bufferEnd
- = parser->m_buffer
- + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
- parser->m_bufferPtr = parser->m_buffer;
- }
-#endif /* not defined XML_CONTEXT_BYTES */
+ memmove(parser->m_buffer, parser->m_bufferPtr,
+ EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr));
+ parser->m_bufferEnd
+ = parser->m_buffer
+ + EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr);
+ parser->m_bufferPtr = parser->m_buffer;
+#endif /* XML_CONTEXT_BYTES > 0 */
} else {
char *newBuf;
int bufferSize
- = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_bufferPtr);
+ = (int)EXPAT_SAFE_PTR_DIFF(parser->m_bufferLim, parser->m_buffer);
if (bufferSize == 0)
bufferSize = INIT_BUFFER_SIZE;
do {
@@ -2128,7 +2176,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
return NULL;
}
parser->m_bufferLim = newBuf + bufferSize;
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
if (parser->m_bufferPtr) {
memcpy(newBuf, &parser->m_bufferPtr[-keep],
EXPAT_SAFE_PTR_DIFF(parser->m_bufferEnd, parser->m_bufferPtr)
@@ -2158,7 +2206,7 @@ XML_GetBuffer(XML_Parser parser, int len) {
parser->m_bufferEnd = newBuf;
}
parser->m_bufferPtr = parser->m_buffer = newBuf;
-#endif /* not defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
}
parser->m_eventPtr = parser->m_eventEndPtr = NULL;
parser->m_positionPtr = NULL;
@@ -2208,7 +2256,7 @@ XML_ResumeParser(XML_Parser parser) {
}
parser->m_parsingStatus.parsing = XML_PARSING;
- parser->m_errorCode = parser->m_processor(
+ parser->m_errorCode = callProcessor(
parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
if (parser->m_errorCode != XML_ERROR_NONE) {
@@ -2272,7 +2320,7 @@ XML_GetCurrentByteCount(XML_Parser parser) {
const char *XMLCALL
XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
if (parser == NULL)
return NULL;
if (parser->m_eventPtr && parser->m_buffer) {
@@ -2286,7 +2334,7 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) {
(void)parser;
(void)offset;
(void)size;
-#endif /* defined XML_CONTEXT_BYTES */
+#endif /* XML_CONTEXT_BYTES > 0 */
return (const char *)0;
}
@@ -2506,7 +2554,7 @@ XML_GetFeatureList(void) {
#ifdef XML_DTD
{XML_FEATURE_DTD, XML_L("XML_DTD"), 0},
#endif
-#ifdef XML_CONTEXT_BYTES
+#if XML_CONTEXT_BYTES > 0
{XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"),
XML_CONTEXT_BYTES},
#endif
@@ -2522,8 +2570,9 @@ XML_GetFeatureList(void) {
#ifdef XML_ATTR_INFO
{XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0},
#endif
-#ifdef XML_DTD
- /* Added in Expat 2.4.0. */
+#if XML_GE == 1
+ /* Added in Expat 2.4.0 for XML_DTD defined and
+ * added in Expat 2.6.0 for XML_GE == 1. */
{XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
XML_L("XML_BLAP_MAX_AMP"),
(long int)
@@ -2531,13 +2580,15 @@ XML_GetFeatureList(void) {
{XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT,
XML_L("XML_BLAP_ACT_THRES"),
EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT},
+ /* Added in Expat 2.6.0. */
+ {XML_FEATURE_GE, XML_L("XML_GE"), 0},
#endif
{XML_FEATURE_END, NULL, 0}};
return features;
}
-#ifdef XML_DTD
+#if XML_GE == 1
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
XML_Parser parser, float maximumAmplificationFactor) {
@@ -2559,7 +2610,16 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
parser->m_accounting.activationThresholdBytes = activationThresholdBytes;
return XML_TRUE;
}
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
+
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
+ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
+ parser->m_reparseDeferralEnabled = enabled;
+ return XML_TRUE;
+ }
+ return XML_FALSE;
+}
/* Initially tag->rawName always points into the parse buffer;
for those TAG instances opened while the current parse buffer was
@@ -2581,7 +2641,7 @@ storeRawNames(XML_Parser parser) {
*/
if (tag->rawName == rawNameBuf)
break;
- /* For re-use purposes we need to ensure that the
+ /* For reuse purposes we need to ensure that the
size of tag->buf is a multiple of sizeof(XML_Char).
*/
rawNameLen = ROUND_UP(tag->rawNameLength, sizeof(XML_Char));
@@ -2645,13 +2705,13 @@ externalEntityInitProcessor2(XML_Parser parser, const char *start,
int tok = XmlContentTok(parser->m_encoding, start, end, &next);
switch (tok) {
case XML_TOK_BOM:
-#ifdef XML_DTD
+#if XML_GE == 1
if (! accountingDiffTolerated(parser, tok, start, next, __LINE__,
XML_ACCOUNT_DIRECT)) {
accountingOnAbort(parser);
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
}
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
/* If we are at the end of the buffer, this would cause the next stage,
i.e. externalEntityInitProcessor3, to pass control directly to
@@ -2765,7 +2825,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
for (;;) {
const char *next = s; /* XmlContentTok doesn't always set the last arg */
int tok = XmlContentTok(enc, s, end, &next);
-#ifdef XML_DTD
+#if XML_GE == 1
const char *accountAfter
= ((tok == XML_TOK_TRAILING_RSQB) || (tok == XML_TOK_TRAILING_CR))
? (haveMore ? s /* i.e. 0 bytes */ : end)
@@ -2831,14 +2891,14 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, s + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
-#ifdef XML_DTD
+#if XML_GE == 1
/* NOTE: We are replacing 4-6 characters original input for 1 character
* so there is no amplification and hence recording without
* protection. */
accountingDiffTolerated(parser, tok, (char *)&ch,
((char *)&ch) + sizeof(XML_Char), __LINE__,
XML_ACCOUNT_ENTITY_EXPANSION);
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
if (parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, &ch, 1);
else if (parser->m_defaultHandler)
@@ -3039,13 +3099,13 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
if (parser->m_ns && localPart) {
/* localPart and prefix may have been overwritten in
tag->name.str, since this points to the binding->uri
- buffer which gets re-used; so we have to add them again
+ buffer which gets reused; so we have to add them again
*/
uri = (XML_Char *)tag->name.str + tag->name.uriLen;
/* don't need to check for space - already done in storeAtts() */
while (*localPart)
*uri++ = *localPart++;
- prefix = (XML_Char *)tag->name.prefix;
+ prefix = tag->name.prefix;
if (parser->m_ns_triplets && prefix) {
*uri++ = parser->m_namespaceSeparator;
while (*prefix)
@@ -3112,7 +3172,7 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
However, now we have a start/endCdataSectionHandler, so it seems
easier to let the user deal with this.
*/
- else if (0 && parser->m_characterDataHandler)
+ else if ((0) && parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
0);
/* END disabled code */
@@ -3141,8 +3201,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
(int)(dataPtr - (ICHAR *)parser->m_dataBuf));
} else
parser->m_characterDataHandler(
- parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)end - (XML_Char *)s));
+ parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)end - (const XML_Char *)s));
} else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, end);
/* We are at the end of the final buffer, should we check for
@@ -3175,8 +3235,8 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
*eventPP = s;
}
} else
- charDataHandler(parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)next - (XML_Char *)s));
+ charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)next - (const XML_Char *)s));
} else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, next);
} break;
@@ -4040,7 +4100,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
for (;;) {
const char *next = s; /* in case of XML_TOK_NONE or XML_TOK_PARTIAL */
int tok = XmlCdataSectionTok(enc, s, end, &next);
-#ifdef XML_DTD
+#if XML_GE == 1
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
accountingOnAbort(parser);
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
@@ -4055,7 +4115,7 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
parser->m_endCdataSectionHandler(parser->m_handlerArg);
/* BEGIN disabled code */
/* see comment under XML_TOK_CDATA_SECT_OPEN */
- else if (0 && parser->m_characterDataHandler)
+ else if ((0) && parser->m_characterDataHandler)
parser->m_characterDataHandler(parser->m_handlerArg, parser->m_dataBuf,
0);
/* END disabled code */
@@ -4091,8 +4151,8 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*eventPP = s;
}
} else
- charDataHandler(parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)next - (XML_Char *)s));
+ charDataHandler(parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)next - (const XML_Char *)s));
} else if (parser->m_defaultHandler)
reportDefault(parser, enc, s, next);
} break;
@@ -4192,7 +4252,7 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr,
*eventPP = s;
*startPtr = NULL;
tok = XmlIgnoreSectionTok(enc, s, end, &next);
-# ifdef XML_DTD
+# if XML_GE == 1
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
XML_ACCOUNT_DIRECT)) {
accountingOnAbort(parser);
@@ -4284,7 +4344,7 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *s,
const XML_Char *storedversion = NULL;
int standalone = -1;
-#ifdef XML_DTD
+#if XML_GE == 1
if (! accountingDiffTolerated(parser, XML_TOK_XML_DECL, s, next, __LINE__,
XML_ACCOUNT_DIRECT)) {
accountingOnAbort(parser);
@@ -4482,16 +4542,16 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
parser->m_processor = entityValueProcessor;
return entityValueProcessor(parser, next, end, nextPtr);
}
- /* If we are at the end of the buffer, this would cause XmlPrologTok to
- return XML_TOK_NONE on the next call, which would then cause the
- function to exit with *nextPtr set to s - that is what we want for other
- tokens, but not for the BOM - we would rather like to skip it;
- then, when this routine is entered the next time, XmlPrologTok will
- return XML_TOK_INVALID, since the BOM is still in the buffer
+ /* XmlPrologTok has now set the encoding based on the BOM it found, and we
+ must move s and nextPtr forward to consume the BOM.
+
+ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
+ would leave the BOM in the buffer and return. On the next call to this
+ function, our XmlPrologTok call would return XML_TOK_INVALID, since it
+ is not valid to have multiple BOMs.
*/
- else if (tok == XML_TOK_BOM && next == end
- && ! parser->m_parsingStatus.finalBuffer) {
-# ifdef XML_DTD
+ else if (tok == XML_TOK_BOM) {
+# if XML_GE == 1
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
XML_ACCOUNT_DIRECT)) {
accountingOnAbort(parser);
@@ -4500,7 +4560,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
# endif
*nextPtr = next;
- return XML_ERROR_NONE;
+ s = next;
}
/* If we get this token, we have the start of what might be a
normal tag, but not a declaration (i.e. it doesn't begin with
@@ -4707,11 +4767,13 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
}
role = XmlTokenRole(&parser->m_prologState, tok, s, next, enc);
-#ifdef XML_DTD
+#if XML_GE == 1
switch (role) {
case XML_ROLE_INSTANCE_START: // bytes accounted in contentProcessor
case XML_ROLE_XML_DECL: // bytes accounted in processXmlDecl
- case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
+# ifdef XML_DTD
+ case XML_ROLE_TEXT_DECL: // bytes accounted in processXmlDecl
+# endif
break;
default:
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, account)) {
@@ -5029,6 +5091,9 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
break;
case XML_ROLE_ENTITY_VALUE:
if (dtd->keepProcessing) {
+#if XML_GE == 1
+ // This will store the given replacement text in
+ // parser->m_declEntity->textPtr.
enum XML_Error result
= storeEntityValue(parser, enc, s + enc->minBytesPerChar,
next - enc->minBytesPerChar, XML_ACCOUNT_NONE);
@@ -5049,6 +5114,25 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
poolDiscard(&dtd->entityValuePool);
if (result != XML_ERROR_NONE)
return result;
+#else
+ // This will store "&amp;entity123;" in parser->m_declEntity->textPtr
+ // to end up as "&entity123;" in the handler.
+ if (parser->m_declEntity != NULL) {
+ const enum XML_Error result
+ = storeSelfEntityValue(parser, parser->m_declEntity);
+ if (result != XML_ERROR_NONE)
+ return result;
+
+ if (parser->m_entityDeclHandler) {
+ *eventEndPP = s;
+ parser->m_entityDeclHandler(
+ parser->m_handlerArg, parser->m_declEntity->name,
+ parser->m_declEntity->is_param, parser->m_declEntity->textPtr,
+ parser->m_declEntity->textLen, parser->m_curBase, 0, 0, 0);
+ handleDefault = XML_FALSE;
+ }
+ }
+#endif
}
break;
case XML_ROLE_DOCTYPE_SYSTEM_ID:
@@ -5107,6 +5191,16 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end,
}
break;
case XML_ROLE_ENTITY_COMPLETE:
+#if XML_GE == 0
+ // This will store "&amp;entity123;" in entity->textPtr
+ // to end up as "&entity123;" in the handler.
+ if (parser->m_declEntity != NULL) {
+ const enum XML_Error result
+ = storeSelfEntityValue(parser, parser->m_declEntity);
+ if (result != XML_ERROR_NONE)
+ return result;
+ }
+#endif
if (dtd->keepProcessing && parser->m_declEntity
&& parser->m_entityDeclHandler) {
*eventEndPP = s;
@@ -5648,7 +5742,7 @@ epilogProcessor(XML_Parser parser, const char *s, const char *end,
for (;;) {
const char *next = NULL;
int tok = XmlPrologTok(parser->m_encoding, s, end, &next);
-#ifdef XML_DTD
+#if XML_GE == 1
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
XML_ACCOUNT_DIRECT)) {
accountingOnAbort(parser);
@@ -5728,7 +5822,7 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
return XML_ERROR_NO_MEMORY;
}
entity->open = XML_TRUE;
-#ifdef XML_DTD
+#if XML_GE == 1
entityTrackingOnOpen(parser, entity, __LINE__);
#endif
entity->processed = 0;
@@ -5761,10 +5855,10 @@ processInternalEntity(XML_Parser parser, ENTITY *entity, XML_Bool betweenDecl) {
if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) {
entity->processed = (int)(next - textStart);
parser->m_processor = internalEntityProcessor;
- } else {
-#ifdef XML_DTD
+ } else if (parser->m_openInternalEntities->entity == entity) {
+#if XML_GE == 1
entityTrackingOnClose(parser, entity, __LINE__);
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
entity->open = XML_FALSE;
parser->m_openInternalEntities = openEntity->next;
/* put openEntity back in list of free instances */
@@ -5813,7 +5907,7 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end,
return result;
}
-#ifdef XML_DTD
+#if XML_GE == 1
entityTrackingOnClose(parser, entity, __LINE__);
#endif
entity->open = XML_FALSE;
@@ -5892,7 +5986,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
const char *next
= ptr; /* XmlAttributeValueTok doesn't always set the last arg */
int tok = XmlAttributeValueTok(enc, ptr, end, &next);
-#ifdef XML_DTD
+#if XML_GE == 1
if (! accountingDiffTolerated(parser, tok, ptr, next, __LINE__, account)) {
accountingOnAbort(parser);
return XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
@@ -5957,14 +6051,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
XML_Char ch = (XML_Char)XmlPredefinedEntityName(
enc, ptr + enc->minBytesPerChar, next - enc->minBytesPerChar);
if (ch) {
-#ifdef XML_DTD
+#if XML_GE == 1
/* NOTE: We are replacing 4-6 characters original input for 1 character
* so there is no amplification and hence recording without
* protection. */
accountingDiffTolerated(parser, tok, (char *)&ch,
((char *)&ch) + sizeof(XML_Char), __LINE__,
XML_ACCOUNT_ENTITY_EXPANSION);
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
if (! poolAppendChar(pool, ch))
return XML_ERROR_NO_MEMORY;
break;
@@ -6042,14 +6136,14 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
enum XML_Error result;
const XML_Char *textEnd = entity->textPtr + entity->textLen;
entity->open = XML_TRUE;
-#ifdef XML_DTD
+#if XML_GE == 1
entityTrackingOnOpen(parser, entity, __LINE__);
#endif
result = appendAttributeValue(parser, parser->m_internalEncoding,
isCdata, (const char *)entity->textPtr,
(const char *)textEnd, pool,
XML_ACCOUNT_ENTITY_EXPANSION);
-#ifdef XML_DTD
+#if XML_GE == 1
entityTrackingOnClose(parser, entity, __LINE__);
#endif
entity->open = XML_FALSE;
@@ -6079,6 +6173,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata,
/* not reached */
}
+#if XML_GE == 1
static enum XML_Error
storeEntityValue(XML_Parser parser, const ENCODING *enc,
const char *entityTextPtr, const char *entityTextEnd,
@@ -6086,12 +6181,12 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
STRING_POOL *pool = &(dtd->entityValuePool);
enum XML_Error result = XML_ERROR_NONE;
-#ifdef XML_DTD
+# ifdef XML_DTD
int oldInEntityValue = parser->m_prologState.inEntityValue;
parser->m_prologState.inEntityValue = 1;
-#else
+# else
UNUSED_P(account);
-#endif /* XML_DTD */
+# endif /* XML_DTD */
/* never return Null for the value argument in EntityDeclHandler,
since this would indicate an external entity; therefore we
have to make sure that entityValuePool.start is not null */
@@ -6105,18 +6200,16 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
= entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */
int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
-#ifdef XML_DTD
if (! accountingDiffTolerated(parser, tok, entityTextPtr, next, __LINE__,
account)) {
accountingOnAbort(parser);
result = XML_ERROR_AMPLIFICATION_LIMIT_BREACH;
goto endEntityValue;
}
-#endif
switch (tok) {
case XML_TOK_PARAM_ENTITY_REF:
-#ifdef XML_DTD
+# ifdef XML_DTD
if (parser->m_isParamEntity || enc != parser->m_encoding) {
const XML_Char *name;
ENTITY *entity;
@@ -6178,7 +6271,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
}
break;
}
-#endif /* XML_DTD */
+# endif /* XML_DTD */
/* In the internal subset, PE references are not legal
within markup declarations, e.g entity values in this case. */
parser->m_eventPtr = entityTextPtr;
@@ -6259,12 +6352,38 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc,
entityTextPtr = next;
}
endEntityValue:
-#ifdef XML_DTD
+# ifdef XML_DTD
parser->m_prologState.inEntityValue = oldInEntityValue;
-#endif /* XML_DTD */
+# endif /* XML_DTD */
return result;
}
+#else /* XML_GE == 0 */
+
+static enum XML_Error
+storeSelfEntityValue(XML_Parser parser, ENTITY *entity) {
+ // This will store "&amp;entity123;" in entity->textPtr
+ // to end up as "&entity123;" in the handler.
+ const char *const entity_start = "&amp;";
+ const char *const entity_end = ";";
+
+ STRING_POOL *const pool = &(parser->m_dtd->entityValuePool);
+ if (! poolAppendString(pool, entity_start)
+ || ! poolAppendString(pool, entity->name)
+ || ! poolAppendString(pool, entity_end)) {
+ poolDiscard(pool);
+ return XML_ERROR_NO_MEMORY;
+ }
+
+ entity->textPtr = poolStart(pool);
+ entity->textLen = (int)(poolLength(pool));
+ poolFinish(pool);
+
+ return XML_ERROR_NONE;
+}
+
+#endif /* XML_GE == 0 */
+
static void FASTCALL
normalizeLines(XML_Char *s) {
XML_Char *p;
@@ -6375,8 +6494,9 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s,
} while ((convert_res != XML_CONVERT_COMPLETED)
&& (convert_res != XML_CONVERT_INPUT_INCOMPLETE));
} else
- parser->m_defaultHandler(parser->m_handlerArg, (XML_Char *)s,
- (int)((XML_Char *)end - (XML_Char *)s));
+ parser->m_defaultHandler(
+ parser->m_handlerArg, (const XML_Char *)s,
+ (int)((const XML_Char *)end - (const XML_Char *)s));
}
static int
@@ -6480,7 +6600,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start,
name = poolStoreString(&dtd->pool, enc, start, end);
if (! name)
return NULL;
- /* skip quotation mark - its storage will be re-used (like in name[-1]) */
+ /* skip quotation mark - its storage will be reused (like in name[-1]) */
++name;
id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name,
sizeof(ATTRIBUTE_ID));
@@ -6630,6 +6750,10 @@ getContext(XML_Parser parser) {
static XML_Bool
setContext(XML_Parser parser, const XML_Char *context) {
+ if (context == NULL) {
+ return XML_FALSE;
+ }
+
DTD *const dtd = parser->m_dtd; /* save one level of indirection */
const XML_Char *s = context;
@@ -7220,7 +7344,7 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc, const char *ptr,
return NULL;
for (;;) {
const enum XML_Convert_Result convert_res = XmlConvert(
- enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
+ enc, &ptr, end, (ICHAR **)&(pool->ptr), (const ICHAR *)pool->end);
if ((convert_res == XML_CONVERT_COMPLETED)
|| (convert_res == XML_CONVERT_INPUT_INCOMPLETE))
break;
@@ -7651,7 +7775,7 @@ copyString(const XML_Char *s, const XML_Memory_Handling_Suite *memsuite) {
return result;
}
-#ifdef XML_DTD
+#if XML_GE == 1
static float
accountingGetCurrentAmplification(XML_Parser rootParser) {
@@ -7672,7 +7796,7 @@ accountingReportStats(XML_Parser originParser, const char *epilog) {
const XML_Parser rootParser = getRootParserOf(originParser, NULL);
assert(! rootParser->m_parentParser);
- if (rootParser->m_accounting.debugLevel < 1) {
+ if (rootParser->m_accounting.debugLevel == 0u) {
return;
}
@@ -7709,7 +7833,7 @@ accountingReportDiff(XML_Parser rootParser,
/* Note: Performance is of no concern here */
const char *walker = before;
- if ((rootParser->m_accounting.debugLevel >= 3)
+ if ((rootParser->m_accounting.debugLevel >= 3u)
|| (after - before)
<= (ptrdiff_t)(contextLength + ellipsisLength + contextLength)) {
for (; walker < after; walker++) {
@@ -7774,7 +7898,7 @@ accountingDiffTolerated(XML_Parser originParser, int tok, const char *before,
|| (amplificationFactor
<= rootParser->m_accounting.maximumAmplificationFactor);
- if (rootParser->m_accounting.debugLevel >= 2) {
+ if (rootParser->m_accounting.debugLevel >= 2u) {
accountingReportStats(rootParser, "");
accountingReportDiff(rootParser, levelsAwayFromRootParser, before, after,
bytesMore, source_line, account);
@@ -7801,7 +7925,7 @@ static void
entityTrackingReportStats(XML_Parser rootParser, ENTITY *entity,
const char *action, int sourceLine) {
assert(! rootParser->m_parentParser);
- if (rootParser->m_entity_stats.debugLevel < 1)
+ if (rootParser->m_entity_stats.debugLevel == 0u)
return;
# if defined(XML_UNICODE)
@@ -8382,7 +8506,7 @@ unsignedCharToPrintable(unsigned char c) {
assert(0); /* never gets here */
}
-#endif /* XML_DTD */
+#endif /* XML_GE == 1 */
static unsigned long
getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
@@ -8393,9 +8517,9 @@ getDebugLevel(const char *variableName, unsigned long defaultDebugLevel) {
const char *const value = valueOrNull;
errno = 0;
- char *afterValue = (char *)value;
+ char *afterValue = NULL;
unsigned long debugLevel = strtoul(value, &afterValue, 10);
- if ((errno != 0) || (afterValue[0] != '\0')) {
+ if ((errno != 0) || (afterValue == value) || (afterValue[0] != '\0')) {
errno = 0;
return defaultDebugLevel;
}
diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c
index 3f0f5c1..2c48bf4 100644
--- a/Modules/expat/xmlrole.c
+++ b/Modules/expat/xmlrole.c
@@ -12,10 +12,10 @@
Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
- Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
- Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
+ Copyright (c) 2021 Donghee Na <donghee.na@python.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -38,7 +38,7 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <expat_config.h>
+#include "expat_config.h"
#include <stddef.h>
diff --git a/Modules/expat/xmlrole.h b/Modules/expat/xmlrole.h
index d6e1fa1..a790427 100644
--- a/Modules/expat/xmlrole.h
+++ b/Modules/expat/xmlrole.h
@@ -10,7 +10,7 @@
Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
Copyright (c) 2002 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
- Copyright (c) 2017 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2017-2024 Sebastian Pipping <sebastian@pipping.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -127,9 +127,9 @@ typedef struct prolog_state {
#endif /* XML_DTD */
} PROLOG_STATE;
-void XmlPrologStateInit(PROLOG_STATE *);
+void XmlPrologStateInit(PROLOG_STATE *state);
#ifdef XML_DTD
-void XmlPrologStateInitExternalEntity(PROLOG_STATE *);
+void XmlPrologStateInitExternalEntity(PROLOG_STATE *state);
#endif /* XML_DTD */
#define XmlTokenRole(state, tok, ptr, end, enc) \
diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c
index 2b7012a..29a66d7 100644
--- a/Modules/expat/xmltok.c
+++ b/Modules/expat/xmltok.c
@@ -12,7 +12,7 @@
Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
- Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
Copyright (c) 2016 Don Lewis <truckman@apache.org>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
@@ -20,8 +20,10 @@
Copyright (c) 2017 Benbuck Nason <bnason@netflix.com>
Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
- Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
+ Copyright (c) 2021 Donghee Na <donghee.na@python.org>
Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com>
+ Copyright (c) 2022 Sean McBride <sean@rogue-research.com>
+ Copyright (c) 2023 Hanno Böck <hanno@gentoo.org>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -44,7 +46,7 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <expat_config.h>
+#include "expat_config.h"
#include <stddef.h>
#include <string.h> /* memcpy */
@@ -76,7 +78,7 @@
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
#define UCS2_GET_NAMING(pages, hi, lo) \
- (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F)))
+ (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
/* A 2 byte UTF-8 representation splits the characters 11 bits between
the bottom 5 and 6 bits of the bytes. We need 8 bits to index into
@@ -100,7 +102,7 @@
& (1u << (((byte)[2]) & 0x1F)))
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
- of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
+ of Unicode 3.2: https://www.unicode.org/unicode/reports/tr28/
with the additional restriction of not allowing the Unicode
code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
Implementation details:
@@ -225,7 +227,7 @@ struct normal_encoding {
/* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \
/* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
-static int FASTCALL checkCharRefNumber(int);
+static int FASTCALL checkCharRefNumber(int result);
#include "xmltok_impl.h"
#include "ascii.h"
@@ -243,7 +245,7 @@ static int FASTCALL checkCharRefNumber(int);
#endif
#define SB_BYTE_TYPE(enc, p) \
- (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
+ (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
@@ -407,7 +409,7 @@ utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
unsigned short *to = *toP;
const char *from = *fromP;
while (from < fromLim && to < toLim) {
- switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
+ switch (SB_BYTE_TYPE(enc, from)) {
case BT_LEAD2:
if (fromLim - from < 2) {
res = XML_CONVERT_INPUT_INCOMPLETE;
@@ -715,31 +717,26 @@ unicode_byte_type(char hi, char lo) {
return res; \
}
-#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])
DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)
-#undef SET2
#undef GET_LO
#undef GET_HI
-#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])
DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)
-#undef SET2
#undef GET_LO
#undef GET_HI
#define LITTLE2_BYTE_TYPE(enc, p) \
- ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
- : unicode_byte_type((p)[1], (p)[0]))
+ ((p)[1] == 0 ? SB_BYTE_TYPE(enc, p) : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \
@@ -872,9 +869,7 @@ static const struct normal_encoding internal_little2_encoding
#endif
#define BIG2_BYTE_TYPE(enc, p) \
- ((p)[0] == 0 \
- ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
- : unicode_byte_type((p)[0], (p)[1]))
+ ((p)[0] == 0 ? SB_BYTE_TYPE(enc, p + 1) : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
#define BIG2_IS_NAME_CHAR_MINBPC(p) \
diff --git a/Modules/expat/xmltok.h b/Modules/expat/xmltok.h
index 6f630c2..c51fce1 100644
--- a/Modules/expat/xmltok.h
+++ b/Modules/expat/xmltok.h
@@ -10,7 +10,7 @@
Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
Copyright (c) 2002-2005 Karl Waclawek <karl@waclawek.net>
- Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
+ Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Licensed under the MIT license:
@@ -289,7 +289,8 @@ int XmlParseXmlDecl(int isGeneralTextEntity, const ENCODING *enc,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr, int *standalonePtr);
-int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
+int XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr,
+ const char *name);
const ENCODING *XmlGetUtf8InternalEncoding(void);
const ENCODING *XmlGetUtf16InternalEncoding(void);
int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
@@ -307,7 +308,8 @@ int XmlParseXmlDeclNS(int isGeneralTextEntity, const ENCODING *enc,
const char **encodingNamePtr,
const ENCODING **namedEncodingPtr, int *standalonePtr);
-int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
+int XmlInitEncodingNS(INIT_ENCODING *p, const ENCODING **encPtr,
+ const char *name);
const ENCODING *XmlGetUtf8InternalEncodingNS(void);
const ENCODING *XmlGetUtf16InternalEncodingNS(void);
ENCODING *XmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c
index 1971d74..239a2d0 100644
--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@@ -126,7 +126,7 @@
# endif
# define HAS_CHARS(enc, ptr, end, count) \
- ((end) - (ptr) >= ((count)*MINBPC(enc)))
+ ((end) - (ptr) >= ((count) * MINBPC(enc)))
# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)