diff options
author | Steve Dower <steve.dower@python.org> | 2022-03-07 21:46:18 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-07 21:46:18 (GMT) |
commit | 176835c3d5c70f4c1b152cc2062b549144e37094 (patch) | |
tree | e283af10cf4903358e68c1c3ba84449dfc112be8 /Modules/expat | |
parent | f193631387bfee99a812e39b05d5b7e6384b57f5 (diff) | |
download | cpython-176835c3d5c70f4c1b152cc2062b549144e37094.zip cpython-176835c3d5c70f4c1b152cc2062b549144e37094.tar.gz cpython-176835c3d5c70f4c1b152cc2062b549144e37094.tar.bz2 |
bpo-46932: Update bundled libexpat to 2.4.7 (GH-31736)
Diffstat (limited to 'Modules/expat')
-rw-r--r-- | Modules/expat/expat.h | 22 | ||||
-rw-r--r-- | Modules/expat/xmlparse.c | 147 |
2 files changed, 155 insertions, 14 deletions
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 46a0e1b..c9214f6 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -15,6 +15,7 @@ Copyright (c) 2016 Cristian RodrÃguez <crrodriguez@opensuse.org> Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> + Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -174,8 +175,10 @@ struct XML_cp { }; /* This is called for an element declaration. See above for - description of the model argument. It's the caller's responsibility - to free model when finished with it. + description of the model argument. It's the user code's responsibility + to free model when finished with it. See XML_FreeContentModel. + There is no need to free the model from the handler, it can be kept + around and freed at a later stage. */ typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, const XML_Char *name, @@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding); and the local part will be concatenated without any separator. It is a programming error to use the separator '\0' with namespace triplets (see XML_SetReturnNSTriplet). + If a namespace separator is chosen that can be part of a URI or + part of an XML name, splitting an expanded name back into its + 1, 2 or 3 original parts on application level in the element handler + may end up vulnerable, so these are advised against; sane choices for + a namespace separator are e.g. '\n' (line feed) and '|' (pipe). + + Note that Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to + the XML 1.0 namespaces specification) but it may start doing that + in future releases. Before that, an application using Expat must + be ready to receive namespace URIs containing non-URI characters. */ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); @@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, const XML_Char *pubid, int has_internal_subset); -/* This is called for the start of the DOCTYPE declaration when the +/* This is called for the end of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset. */ @@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 4 -#define XML_MICRO_VERSION 6 +#define XML_MICRO_VERSION 7 #ifdef __cplusplus } diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index 7db28d0..05216d9 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+) +/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -34,6 +34,7 @@ Copyright (c) 2019 Vadim Zeitlin <vadim@zeitlins.org> Copyright (c) 2021 Dong-hee Na <donghee.na@python.org> Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net> + Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -133,7 +134,7 @@ * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ - * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \ + * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ * Windows >=Vista (rand_s): _WIN32. \ \ If insist on not using any of these, bypass this error by defining \ @@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { return XML_ParserCreate_MM(encodingName, NULL, tmp); } +// "xml=http://www.w3.org/XML/1998/namespace" static const XML_Char implicitContext[] = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, @@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, return XML_ERROR_NONE; } +static XML_Bool +is_rfc3986_uri_char(XML_Char candidate) { + // For the RFC 3986 ANBF grammar see + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + switch (candidate) { + // From rule "ALPHA" (uppercase half) + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + + // From rule "ALPHA" (lowercase half) + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + + // From rule "DIGIT" + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + // From rule "pct-encoded" + case '%': + + // From rule "unreserved" + case '-': + case '.': + case '_': + case '~': + + // From rule "gen-delims" + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + + // From rule "sub-delims" + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return XML_TRUE; + + default: + return XML_FALSE; + } +} + /* addBinding() overwrites the value of prefix->binding without checking. Therefore one must keep track of the old value outside of addBinding(). */ static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) { + // "http://www.w3.org/XML/1998/namespace" static const XML_Char xmlNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, @@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; + // "http://www.w3.org/2000/xmlns/" static const XML_Char xmlnsNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, @@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; - // NOTE: While Expat does not validate namespace URIs against RFC 3986, - // we have to at least make sure that the XML processor on top of - // Expat (that is splitting tag names by namespace separator into - // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused - // by an attacker putting additional namespace separator characters - // into namespace declarations. That would be ambiguous and not to - // be expected. - if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { + // NOTE: While Expat does not validate namespace URIs against RFC 3986 + // today (and is not REQUIRED to do so with regard to the XML 1.0 + // namespaces specification) we have to at least make sure, that + // the application on top of Expat (that is likely splitting expanded + // element names ("qualified names") of form + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces + // in its element handler code) cannot be confused by an attacker + // putting additional namespace separator characters into namespace + // declarations. That would be ambiguous and not to be expected. + // + // While the HTML API docs of function XML_ParserCreateNS have been + // advising against use of a namespace separator character that can + // appear in a URI for >20 years now, some widespread applications + // are using URI characters (':' (colon) in particular) for a + // namespace separator, in practice. To keep these applications + // functional, we only reject namespaces URIs containing the + // application-chosen namespace separator if the chosen separator + // is a non-URI character with regard to RFC 3986. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) + && ! is_rfc3986_uri_char(uri[len])) { return XML_ERROR_SYNTAX; } } |