#23144: merge with 3.5.

author: Ezio Melotti <ezio.melotti@gmail.com> 2015-09-06 18:49:48 (GMT)
committer: Ezio Melotti <ezio.melotti@gmail.com> 2015-09-06 18:49:48 (GMT)
commit: 564cf7b62ced76a3cc87b16a8a278f0612673690 (patch)
tree: 4595fab1818ef207ad3abd6b0f0323ee6e9b5a5a /Lib/html/parser.py
parent: 56f6e76c680f47ad2b11bed9406305a000a1889a (diff)
parent: 20a2c6482e28a2ca8d257ba646f2b8ead4837387 (diff)
download: cpython-564cf7b62ced76a3cc87b16a8a278f0612673690.zip
cpython-564cf7b62ced76a3cc87b16a8a278f0612673690.tar.gz
cpython-564cf7b62ced76a3cc87b16a8a278f0612673690.tar.bz2
1 files changed, 9 insertions, 1 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 390d4cc..43e6411 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -139,7 +139,15 @@ class HTMLParser(_markupbase.ParserBase):
             if self.convert_charrefs and not self.cdata_elem:
                 j = rawdata.find('<', i)
                 if j < 0:
-                    if not end:
+                    # if we can't find the next <, either we are at the end
+                    # or there's more text incoming.  If the latter is True,
+                    # we can't pass the text to handle_data in case we have
+                    # a charref cut in half at end.  Try to determine if
+                    # this is the case before proceeding by looking for an
+                    # & near the end and see if it's followed by a space or ;.
+                    amppos = rawdata.rfind('&', max(i, n-34))
+                    if (amppos >= 0 and
+                        not re.compile(r'[\s;]').search(rawdata, amppos)):
                         break  # wait till we get all the text
                     j = n
             else:
author	Ezio Melotti <ezio.melotti@gmail.com>	2015-09-06 18:49:48 (GMT)
committer	Ezio Melotti <ezio.melotti@gmail.com>	2015-09-06 18:49:48 (GMT)
commit	564cf7b62ced76a3cc87b16a8a278f0612673690 (patch)
tree	4595fab1818ef207ad3abd6b0f0323ee6e9b5a5a /Lib/html/parser.py
parent	56f6e76c680f47ad2b11bed9406305a000a1889a (diff)
parent	20a2c6482e28a2ca8d257ba646f2b8ead4837387 (diff)
download	cpython-564cf7b62ced76a3cc87b16a8a278f0612673690.zip cpython-564cf7b62ced76a3cc87b16a8a278f0612673690.tar.gz cpython-564cf7b62ced76a3cc87b16a8a278f0612673690.tar.bz2