summary | refs | log | tree | commit | diff | stats
path: root/Lib/html/parser.py
diff options
context:
space:
mode:
authorLarry Hastings <larry@hastings.org>2015-09-07 12:16:38 (GMT)
committerLarry Hastings <larry@hastings.org>2015-09-07 12:16:38 (GMT)
commitc8c47f55e636ed86791160944ccbb1ea651476bd (patch)
treedb97b6edb082a98c6ad2abcbacb6151ee2df2049 /Lib/html/parser.py
parent71f9633818b3a0cbd36b0f6ed164d436b1997fe9 (diff)
parent8c85a2083fdc6188d32f1eb287151cdb7e79a54a (diff)
downloadcpython-c8c47f55e636ed86791160944ccbb1ea651476bd.zip
cpython-c8c47f55e636ed86791160944ccbb1ea651476bd.tar.gz
cpython-c8c47f55e636ed86791160944ccbb1ea651476bd.tar.bz2
Merge heads.
Diffstat (limited to 'Lib/html/parser.py')
-rw-r--r--Lib/html/parser.py10
1 files changed, 9 insertions, 1 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 390d4cc..43e6411 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -139,7 +139,15 @@ class HTMLParser(_markupbase.ParserBase):
if self.convert_charrefs and not self.cdata_elem:
j = rawdata.find('<', i)
if j < 0:
- if not end:
+ # if we can't find the next <, either we are at the end
+ # or there's more text incoming. If the latter is True,
+ # we can't pass the text to handle_data in case we have
+ # a charref cut in half at end. Try to determine if
+ # this is the case before proceeding by looking for an
+ # & near the end and see if it's followed by a space or ;.
+ amppos = rawdata.rfind('&', max(i, n-34))
+ if (amppos >= 0 and
+ not re.compile(r'[\s;]').search(rawdata, amppos)):
break # wait till we get all the text
j = n
else: