summary | refs | log | tree | commit | diff | stats
path: root/Lib/html
diff options
context:
space:
mode:
author: R David Murray <rdmurray@bitdance.com> 2016-09-08 17:59:53 (GMT)
committer: R David Murray <rdmurray@bitdance.com> 2016-09-08 17:59:53 (GMT)
commit: 44b548dda872c0d4f30afd6b44fd74b053a55ad8 (patch)
tree: b3c1ff8485bc279000f9db95491ebc69a4385876 /Lib/html
parent: 513d7478a136e7646075592da2593476299cc8be (diff)
download: cpython-44b548dda872c0d4f30afd6b44fd74b053a55ad8.zip
cpython-44b548dda872c0d4f30afd6b44fd74b053a55ad8.tar.gz
cpython-44b548dda872c0d4f30afd6b44fd74b053a55ad8.tar.bz2
#27364: fix "incorrect" uses of escape character in the stdlib.
And most of the tools. Patch by Emanuel Barry, reviewed by me, Serhiy Storchaka, and Martin Panter.
Diffstat (limited to 'Lib/html')
-rw-r--r-- Lib/html/parser.py 4
1 file changed, 2 insertions, 2 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index b781c63..ef869bc 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -34,7 +34,7 @@ commentclose = re.compile(r'--\s*>')
# explode, so don't do it.
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
-tagfind_tolerant = re.compile('([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
+tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*')
attrfind_tolerant = re.compile(
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
@@ -56,7 +56,7 @@ locatestarttagend_tolerant = re.compile(r"""
endendtag = re.compile('>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
-endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
+endtagfind = re.compile(r'</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')