diff options
| author | Ezio Melotti <ezio.melotti@gmail.com> | 2011-11-14 16:56:11 (GMT) |
|---|---|---|
| committer | Ezio Melotti <ezio.melotti@gmail.com> | 2011-11-14 16:56:11 (GMT) |
| commit | d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad (patch) | |
| tree | 585f97fa7990268f7ef46a60d78407838e334bc3 /Lib/html/parser.py | |
| parent | 84b48a6c46ce7720a23d92f4d64961812d00ce1b (diff) | |
| parent | c2fe57762b6cfa8849908e1a0475036cd0b058ba (diff) | |
| download | cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.zip cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.tar.gz cpython-d5d4406c8ebbbdf8a8961fc119be22b15a1c40ad.tar.bz2 | |
#1745761, #755670, #13357, #12629, #1200313: merge with 3.2.
Diffstat (limited to 'Lib/html/parser.py')
| -rw-r--r-- | Lib/html/parser.py | 19 |
1 file changed, 10 insertions, 9 deletions
```diff
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index afdb305..662e855 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -30,8 +30,8 @@ attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
 attrfind_tolerant = re.compile(
-    r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
+    r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
+    r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
 locatestarttagend = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
   (?:\s+                             # whitespace before attribute name
@@ -49,16 +49,16 @@ locatestarttagend = re.compile(r"""
 locatestarttagend_tolerant = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
   (?:\s*                             # optional whitespace before attribute name
-    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
-      (?:\s*=\s*                     # value indicator
+    (?:(?<=['"\s])[^\s/>][^\s/=>]*   # attribute name
+      (?:\s*=+\s*                    # value indicator
         (?:'[^']*'                   # LITA-enclosed value
-          |\"[^\"]*\"                # LIT-enclosed value
-          |[^'\">\s]+                # bare value
+          |"[^"]*"                   # LIT-enclosed value
+          |(?!['"])[^>\s]*           # bare value
          )
          (?:\s*,)*                   # possibly followed by a comma
-       )?
-    )
-  )*
+       )?\s*
+    )*
+  )?
   \s*                                # trailing whitespace
 """, re.VERBOSE)
 endendtag = re.compile('>')
@@ -295,6 +295,7 @@ class HTMLParser(_markupbase.ParserBase):
             elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                  attrvalue[:1] == '"' == attrvalue[-1:]:
                 attrvalue = attrvalue[1:-1]
+            if attrvalue:
                 attrvalue = self.unescape(attrvalue)
             attrs.append((attrname.lower(), attrvalue))
             k = m.end()
```
