diff options
author | Guido van Rossum <guido@python.org> | 1995-09-01 20:34:29 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1995-09-01 20:34:29 (GMT) |
commit | cf9e27c72e43e663629116057184752edd6548c8 (patch) | |
tree | 7587718556af4b59dc7818c7d123db6f8a666730 /Lib/sgmllib.py | |
parent | fd504d9f09466ded3cbadbdf43e7fd3e43cc57f4 (diff) | |
download | cpython-cf9e27c72e43e663629116057184752edd6548c8.zip cpython-cf9e27c72e43e663629116057184752edd6548c8.tar.gz cpython-cf9e27c72e43e663629116057184752edd6548c8.tar.bz2 |
Support value-less attributes (e.g. ISMAP) and use regex.group() to extract the matched attribute parts.
Diffstat (limited to 'Lib/sgmllib.py')
-rw-r--r-- | Lib/sgmllib.py | 22 |
1 file changed, 8 insertions, 14 deletions
```diff
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 3eed7a8..0169ea3 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -173,8 +173,6 @@ class SGMLParser:
         # Now parse the data between i+1 and j into a tag and attrs
         attrs = []
         tagfind = regex.compile('[a-zA-Z][a-zA-Z0-9]*')
-        # XXX Should also support value-less attributes (e.g. ISMAP)
-        # XXX Should use regex.group()
         attrfind = regex.compile(
           '[ \t\n]+\([a-zA-Z][a-zA-Z0-9]*\)' +
           '\([ \t\n]*=[ \t\n]*' +
@@ -187,18 +185,12 @@ class SGMLParser:
         while k < j:
             l = attrfind.match(rawdata, k)
             if l < 0: break
-            regs = attrfind.regs
-            a1, b1 = regs[1]
-            a2, b2 = regs[2]
-            a3, b3 = regs[3]
-            attrname = rawdata[a1:b1]
-            if '=' in rawdata[k:k+l]:
-                attrvalue = rawdata[a3:b3]
-                if attrvalue[:1] == '\'' == attrvalue[-1:] or \
-                   attrvalue[:1] == '"' == attrvalue[-1:]:
-                    attrvalue = attrvalue[1:-1]
-            else:
-                attrvalue = ''
+            attrname, rest, attrvalue = attrfind.group(1, 2, 3)
+            if not rest:
+                attrvalue = attrname
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                attrvalue = attrvalue[1:-1]
             attrs.append((string.lower(attrname), attrvalue))
             k = k + l
         j = j+1
@@ -226,6 +218,8 @@ class SGMLParser:
         except AttributeError:
             self.unknown_endtag(tag)
             return
+        # XXX Should invoke end methods when popping their
+        # XXX stack entry, not when encountering the tag!
         if self.stack and self.stack[-1] == tag:
             del self.stack[-1]
         else:
```