summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guido van Rossum <guido@python.org>, 1998-05-28 22:48:53 (GMT)
committer: Guido van Rossum <guido@python.org>, 1998-05-28 22:48:53 (GMT)
commit: 1ad00717fb90ebfe1bb7727f9a89b1b06ebf9e5f (patch)
tree: 2dfe14bc7499fa5921763270f3a609f7b5087f55
parent: ae621ff7b7226d093f944fbf27876e6750e768fc (diff)
download: cpython-1ad00717fb90ebfe1bb7727f9a89b1b06ebf9e5f.zip
cpython-1ad00717fb90ebfe1bb7727f9a89b1b06ebf9e5f.tar.gz
cpython-1ad00717fb90ebfe1bb7727f9a89b1b06ebf9e5f.tar.bz2
Patch by Lars Marius Garshol:

- Handle <? processing instructions >.
- Allow . and - in entity names.

Also fixed an oversight in the previous fix (in one place, [ \t\r\n] was used instead of string.whitespace).

-rw-r--r-- Lib/sgmllib.py 32
1 files changed, 30 insertions, 2 deletions
diff --git a/Lib/sgmllib.py b/Lib/sgmllib.py
index 956341c..6817608 100644
--- a/Lib/sgmllib.py
+++ b/Lib/sgmllib.py
@@ -20,12 +20,14 @@ incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
'/([a-zA-Z][^<>]*)?|'
'![^<>]*)?')
-entityref = re.compile('&([a-zA-Z][a-zA-Z0-9]*)[^a-zA-Z0-9]')
+entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#([0-9]+)[^0-9]')
starttagopen = re.compile('<[>a-zA-Z]')
shorttagopen = re.compile('<[a-zA-Z][a-zA-Z0-9]*/')
shorttag = re.compile('<([a-zA-Z][a-zA-Z0-9]*)/([^/]*)/')
+piopen = re.compile('<\?')
+piclose = re.compile('>')
endtagopen = re.compile('</[<>a-zA-Z]')
endbracket = re.compile('[<>]')
special = re.compile('<![^<>]*>')
@@ -33,7 +35,7 @@ commentopen = re.compile('<!--')
commentclose = re.compile('--[%s]*>' % string.whitespace)
tagfind = re.compile('[a-zA-Z][a-zA-Z0-9]*')
attrfind = re.compile(
- '[ \t\n\r]+([a-zA-Z_][-.a-zA-Z_0-9]*)'
+ '[%s]+([a-zA-Z_][-.a-zA-Z_0-9]*)' % string.whitespace
+ ('([%s]*=[%s]*' % (string.whitespace, string.whitespace))
+ r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:+*%?!\(\)_#=~]*))?')
@@ -127,6 +129,15 @@ class SGMLParser:
if k < 0: break
i = i+k
continue
+ if piopen.match(rawdata, i):
+ if self.literal:
+ self.handle_data(rawdata[i])
+ i = i+1
+ continue
+ k = self.parse_pi(i)
+ if k < 0: break
+ i = i+k
+ continue
match = special.match(rawdata, i)
if match:
if self.literal:
@@ -184,6 +195,19 @@ class SGMLParser:
j = match.end(0)
return j-i
+ # Internal -- parse processing instr, return length or -1 if not terminated
+ def parse_pi(self, i):
+ rawdata = self.rawdata
+ if rawdata[i:i+2] <> '<?':
+ raise RuntimeError, 'unexpected call to handle_pi'
+ match = piclose.search(rawdata, i+2)
+ if not match:
+ return -1
+ j = match.start(0)
+ self.handle_pi(rawdata[i+2: j])
+ j = match.end(0)
+ return j-i
+
# Internal -- handle starttag, return length or -1 if not terminated
def parse_starttag(self, i):
rawdata = self.rawdata
@@ -348,6 +372,10 @@ class SGMLParser:
def handle_comment(self, data):
pass
+ # Example -- handle processing instruction, could be overridden
+ def handle_pi(self, data):
+ pass
+
# To be overridden -- handlers for unknown objects
def unknown_starttag(self, tag, attrs): pass
def unknown_endtag(self, tag): pass