From eeb2f32aad5c080dbc80705d5b3fc807c0c31037 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 19 Oct 1998 13:28:26 +0000 Subject: Fixed a problem where xmllib didn't handle the tag correctly. Also changed comparisons of re matches and searches into explicit comparisons with None. (Sjoerd Mullender) --- Lib/xmllib.py | 115 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 60 insertions(+), 55 deletions(-) diff --git a/Lib/xmllib.py b/Lib/xmllib.py index 355714f..06dc373 100644 --- a/Lib/xmllib.py +++ b/Lib/xmllib.py @@ -9,9 +9,10 @@ version = '0.1' # Regular expressions used for parsing -_S = '[ \t\r\n]+' -_opS = '[ \t\r\n]*' -_Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*' +_S = '[ \t\r\n]+' # white space +_opS = '[ \t\r\n]*' # optional white space +_Name = '[a-zA-Z_:][-a-zA-Z0-9._:]*' # valid XML name +_QStr = "(?:'[^']*'|\"[^\"]*\")" # quoted XML string illegal = re.compile('[^\t\r\n -\176\240-\377]') # illegal chars in content interesting = re.compile('[]&<]') @@ -22,17 +23,25 @@ charref = re.compile('&#(?P[0-9]+[^0-9]|x[0-9a-fA-F]+[^0-9a-fA-F])') space = re.compile(_S + '$') newline = re.compile('\n') +attrfind = re.compile( + _S + '(?P' + _Name + ')' + '(' + _opS + '=' + _opS + + '(?P'+_QStr+'|[-a-zA-Z0-9.:+*%?!()_#=~]+))?') starttagopen = re.compile('<' + _Name) -endtagopen = re.compile('/?)>') +starttagmatch = re.compile('<(?P'+_Name+')' + '(?P(?:'+attrfind.pattern+')*)'+ + starttagend.pattern) +endtagopen = re.compile('') +endbracketfind = re.compile('(?:[^>\'"]|'+_QStr+')*>') tagfind = re.compile(_Name) cdataopen = re.compile(r'') # this matches one of the following: # SYSTEM SystemLiteral # PUBLIC PubidLiteral SystemLiteral -_SystemLiteral = '(?P<%s>\'[^\']*\'|"[^"]*")' +_SystemLiteral = '(?P<%s>'+_QStr+')' _PublicLiteral = '(?P<%s>"[-\'()+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*"|' \ "'[-()+,./:=?;!*#@$_%% \n\ra-zA-Z0-9]*')" _ExternalId = '(?:SYSTEM|' \ @@ -41,7 +50,7 @@ _ExternalId = '(?:SYSTEM|' \ doctype = re.compile(''+_Name+')' '(?:'+_S+_ExternalId+')?'+_opS) xmldecl = re.compile('<\?xml'+_S+ - 'version'+_opS+'='+_opS+'(?P\'[^\']*\'|"[^"]*")'+ + 'version'+_opS+'='+_opS+'(?P'+_QStr+')'+ '(?:'+_S+'encoding'+_opS+'='+_opS+ "(?P'[A-Za-z][-A-Za-z0-9._]*'|" '"[A-Za-z][-A-Za-z0-9._]*"))?' @@ -53,10 +62,6 @@ procclose = re.compile(_opS + r'\?>') commentopen = re.compile('') doubledash = re.compile('--') -attrfind = re.compile( - _S + '(?P' + _Name + ')' - '(' + _opS + '=' + _opS + - '(?P\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9.:+*%?!()_#=~]+))') attrtrans = string.maketrans(' \r\n\t', ' ') @@ -173,7 +178,7 @@ class XMLParser: self.syntax_error('illegal data at start of file') self.__at_start = 0 data = rawdata[i:j] - if not self.stack and not space.match(data): + if not self.stack and space.match(data) is None: self.syntax_error('data not in content') if illegal.search(data): self.syntax_error('illegal character in content') @@ -327,7 +332,7 @@ class XMLParser: if rawdata[i:i+4] <> '