This patch looks large, but it just deletes the ^M characters and

untabifies the files. No actual code changes were made.
author: Andrew M. Kuchling <amk@amk.ca> 2000-04-02 05:22:30 (GMT)
committer: Andrew M. Kuchling <amk@amk.ca> 2000-04-02 05:22:30 (GMT)
commit: e3ba931aa439fdfe69f1c1c750ce06427f5f607b (patch)
tree: 1c8ad813f05538babecd17d7eed923cabe79a543
parent: e188d52a7ec912c9752fdb1e9c1b97c5848f0889 (diff)
download: cpython-e3ba931aa439fdfe69f1c1c750ce06427f5f607b.zip
cpython-e3ba931aa439fdfe69f1c1c750ce06427f5f607b.tar.gz
cpython-e3ba931aa439fdfe69f1c1c750ce06427f5f607b.tar.bz2
3 files changed, 482 insertions, 482 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 3e9700b..600b237 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -26,7 +26,7 @@ from sre_constants import *
 # find an array type code that matches the engine's code size
 for WORDSIZE in "BHil":
     if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
-	break
+        break
 else:
     raise RuntimeError, "cannot find a useable array type"
 
@@ -34,18 +34,18 @@ else:
 
 class Code:
     def __init__(self):
-	self.data = []
+        self.data = []
     def __len__(self):
-	return len(self.data)
+        return len(self.data)
     def __getitem__(self, index):
-	return self.data[index]
+        return self.data[index]
     def __setitem__(self, index, code):
-	self.data[index] = code
+        self.data[index] = code
     def append(self, code):
-	self.data.append(code)
+        self.data.append(code)
     def todata(self):
-	# print self.data
-	return array.array(WORDSIZE, self.data).tostring()
+        # print self.data
+        return array.array(WORDSIZE, self.data).tostring()
 
 def _lower(literal):
     # return _sre._lower(literal) # FIXME
@@ -54,122 +54,122 @@ def _lower(literal):
 def _compile(code, pattern, flags):
     append = code.append
     for op, av in pattern:
-	if op is ANY:
-	    if "s" in flags:
-		append(CODES[op]) # any character at all!
-	    else:
-		append(CODES[NOT_LITERAL])
-		append(10)
-	elif op in (SUCCESS, FAILURE):
-	    append(CODES[op])
-	elif op is AT:
-	    append(CODES[op])
-	    append(POSITIONS[av])
-	elif op is BRANCH:
-	    append(CODES[op])
-	    tail = []
-	    for av in av[1]:
-		skip = len(code); append(0)
-		_compile(code, av, flags)
-		append(CODES[JUMP])
-		tail.append(len(code)); append(0)
-		code[skip] = len(code) - skip
-	    append(0) # end of branch
-	    for tail in tail:
-		code[tail] = len(code) - tail
-	elif op is CALL:
-	    append(CODES[op])
-	    skip = len(code); append(0)
-	    _compile(code, av, flags)
-	    append(CODES[SUCCESS])
-	    code[skip] = len(code) - skip
-	elif op is CATEGORY: # not used by current parser
-	    append(CODES[op])
-	    append(CATEGORIES[av])
-	elif op is GROUP:
-	    if "i" in flags:
-		append(CODES[MAP_IGNORE[op]])
-	    else:
-		append(CODES[op])
-	    append(av)
-	elif op is IN:
-	    if "i" in flags:
-		append(CODES[MAP_IGNORE[op]])
-		def fixup(literal):
-		    return ord(_lower(literal))
-	    else:
-		append(CODES[op])
-		fixup = ord
-	    skip = len(code); append(0)
-	    for op, av in av:
-		append(CODES[op])
-		if op is NEGATE:
-		    pass
-		elif op is LITERAL:
-		    append(fixup(av))
-		elif op is RANGE:
-		    append(fixup(av[0]))
-		    append(fixup(av[1]))
-		elif op is CATEGORY:
-		    append(CATEGORIES[av])
-		else:
-		    raise ValueError, "unsupported set operator"
-	    append(CODES[FAILURE])
-	    code[skip] = len(code) - skip
-	elif op in (LITERAL, NOT_LITERAL):
-	    if "i" in flags:
-		append(CODES[MAP_IGNORE[op]])
-		append(ord(_lower(av)))
-	    else:
-		append(CODES[op])
-		append(ord(av))
-	elif op is MARK:
-	    append(CODES[op])
-	    append(av)
- 	elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
-	    lo, hi = av[2].getwidth()
- 	    if lo == 0:
- 		raise SyntaxError, "cannot repeat zero-width items"
-	    if lo == hi == 1 and op is MAX_REPEAT:
-		append(CODES[MAX_REPEAT_ONE])
-		skip = len(code); append(0)
-		append(av[0])
-		append(av[1])
-		_compile(code, av[2], flags)
-		append(CODES[SUCCESS])
-		code[skip] = len(code) - skip
-	    else:
-		append(CODES[op])
-		skip = len(code); append(0)
-		append(av[0])
-		append(av[1])
-		_compile(code, av[2], flags)
-		if op is MIN_REPEAT:
-		    append(CODES[MIN_UNTIL])
-		else:
-		    # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
-		    append(CODES[MAX_UNTIL])
-		code[skip] = len(code) - skip
-	elif op is SUBPATTERN:
-## 	    group = av[0]
-## 	    if group:
-## 		append(CODES[MARK])
-## 		append((group-1)*2)
-	    _compile(code, av[1], flags)
-## 	    if group:
-## 		append(CODES[MARK])
-## 		append((group-1)*2+1)
-	else:
-	    raise ValueError, ("unsupported operand type", op)
+        if op is ANY:
+            if "s" in flags:
+                append(CODES[op]) # any character at all!
+            else:
+                append(CODES[NOT_LITERAL])
+                append(10)
+        elif op in (SUCCESS, FAILURE):
+            append(CODES[op])
+        elif op is AT:
+            append(CODES[op])
+            append(POSITIONS[av])
+        elif op is BRANCH:
+            append(CODES[op])
+            tail = []
+            for av in av[1]:
+                skip = len(code); append(0)
+                _compile(code, av, flags)
+                append(CODES[JUMP])
+                tail.append(len(code)); append(0)
+                code[skip] = len(code) - skip
+            append(0) # end of branch
+            for tail in tail:
+                code[tail] = len(code) - tail
+        elif op is CALL:
+            append(CODES[op])
+            skip = len(code); append(0)
+            _compile(code, av, flags)
+            append(CODES[SUCCESS])
+            code[skip] = len(code) - skip
+        elif op is CATEGORY: # not used by current parser
+            append(CODES[op])
+            append(CATEGORIES[av])
+        elif op is GROUP:
+            if "i" in flags:
+                append(CODES[MAP_IGNORE[op]])
+            else:
+                append(CODES[op])
+            append(av)
+        elif op is IN:
+            if "i" in flags:
+                append(CODES[MAP_IGNORE[op]])
+                def fixup(literal):
+                    return ord(_lower(literal))
+            else:
+                append(CODES[op])
+                fixup = ord
+            skip = len(code); append(0)
+            for op, av in av:
+                append(CODES[op])
+                if op is NEGATE:
+                    pass
+                elif op is LITERAL:
+                    append(fixup(av))
+                elif op is RANGE:
+                    append(fixup(av[0]))
+                    append(fixup(av[1]))
+                elif op is CATEGORY:
+                    append(CATEGORIES[av])
+                else:
+                    raise ValueError, "unsupported set operator"
+            append(CODES[FAILURE])
+            code[skip] = len(code) - skip
+        elif op in (LITERAL, NOT_LITERAL):
+            if "i" in flags:
+                append(CODES[MAP_IGNORE[op]])
+                append(ord(_lower(av)))
+            else:
+                append(CODES[op])
+                append(ord(av))
+        elif op is MARK:
+            append(CODES[op])
+            append(av)
+        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
+            lo, hi = av[2].getwidth()
+            if lo == 0:
+                raise SyntaxError, "cannot repeat zero-width items"
+            if lo == hi == 1 and op is MAX_REPEAT:
+                append(CODES[MAX_REPEAT_ONE])
+                skip = len(code); append(0)
+                append(av[0])
+                append(av[1])
+                _compile(code, av[2], flags)
+                append(CODES[SUCCESS])
+                code[skip] = len(code) - skip
+            else:
+                append(CODES[op])
+                skip = len(code); append(0)
+                append(av[0])
+                append(av[1])
+                _compile(code, av[2], flags)
+                if op is MIN_REPEAT:
+                    append(CODES[MIN_UNTIL])
+                else:
+                    # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
+                    append(CODES[MAX_UNTIL])
+                code[skip] = len(code) - skip
+        elif op is SUBPATTERN:
+##          group = av[0]
+##          if group:
+##              append(CODES[MARK])
+##              append((group-1)*2)
+            _compile(code, av[1], flags)
+##          if group:
+##              append(CODES[MARK])
+##              append((group-1)*2+1)
+        else:
+            raise ValueError, ("unsupported operand type", op)
 
 def compile(p, flags=()):
     # convert pattern list to internal format
     if type(p) is type(""):
-	import sre_parse
-	pattern = p
-	p = sre_parse.parse(p)
+        import sre_parse
+        pattern = p
+        p = sre_parse.parse(p)
     else:
-	pattern = None
+        pattern = None
     # print p.getwidth()
     # print p
     code = Code()
@@ -178,10 +178,10 @@ def compile(p, flags=()):
     # print list(code.data)
     data = code.todata()
     if 0: # debugging
-	print
-	print "-" * 68
-	import sre_disasm
-	sre_disasm.disasm(data)
-	print "-" * 68
+        print
+        print "-" * 68
+        import sre_disasm
+        sre_disasm.disasm(data)
+        print "-" * 68
     # print len(data), p.pattern.groups, len(p.pattern.groupdict)
     return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index af88309..f05c797 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -126,6 +126,6 @@ if __name__ == "__main__":
     f = open("sre_constants.h", "w")
     f.write("/* generated by sre_constants.py */\n")
     for k, v in items:
-	f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
+        f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
     f.close()
     print "done"
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index a563de3..db4c500 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -55,168 +55,168 @@ CATEGORIES = {
 class Pattern:
     # FIXME: <fl> rename class, and store flags in here too!
     def __init__(self):
-	self.flags = []
-	self.groups = 1
-	self.groupdict = {}
+        self.flags = []
+        self.groups = 1
+        self.groupdict = {}
     def getgroup(self, name=None):
-	gid = self.groups
-	self.groups = gid + 1
-	if name:
-	    self.groupdict[name] = gid
-	return gid
+        gid = self.groups
+        self.groups = gid + 1
+        if name:
+            self.groupdict[name] = gid
+        return gid
     def setflag(self, flag):
-	if flag not in self.flags:
-	    self.flags.append(flag)
+        if flag not in self.flags:
+            self.flags.append(flag)
 
 class SubPattern:
     # a subpattern, in intermediate form
     def __init__(self, pattern, data=None):
-	self.pattern = pattern
-	if not data:
-	    data = []
-	self.data = data
-	self.flags = []
-	self.width = None
+        self.pattern = pattern
+        if not data:
+            data = []
+        self.data = data
+        self.flags = []
+        self.width = None
     def __repr__(self):
-	return repr(self.data)
+        return repr(self.data)
     def __len__(self):
-	return len(self.data)
+        return len(self.data)
     def __delitem__(self, index):
-	del self.data[index]
+        del self.data[index]
     def __getitem__(self, index):
-	return self.data[index]
+        return self.data[index]
     def __setitem__(self, index, code):
-	self.data[index] = code
+        self.data[index] = code
     def __getslice__(self, start, stop):
-	return SubPattern(self.pattern, self.data[start:stop])
+        return SubPattern(self.pattern, self.data[start:stop])
     def insert(self, index, code):
-	self.data.insert(index, code)
+        self.data.insert(index, code)
     def append(self, code):
-	self.data.append(code)
+        self.data.append(code)
     def getwidth(self):
-	# determine the width (min, max) for this subpattern
-	if self.width:
-	    return self.width
-	lo = hi = 0L
-	for op, av in self.data:
-	    if op is BRANCH:
-		l = sys.maxint
-		h = 0
-		for av in av[1]:
-		    i, j = av.getwidth()
-		    l = min(l, i)
-		    h = min(h, j)
-		lo = lo + i
-		hi = hi + j
-	    elif op is CALL:
-		i, j = av.getwidth()
-		lo = lo + i
-		hi = hi + j
-	    elif op is SUBPATTERN:
-		i, j = av[1].getwidth()
-		lo = lo + i
-		hi = hi + j
-	    elif op in (MIN_REPEAT, MAX_REPEAT):
-		i, j = av[2].getwidth()
-		lo = lo + i * av[0]
-		hi = hi + j * av[1]
-	    elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
-		lo = lo + 1
-		hi = hi + 1
-	    elif op == SUCCESS:
-		break
-	self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
-	return self.width
+        # determine the width (min, max) for this subpattern
+        if self.width:
+            return self.width
+        lo = hi = 0L
+        for op, av in self.data:
+            if op is BRANCH:
+                l = sys.maxint
+                h = 0
+                for av in av[1]:
+                    i, j = av.getwidth()
+                    l = min(l, i)
+                    h = min(h, j)
+                lo = lo + i
+                hi = hi + j
+            elif op is CALL:
+                i, j = av.getwidth()
+                lo = lo + i
+                hi = hi + j
+            elif op is SUBPATTERN:
+                i, j = av[1].getwidth()
+                lo = lo + i
+                hi = hi + j
+            elif op in (MIN_REPEAT, MAX_REPEAT):
+                i, j = av[2].getwidth()
+                lo = lo + i * av[0]
+                hi = hi + j * av[1]
+            elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
+                lo = lo + 1
+                hi = hi + 1
+            elif op == SUCCESS:
+                break
+        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
+        return self.width
     def set(self, flag):
-	if not flag in self.flags:
-	    self.flags.append(flag)
+        if not flag in self.flags:
+            self.flags.append(flag)
     def reset(self, flag):
-	if flag in self.flags:
-	    self.flags.remove(flag)
+        if flag in self.flags:
+            self.flags.remove(flag)
 
 class Tokenizer:
     def __init__(self, string):
-	self.string = list(string)
-	self.next = self.__next()
+        self.string = list(string)
+        self.next = self.__next()
     def __next(self):
-	if not self.string:
-	    return None
-	char = self.string[0]
-	if char[0] == "\\":
-	    try:
-		c = self.string[1]
-	    except IndexError:
-		raise SyntaxError, "bogus escape"
-	    char = char + c
-	    try:
-		if c == "x":
-		    # hexadecimal constant
-		    for i in xrange(2, sys.maxint):
-			c = self.string[i]
-			if c not in HEXDIGITS:
-			    break
-			char = char + c
-		elif c in string.digits:
-		    # decimal (or octal) number
-		    for i in xrange(2, sys.maxint):
-			c = self.string[i]
-			# FIXME: if larger than current number of
-			# groups, interpret as an octal number 
-			if c not in string.digits:
-			    break
-			char = char + c
-	    except IndexError:
-		pass # use what we've got this far
-	del self.string[0:len(char)]
-	return char
+        if not self.string:
+            return None
+        char = self.string[0]
+        if char[0] == "\\":
+            try:
+                c = self.string[1]
+            except IndexError:
+                raise SyntaxError, "bogus escape"
+            char = char + c
+            try:
+                if c == "x":
+                    # hexadecimal constant
+                    for i in xrange(2, sys.maxint):
+                        c = self.string[i]
+                        if c not in HEXDIGITS:
+                            break
+                        char = char + c
+                elif c in string.digits:
+                    # decimal (or octal) number
+                    for i in xrange(2, sys.maxint):
+                        c = self.string[i]
+                        # FIXME: if larger than current number of
+                        # groups, interpret as an octal number 
+                        if c not in string.digits:
+                            break
+                        char = char + c
+            except IndexError:
+                pass # use what we've got this far
+        del self.string[0:len(char)]
+        return char
     def match(self, char):
-	if char == self.next:
-	    self.next = self.__next()
-	    return 1
-	return 0
+        if char == self.next:
+            self.next = self.__next()
+            return 1
+        return 0
     def match_set(self, set):
-	if self.next in set:
-	    self.next = self.__next()
-	    return 1
-	return 0
+        if self.next in set:
+            self.next = self.__next()
+            return 1
+        return 0
     def get(self):
-	this = self.next
-	self.next = self.__next()
-	return this
+        this = self.next
+        self.next = self.__next()
+        return this
 
 def _fixescape(escape, character_class=0):
     # convert escape to (type, value)
     if character_class:
-	# inside a character class, we'll look in the character
-	# escapes dictionary first
-	code = ESCAPES.get(escape)
-	if code:
-	    return code
-	code = CATEGORIES.get(escape)
+        # inside a character class, we'll look in the character
+        # escapes dictionary first
+        code = ESCAPES.get(escape)
+        if code:
+            return code
+        code = CATEGORIES.get(escape)
     else:
-	code = CATEGORIES.get(escape)
-	if code:
-	    return code
-	code = ESCAPES.get(escape)
+        code = CATEGORIES.get(escape)
+        if code:
+            return code
+        code = ESCAPES.get(escape)
     if code:
-	return code
+        return code
     if not character_class:
-	try:
-	    group = int(escape[1:])
-	    # FIXME: only valid if group <= current number of groups
-	    return GROUP, group
-	except ValueError:
-	    pass
+        try:
+            group = int(escape[1:])
+            # FIXME: only valid if group <= current number of groups
+            return GROUP, group
+        except ValueError:
+            pass
     try:
-	if escape[1:2] == "x":
-	    escape = escape[2:]
-	    return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
-	elif escape[1:2] in string.digits:
-	    return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
-	elif len(escape) == 2:
-	    return LITERAL, escape[1]
+        if escape[1:2] == "x":
+            escape = escape[2:]
+            return LITERAL, chr(string.atoi(escape[-2:], 16) & 0xff)
+        elif escape[1:2] in string.digits:
+            return LITERAL, chr(string.atoi(escape[1:], 8) & 0xff)
+        elif len(escape) == 2:
+            return LITERAL, escape[1]
     except ValueError:
-	pass
+        pass
     raise SyntaxError, "bogus escape: %s" % repr(escape)
 
 def _branch(subpattern, items):
@@ -226,35 +226,35 @@ def _branch(subpattern, items):
 
     # check if all items share a common prefix
     while 1:
-	prefix = None
-	for item in items:
-	    if not item:
-		break
-	    if prefix is None:
-		prefix = item[0]
-	    elif item[0] != prefix:
-		break
-	else:
-	    # all subitems start with a common "prefix".
-	    # move it out of the branch
-	    for item in items:
-		del item[0]
-	    subpattern.append(prefix)
-	    continue # check next one
-	break
+        prefix = None
+        for item in items:
+            if not item:
+                break
+            if prefix is None:
+                prefix = item[0]
+            elif item[0] != prefix:
+                break
+        else:
+            # all subitems start with a common "prefix".
+            # move it out of the branch
+            for item in items:
+                del item[0]
+            subpattern.append(prefix)
+            continue # check next one
+        break
 
     # check if the branch can be replaced by a character set
     for item in items:
-	if len(item) != 1 or item[0][0] != LITERAL:
-	    break
+        if len(item) != 1 or item[0][0] != LITERAL:
+            break
     else:
-	# we can store this as a character set instead of a
-	# branch (FIXME: use a range if possible)
-	set = []
-	for item in items:
-	    set.append(item[0])
-	subpattern.append((IN, set))
-	return
+        # we can store this as a character set instead of a
+        # branch (FIXME: use a range if possible)
+        set = []
+        for item in items:
+            set.append(item[0])
+        subpattern.append((IN, set))
+        return
 
     subpattern.append((BRANCH, (None, items)))
 
@@ -268,178 +268,178 @@ def _parse(source, pattern, flags=()):
 
     while 1:
 
-	if source.next in ("|", ")"):
-	    break # end of subpattern
-	this = source.get()
-	if this is None:
-	    break # end of pattern
-
-	if this and this[0] not in SPECIAL_CHARS:
-	    subpattern.append((LITERAL, this))
-
-	elif this == "[":
-	    # character set
-	    set = []
-## 	    if source.match(":"):
-## 		pass # handle character classes
-	    if source.match("^"):
-		set.append((NEGATE, None))
-	    # check remaining characters
-	    start = set[:]
-	    while 1:
-		this = source.get()
-		if this == "]" and set != start:
-		    break
-		elif this and this[0] == "\\":
-		    code1 = _fixescape(this, 1)
-		elif this:
-		    code1 = LITERAL, this
-		else:
-		    raise SyntaxError, "unexpected end of regular expression"
-		if source.match("-"):
-		    # potential range
-		    this = source.get()
-		    if this == "]":
-			set.append(code1)
-			set.append((LITERAL, "-"))
-			break
-		    else:
-			if this[0] == "\\":
-			    code2 = _fixescape(this, 1)
-			else:
-			    code2 = LITERAL, this
-			if code1[0] != LITERAL or code2[0] != LITERAL:
-			    raise SyntaxError, "illegal range"
-			if len(code1[1]) != 1 or len(code2[1]) != 1:
-			    raise SyntaxError, "illegal range"
-			set.append((RANGE, (code1[1], code2[1])))
-		else:
-		    if code1[0] is IN:
-			code1 = code1[1][0]
-		    set.append(code1)
-
-	    # FIXME: <fl> move set optimization to support function
-	    if len(set)==1 and set[0][0] is LITERAL:
-		subpattern.append(set[0]) # optimization
-	    elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
-		subpattern.append((NOT_LITERAL, set[1][1])) # optimization
-	    else:
-		# FIXME: <fl> add charmap optimization
-		subpattern.append((IN, set))
-
-	elif this and this[0] in REPEAT_CHARS:
-	    # repeat previous item
-	    if this == "?":
-		min, max = 0, 1
-	    elif this == "*":
-		min, max = 0, sys.maxint
-	    elif this == "+":
-		min, max = 1, sys.maxint
-	    elif this == "{":
-		min, max = 0, sys.maxint
-		lo = hi = ""
-		while source.next in string.digits:
-		    lo = lo + source.get()
-		if source.match(","):
-		    while source.next in string.digits:
-			hi = hi + source.get()
-		else:
-		    hi = lo
-		if not source.match("}"):
-		    raise SyntaxError, "bogus range"
-		if lo:
-		    min = int(lo)
-		if hi:
-		    max = int(hi)
-		# FIXME: <fl> check that hi >= lo!
-	    else:
-		raise SyntaxError, "not supported"
-	    # figure out which item to repeat
-	    # FIXME: should back up to the right mark, right?
-	    if subpattern:
-		index = len(subpattern)-1
-		while subpattern[index][0] is MARK:
-		    index = index - 1
-		item = subpattern[index:index+1]
-	    else:
-		raise SyntaxError, "nothing to repeat"
-	    if source.match("?"):
-		subpattern[index] = (MIN_REPEAT, (min, max, item))
-	    else:
-		subpattern[index] = (MAX_REPEAT, (min, max, item))
-	elif this == ".":
-	    subpattern.append((ANY, None))
-	elif this == "(":
-	    group = 1
-	    name = None
-	    if source.match("?"):
-		group = 0
-		# options
-		if source.match("P"):
-		    # named group: skip forward to end of name
-		    if source.match("<"):
-			name = ""
-			while 1:
-			    char = source.get()
-			    if char in (">", None):
-				break
-			    name = name + char
-			group = 1
-		elif source.match(":"):
-		    # non-capturing group
-		    group = 2
-		elif source.match_set("iI"):
-		    pattern.setflag("i")
-		elif source.match_set("lL"):
-		    pattern.setflag("l")
-		elif source.match_set("mM"):
-		    pattern.setflag("m")
-		elif source.match_set("sS"):
-		    pattern.setflag("s")
-		elif source.match_set("xX"):
-		    pattern.setflag("x")
-	    if group:
-		# parse group contents
-		b = []
-		if group == 2:
-		    # anonymous group
-		    group = None
-		else:
-		    group = pattern.getgroup(name)
- 		if group:
- 		    subpattern.append((MARK, (group-1)*2))
-		while 1:
-		    p = _parse(source, pattern, flags)
-		    if source.match(")"):
-			if b:
-			    b.append(p)
-			    _branch(subpattern, b)
-			else:
-			    subpattern.append((SUBPATTERN, (group, p)))
-			break
-		    elif source.match("|"):
-			b.append(p)
-		    else:
-			raise SyntaxError, "group not properly closed"
- 		if group:
- 		    subpattern.append((MARK, (group-1)*2+1))
-	    else:
-		# FIXME: should this really be a while loop?
- 		while source.get() not in (")", None):
- 		    pass
-
-	elif this == "^":
-	    subpattern.append((AT, AT_BEGINNING))
-
-	elif this == "$":
-	    subpattern.append((AT, AT_END))
-
-	elif this and this[0] == "\\":
-	    code =_fixescape(this)
-	    subpattern.append(code)
-
-	else:
-	    raise SyntaxError, "parser error"
+        if source.next in ("|", ")"):
+            break # end of subpattern
+        this = source.get()
+        if this is None:
+            break # end of pattern
+
+        if this and this[0] not in SPECIAL_CHARS:
+            subpattern.append((LITERAL, this))
+
+        elif this == "[":
+            # character set
+            set = []
+##          if source.match(":"):
+##              pass # handle character classes
+            if source.match("^"):
+                set.append((NEGATE, None))
+            # check remaining characters
+            start = set[:]
+            while 1:
+                this = source.get()
+                if this == "]" and set != start:
+                    break
+                elif this and this[0] == "\\":
+                    code1 = _fixescape(this, 1)
+                elif this:
+                    code1 = LITERAL, this
+                else:
+                    raise SyntaxError, "unexpected end of regular expression"
+                if source.match("-"):
+                    # potential range
+                    this = source.get()
+                    if this == "]":
+                        set.append(code1)
+                        set.append((LITERAL, "-"))
+                        break
+                    else:
+                        if this[0] == "\\":
+                            code2 = _fixescape(this, 1)
+                        else:
+                            code2 = LITERAL, this
+                        if code1[0] != LITERAL or code2[0] != LITERAL:
+                            raise SyntaxError, "illegal range"
+                        if len(code1[1]) != 1 or len(code2[1]) != 1:
+                            raise SyntaxError, "illegal range"
+                        set.append((RANGE, (code1[1], code2[1])))
+                else:
+                    if code1[0] is IN:
+                        code1 = code1[1][0]
+                    set.append(code1)
+
+            # FIXME: <fl> move set optimization to support function
+            if len(set)==1 and set[0][0] is LITERAL:
+                subpattern.append(set[0]) # optimization
+            elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
+                subpattern.append((NOT_LITERAL, set[1][1])) # optimization
+            else:
+                # FIXME: <fl> add charmap optimization
+                subpattern.append((IN, set))
+
+        elif this and this[0] in REPEAT_CHARS:
+            # repeat previous item
+            if this == "?":
+                min, max = 0, 1
+            elif this == "*":
+                min, max = 0, sys.maxint
+            elif this == "+":
+                min, max = 1, sys.maxint
+            elif this == "{":
+                min, max = 0, sys.maxint
+                lo = hi = ""
+                while source.next in string.digits:
+                    lo = lo + source.get()
+                if source.match(","):
+                    while source.next in string.digits:
+                        hi = hi + source.get()
+                else:
+                    hi = lo
+                if not source.match("}"):
+                    raise SyntaxError, "bogus range"
+                if lo:
+                    min = int(lo)
+                if hi:
+                    max = int(hi)
+                # FIXME: <fl> check that hi >= lo!
+            else:
+                raise SyntaxError, "not supported"
+            # figure out which item to repeat
+            # FIXME: should back up to the right mark, right?
+            if subpattern:
+                index = len(subpattern)-1
+                while subpattern[index][0] is MARK:
+                    index = index - 1
+                item = subpattern[index:index+1]
+            else:
+                raise SyntaxError, "nothing to repeat"
+            if source.match("?"):
+                subpattern[index] = (MIN_REPEAT, (min, max, item))
+            else:
+                subpattern[index] = (MAX_REPEAT, (min, max, item))
+        elif this == ".":
+            subpattern.append((ANY, None))
+        elif this == "(":
+            group = 1
+            name = None
+            if source.match("?"):
+                group = 0
+                # options
+                if source.match("P"):
+                    # named group: skip forward to end of name
+                    if source.match("<"):
+                        name = ""
+                        while 1:
+                            char = source.get()
+                            if char in (">", None):
+                                break
+                            name = name + char
+                        group = 1
+                elif source.match(":"):
+                    # non-capturing group
+                    group = 2
+                elif source.match_set("iI"):
+                    pattern.setflag("i")
+                elif source.match_set("lL"):
+                    pattern.setflag("l")
+                elif source.match_set("mM"):
+                    pattern.setflag("m")
+                elif source.match_set("sS"):
+                    pattern.setflag("s")
+                elif source.match_set("xX"):
+                    pattern.setflag("x")
+            if group:
+                # parse group contents
+                b = []
+                if group == 2:
+                    # anonymous group
+                    group = None
+                else:
+                    group = pattern.getgroup(name)
+                if group:
+                    subpattern.append((MARK, (group-1)*2))
+                while 1:
+                    p = _parse(source, pattern, flags)
+                    if source.match(")"):
+                        if b:
+                            b.append(p)
+                            _branch(subpattern, b)
+                        else:
+                            subpattern.append((SUBPATTERN, (group, p)))
+                        break
+                    elif source.match("|"):
+                        b.append(p)
+                    else:
+                        raise SyntaxError, "group not properly closed"
+                if group:
+                    subpattern.append((MARK, (group-1)*2+1))
+            else:
+                # FIXME: should this really be a while loop?
+                while source.get() not in (")", None):
+                    pass
+
+        elif this == "^":
+            subpattern.append((AT, AT_BEGINNING))
+
+        elif this == "$":
+            subpattern.append((AT, AT_END))
+
+        elif this and this[0] == "\\":
+            code =_fixescape(this)
+            subpattern.append(code)
+
+        else:
+            raise SyntaxError, "parser error"
 
     return subpattern
 
@@ -448,20 +448,20 @@ def parse(source, flags=()):
     g = Pattern()
     b = []
     while 1:
-	p = _parse(s, g, flags)
-	tail = s.get()
-	if tail == "|":
-	    b.append(p)
-	elif tail == ")":
-	    raise SyntaxError, "unbalanced parenthesis"
-	elif tail is None:
-	    if b:
-		b.append(p)
-		p = SubPattern(g)
-		_branch(p, b)
-	    break
-	else:
-	    raise SyntaxError, "bogus characters at end of regular expression"
+        p = _parse(s, g, flags)
+        tail = s.get()
+        if tail == "|":
+            b.append(p)
+        elif tail == ")":
+            raise SyntaxError, "unbalanced parenthesis"
+        elif tail is None:
+            if b:
+                b.append(p)
+                p = SubPattern(g)
+                _branch(p, b)
+            break
+        else:
+            raise SyntaxError, "bogus characters at end of regular expression"
     return p
 
 if __name__ == "__main__":
@@ -469,23 +469,23 @@ if __name__ == "__main__":
     from testpatterns import PATTERNS
     a = b = c = 0
     for pattern, flags in PATTERNS:
-	if flags:
-	    continue
-	print "-"*68
-	try:
-	    p = parse(pattern)
-	    print repr(pattern), "->"
-	    pprint(p.data)
-	    import sre_compile
-	    try:
-		code = sre_compile.compile(p)
-		c = c + 1
-	    except:
-		pass
-	    a = a + 1
-	except SyntaxError, v:
-	    print "**", repr(pattern), v
-	b = b + 1
+        if flags:
+            continue
+        print "-"*68
+        try:
+            p = parse(pattern)
+            print repr(pattern), "->"
+            pprint(p.data)
+            import sre_compile
+            try:
+                code = sre_compile.compile(p)
+                c = c + 1
+            except:
+                pass
+            a = a + 1
+        except SyntaxError, v:
+            print "**", repr(pattern), v
+        b = b + 1
     print "-"*68
     print a, "of", b, "patterns successfully parsed"
     print c, "of", b, "patterns successfully compiled"
author	Andrew M. Kuchling <amk@amk.ca>	2000-04-02 05:22:30 (GMT)
committer	Andrew M. Kuchling <amk@amk.ca>	2000-04-02 05:22:30 (GMT)
commit	e3ba931aa439fdfe69f1c1c750ce06427f5f607b (patch)
tree	1c8ad813f05538babecd17d7eed923cabe79a543
parent	e188d52a7ec912c9752fdb1e9c1b97c5848f0889 (diff)
download	cpython-e3ba931aa439fdfe69f1c1c750ce06427f5f607b.zip cpython-e3ba931aa439fdfe69f1c1c750ce06427f5f607b.tar.gz cpython-e3ba931aa439fdfe69f1c1c750ce06427f5f607b.tar.bz2