summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Fredrik Lundh <fredrik@pythonware.com> 2000-07-01 23:49:14 (GMT)
committer Fredrik Lundh <fredrik@pythonware.com> 2000-07-01 23:49:14 (GMT)
commit c13222cdff4373a9763b9c7df4b2e12e7e3b776f (patch)
tree 7ce04dc8e3322672ab512488bbfdb362986b56a3
parent 0cebe439ce26ed5a14d77771c01e50d9dcc6ad11 (diff)
download cpython-c13222cdff4373a9763b9c7df4b2e12e7e3b776f.zip
cpython-c13222cdff4373a9763b9c7df4b2e12e7e3b776f.tar.gz
cpython-c13222cdff4373a9763b9c7df4b2e12e7e3b776f.tar.bz2
- fixed "{ in any other context" bug
- minor comment touchups in the C module
-rw-r--r-- Lib/sre_parse.py 27
-rw-r--r-- Modules/_sre.c 5
2 files changed, 19 insertions, 13 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 0e01ad6..12f49c3 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -142,12 +142,13 @@ class SubPattern:
class Tokenizer:
def __init__(self, string):
- self.index = 0
self.string = string
- self.next = self.__next()
+ self.index = 0
+ self.__next()
def __next(self):
if self.index >= len(self.string):
- return None
+ self.next = None
+ return
char = self.string[self.index]
if char[0] == "\\":
try:
@@ -156,21 +157,20 @@ class Tokenizer:
raise error, "bogus escape"
char = char + c
self.index = self.index + len(char)
- return char
+ self.next = char
def match(self, char):
if char == self.next:
- self.next = self.__next()
- return 1
- return 0
- def match_set(self, set):
- if self.next and self.next in set:
- self.next = self.__next()
+ self.__next()
return 1
return 0
def get(self):
this = self.next
- self.next = self.__next()
+ self.__next()
return this
+ def tell(self):
+ return self.index, self.next
+ def seek(self, index):
+ self.index, self.next = index
def isident(char):
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
@@ -381,6 +381,7 @@ def _parse(source, state):
elif this == "+":
min, max = 1, MAXREPEAT
elif this == "{":
+ here = source.tell()
min, max = 0, MAXREPEAT
lo = hi = ""
while source.next in DIGITS:
@@ -391,7 +392,9 @@ def _parse(source, state):
else:
hi = lo
if not source.match("}"):
- raise error, "bogus range"
+ subpattern.append((LITERAL, ord(this)))
+ source.seek(here)
+ continue
if lo:
min = int(lo)
if hi:
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 3d6305a..7206b95 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -19,7 +19,7 @@
* 00-06-25 fl major changes to better deal with nested repeats (0.9)
* 00-06-28 fl fixed findall (0.9.1)
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
- * 00-06-30 fl tuning, fast search (0.9.3)
+ * 00-06-30 fl added fast search optimization (0.9.3)
* 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
*
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
@@ -365,18 +365,21 @@ SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
return !ok;
case SRE_OP_LITERAL:
+ /* args: <literal> */
if (ch == set[0])
return ok;
set++;
break;
case SRE_OP_RANGE:
+ /* args: <lower> <upper> */
if (set[0] <= ch && ch <= set[1])
return ok;
set += 2;
break;
case SRE_OP_CATEGORY:
+ /* args: <category> */
if (sre_category(set[0], (int) ch))
return ok;
set += 1;