From c13222cdff4373a9763b9c7df4b2e12e7e3b776f Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Sat, 1 Jul 2000 23:49:14 +0000 Subject: - fixed "{ in any other context" bug - minor comment touchups in the C module --- Lib/sre_parse.py | 27 +++++++++++++++------------ Modules/_sre.c | 5 ++++- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 0e01ad6..12f49c3 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -142,12 +142,13 @@ class SubPattern: class Tokenizer: def __init__(self, string): - self.index = 0 self.string = string - self.next = self.__next() + self.index = 0 + self.__next() def __next(self): if self.index >= len(self.string): - return None + self.next = None + return char = self.string[self.index] if char[0] == "\\": try: @@ -156,21 +157,20 @@ class Tokenizer: raise error, "bogus escape" char = char + c self.index = self.index + len(char) - return char + self.next = char def match(self, char): if char == self.next: - self.next = self.__next() - return 1 - return 0 - def match_set(self, set): - if self.next and self.next in set: - self.next = self.__next() + self.__next() return 1 return 0 def get(self): this = self.next - self.next = self.__next() + self.__next() return this + def tell(self): + return self.index, self.next + def seek(self, index): + self.index, self.next = index def isident(char): return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" @@ -381,6 +381,7 @@ def _parse(source, state): elif this == "+": min, max = 1, MAXREPEAT elif this == "{": + here = source.tell() min, max = 0, MAXREPEAT lo = hi = "" while source.next in DIGITS: @@ -391,7 +392,9 @@ def _parse(source, state): else: hi = lo if not source.match("}"): - raise error, "bogus range" + subpattern.append((LITERAL, ord(this))) + source.seek(here) + continue if lo: min = int(lo) if hi: diff --git a/Modules/_sre.c b/Modules/_sre.c index 3d6305a..7206b95 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -19,7 +19,7 
@@ * 00-06-25 fl major changes to better deal with nested repeats (0.9) * 00-06-28 fl fixed findall (0.9.1) * 00-06-29 fl fixed split, added more scanner features (0.9.2) - * 00-06-30 fl tuning, fast search (0.9.3) + * 00-06-30 fl added fast search optimization (0.9.3) * 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4) * * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. @@ -365,18 +365,21 @@ SRE_MEMBER(SRE_CODE* set, SRE_CODE ch) return !ok; case SRE_OP_LITERAL: + /* args: <literal> */ if (ch == set[0]) return ok; set++; break; case SRE_OP_RANGE: + /* args: <lower> <upper> */ if (set[0] <= ch && ch <= set[1]) return ok; set += 2; break; case SRE_OP_CATEGORY: + /* args: <category> */ if (sre_category(set[0], (int) ch)) return ok; set += 1; -- cgit v0.12