diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-02 17:33:27 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-07-02 17:33:27 (GMT) |
commit | 7cafe4d7e466996d5fc32e871fe834e0e0c94282 (patch) | |
tree | dc3572d1d6bd95316c7a044cfd8639be014e3520 /Lib | |
parent | b19948b7fb96cfc2ed69bb58f2205d1399f1f9f5 (diff) | |
download | cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.zip cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.gz cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.bz2 |
- actually enabled charset anchors in the engine (still not
used by the code generator)
- changed max repeat value in engine (to match earlier array fix)
- added experimental "which part matched?" mechanism to sre; see
http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954
or python-dev for details.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/sre.py | 31 | ||||
-rw-r--r-- | Lib/sre_compile.py | 2 | ||||
-rw-r--r-- | Lib/sre_constants.py | 2 | ||||
-rw-r--r-- | Lib/sre_parse.py | 17 |
4 files changed, 51 insertions, 1 deletion
@@ -155,3 +155,34 @@ def _pickle(p): return _compile, (p.pattern, p.flags) copy_reg.pickle(type(_compile("")), _pickle, _compile) + +# -------------------------------------------------------------------- +# experimental stuff (see python-dev discussions for details) + +class Scanner: + def __init__(self, lexicon): + self.lexicon = lexicon + p = [] + for phrase, action in lexicon: + p.append("(?:%s)(?P#%d)" % (phrase, len(p))) + self.scanner = sre.compile("|".join(p)) + def scan(self, string): + result = [] + append = result.append + match = self.scanner.match + i = 0 + while 1: + m = match(string, i) + if not m: + break + j = m.end() + if i == j: + break + action = self.lexicon[m.index][1] + if callable(action): + self.match = match + action = action(self, m.group()) + if action is not None: + append(action) + i = j + return result, string[i:] diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py index a593ee7..e5c501e 100644 --- a/Lib/sre_compile.py +++ b/Lib/sre_compile.py @@ -208,7 +208,7 @@ def _compile(code, pattern, flags): else: emit(OPCODES[op]) emit(av-1) - elif op is MARK: + elif op in (MARK, INDEX): emit(OPCODES[op]) emit(av) else: diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py index f0e45ea..076637d 100644 --- a/Lib/sre_constants.py +++ b/Lib/sre_constants.py @@ -33,6 +33,7 @@ GROUP = "group" GROUP_IGNORE = "group_ignore" IN = "in" IN_IGNORE = "in_ignore" +INDEX = "index" INFO = "info" JUMP = "jump" LITERAL = "literal" @@ -90,6 +91,7 @@ OPCODES = [ CATEGORY, CHARSET, GROUP, GROUP_IGNORE, + INDEX, IN, IN_IGNORE, INFO, JUMP, diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index b263256..81ca217 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -451,6 +451,23 @@ def _parse(source, state): if gid is None: raise error, "unknown group name" subpattern.append((GROUP, gid)) + elif source.match("#"): + index = "" + while 1: + char = source.get() + if char is None: + raise error, "unterminated index" + if char == ")": + break + index = index + 
char + try: + index = int(index) + if index < 0 or index > MAXREPEAT: + raise ValueError + except ValueError: + raise error, "illegal index" + subpattern.append((INDEX, index)) + continue else: char = source.get() if char is None: |