summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Fredrik Lundh <fredrik@pythonware.com> 2000-07-02 17:33:27 (GMT)
committer: Fredrik Lundh <fredrik@pythonware.com> 2000-07-02 17:33:27 (GMT)
commit: 7cafe4d7e466996d5fc32e871fe834e0e0c94282 (patch)
tree: dc3572d1d6bd95316c7a044cfd8639be014e3520 /Lib
parent: b19948b7fb96cfc2ed69bb58f2205d1399f1f9f5 (diff)
download: cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.zip
          cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.gz
          cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.bz2
- actually enabled charset anchors in the engine (still not
  used by the code generator)

- changed max repeat value in engine (to match earlier array fix)

- added experimental "which part matched?" mechanism to sre; see
  http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954
  or python-dev for details.
Diffstat (limited to 'Lib')
-rw-r--r--  Lib/sre.py            31
-rw-r--r--  Lib/sre_compile.py     2
-rw-r--r--  Lib/sre_constants.py   2
-rw-r--r--  Lib/sre_parse.py      17
4 files changed, 51 insertions, 1 deletion
diff --git a/Lib/sre.py b/Lib/sre.py
index a09184b..79f12a1 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -155,3 +155,34 @@ def _pickle(p):
return _compile, (p.pattern, p.flags)
copy_reg.pickle(type(_compile("")), _pickle, _compile)
+
+# --------------------------------------------------------------------
+# experimental stuff (see python-dev discussions for details)
+
+class Scanner:
+ def __init__(self, lexicon):
+ self.lexicon = lexicon
+ p = []
+ for phrase, action in lexicon:
+ p.append("(?:%s)(?P#%d)" % (phrase, len(p)))
+ self.scanner = sre.compile("|".join(p))
+ def scan(self, string):
+ result = []
+ append = result.append
+ match = self.scanner.match
+ i = 0
+ while 1:
+ m = match(string, i)
+ if not m:
+ break
+ j = m.end()
+ if i == j:
+ break
+ action = self.lexicon[m.index][1]
+ if callable(action):
+ self.match = match
+ action = action(self, m.group())
+ if action is not None:
+ append(action)
+ i = j
+ return result, string[i:]
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index a593ee7..e5c501e 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -208,7 +208,7 @@ def _compile(code, pattern, flags):
else:
emit(OPCODES[op])
emit(av-1)
- elif op is MARK:
+ elif op in (MARK, INDEX):
emit(OPCODES[op])
emit(av)
else:
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index f0e45ea..076637d 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -33,6 +33,7 @@ GROUP = "group"
GROUP_IGNORE = "group_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
+INDEX = "index"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
@@ -90,6 +91,7 @@ OPCODES = [
CATEGORY,
CHARSET,
GROUP, GROUP_IGNORE,
+ INDEX,
IN, IN_IGNORE,
INFO,
JUMP,
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index b263256..81ca217 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -451,6 +451,23 @@ def _parse(source, state):
if gid is None:
raise error, "unknown group name"
subpattern.append((GROUP, gid))
+ elif source.match("#"):
+ index = ""
+ while 1:
+ char = source.get()
+ if char is None:
+ raise error, "unterminated index"
+ if char == ")":
+ break
+ index = index + char
+ try:
+ index = int(index)
+ if index < 0 or index > MAXREPEAT:
+ raise ValueError
+ except ValueError:
+ raise error, "illegal index"
+ subpattern.append((INDEX, index))
+ continue
else:
char = source.get()
if char is None: