summaryrefslogtreecommitdiffstats
path: root/Lib/sre.py
diff options
context:
space:
mode:
authorFredrik Lundh <fredrik@pythonware.com>2000-07-02 17:33:27 (GMT)
committerFredrik Lundh <fredrik@pythonware.com>2000-07-02 17:33:27 (GMT)
commit7cafe4d7e466996d5fc32e871fe834e0e0c94282 (patch)
treedc3572d1d6bd95316c7a044cfd8639be014e3520 /Lib/sre.py
parentb19948b7fb96cfc2ed69bb58f2205d1399f1f9f5 (diff)
downloadcpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.zip
cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.gz
cpython-7cafe4d7e466996d5fc32e871fe834e0e0c94282.tar.bz2
- actually enabled charset anchors in the engine (still not
used by the code generator) - changed max repeat value in engine (to match earlier array fix) - added experimental "which part matched?" mechanism to sre; see http://hem.passagen.se/eff/2000_07_01_bot-archive.htm#416954 or python-dev for details.
Diffstat (limited to 'Lib/sre.py')
-rw-r--r--Lib/sre.py31
1 files changed, 31 insertions, 0 deletions
diff --git a/Lib/sre.py b/Lib/sre.py
index a09184b..79f12a1 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -155,3 +155,34 @@ def _pickle(p):
return _compile, (p.pattern, p.flags)
copy_reg.pickle(type(_compile("")), _pickle, _compile)
+
+# --------------------------------------------------------------------
+# experimental stuff (see python-dev discussions for details)
+
class Scanner:
    """Simple regular-expression based tokenizer.

    `lexicon` is an indexable sequence of ``(phrase, action)`` pairs.
    Each `phrase` is a regular expression source string.  All phrases
    are combined into a single alternation, so when several phrases can
    match at the same position the one listed first wins.

    `action` decides what a matched token contributes to the result:

    * a callable is invoked as ``action(scanner, token_text)`` and its
      return value is used (while it runs, ``scanner.match`` is the
      match object for the current token);
    * any other non-None value is appended to the result as is;
    * None (given directly, or returned by the callable) means the
      token is skipped.

    Note: every phrase is wrapped in one extra named group, so numbered
    backreferences inside a phrase refer to the group numbering of the
    combined pattern, not of the phrase on its own.
    """

    # Prefix of the synthetic group names used to tell which lexicon
    # entry produced a match.
    _GROUP_PREFIX = "_scan_"

    def __init__(self, lexicon):
        # Imported here so the class does not rely on a module-level
        # name that may not be bound where this class is defined.
        import re
        self.lexicon = lexicon
        p = []
        for phrase, action in lexicon:
            # A named wrapper group is the outermost group of its
            # alternative and therefore always the last one to close;
            # Match.lastgroup then names the alternative that matched,
            # even if the phrase contains groups of its own.
            p.append("(?P<%s%d>%s)" % (self._GROUP_PREFIX, len(p), phrase))
        self.scanner = re.compile("|".join(p))

    def scan(self, string):
        """Tokenize `string` from its start.

        Returns a ``(result, remainder)`` tuple: `result` is the list of
        values produced by the actions, `remainder` is the unscanned
        tail of `string`.  Scanning stops at the first position where no
        phrase matches or where the match is empty (which would
        otherwise loop forever).
        """
        result = []
        append = result.append
        match = self.scanner.match
        skip = len(self._GROUP_PREFIX)
        i = 0
        while 1:
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # Zero-width match: no progress is possible.  This also
                # covers the empty lexicon, whose pattern matches "".
                break
            # Recover the lexicon index from the wrapper group's name.
            action = self.lexicon[int(m.lastgroup[skip:])][1]
            if callable(action):
                # Expose the current match object (not the pattern's
                # match method) so the callback can inspect it.
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            i = j
        return result, string[i:]