diff options
author | Guido van Rossum <guido@python.org> | 1991-12-30 16:03:05 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1991-12-30 16:03:05 (GMT) |
commit | 6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5 (patch) | |
tree | beecda225e2a19fc67faa1be2f61c165a921282c | |
parent | 564f5507c38a0399d0a0e852df3f744a785a3d81 (diff) | |
download | cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.zip cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.tar.gz cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.tar.bz2 |
Initial revision
-rw-r--r-- | Lib/regex_syntax.py | 41 | ||||
-rw-r--r-- | Lib/regexp.py | 37 |
2 files changed, 78 insertions, 0 deletions
diff --git a/Lib/regex_syntax.py b/Lib/regex_syntax.py new file mode 100644 index 0000000..bb80f4e --- /dev/null +++ b/Lib/regex_syntax.py @@ -0,0 +1,41 @@ +# These bits are passed to regex.set_syntax() to choose among +# alternative regexp syntaxes. + +# 1 means plain parentheses serve as grouping, and backslash +# parentheses are needed for literal searching. +# 0 means backslash-parentheses are grouping, and plain parentheses +# are for literal searching. +RE_NO_BK_PARENS = 1 + +# 1 means plain | serves as the "or"-operator, and \| is a literal. +# 0 means \| serves as the "or"-operator, and | is a literal. +RE_NO_BK_VBAR = 2 + +# 0 means plain + or ? serves as an operator, and \+, \? are literals. +# 1 means \+, \? are operators and plain +, ? are literals. +RE_BK_PLUS_QM = 4 + +# 1 means | binds tighter than ^ or $. +# 0 means the contrary. +RE_TIGHT_VBAR = 8 + +# 1 means treat \n as an "or"-operator +# 0 means treat it as a normal character +RE_NEWLINE_OR = 16 + +# 0 means that special characters (such as *, ^, and $) always have +# their special meaning regardless of the surrounding context. +# 1 means that special characters may act as normal characters in some +# contexts. Specifically, this applies to: +# ^ - only special at the beginning, or after ( or | +# $ - only special at the end, or before ) or | +# *, +, ? - only special when not after the beginning, (, or | +RE_CONTEXT_INDEP_OPS = 32 + +# Now define combinations of bits for the standard possibilities. +RE_SYNTAX_AWK = (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS) +RE_SYNTAX_EGREP = (RE_SYNTAX_AWK | RE_NEWLINE_OR) +RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_NEWLINE_OR) +RE_SYNTAX_EMACS = 0 + +# (Python's obsolete "regexp" module used a syntax similar to awk.) diff --git a/Lib/regexp.py b/Lib/regexp.py new file mode 100644 index 0000000..fc6fcad --- /dev/null +++ b/Lib/regexp.py @@ -0,0 +1,37 @@ +# Provide backward compatibility for module "regexp" using "regex".
import regex
from regex_syntax import *


class Prog:
    """Compiled pattern exposing the old "regexp" interface via "regex".

    Instances are created empty and set up through init(), which returns
    the instance so that ``Prog().init(pat)`` yields a ready object.
    """

    def init(self, pat):
        """Compile *pat* under RE_SYNTAX_AWK and return self.

        The value returned by regex.set_syntax() when switching to the
        awk syntax is handed back to regex.set_syntax() afterwards, also
        when regex.compile() raises, so the syntax change only lasts for
        the duration of this call.
        """
        save_syntax = regex.set_syntax(RE_SYNTAX_AWK)
        try:
            self.prog = regex.compile(pat)
        finally:
            # Put back whatever syntax was in effect before this call.
            regex.set_syntax(save_syntax)
        return self

    def match(self, args):
        """Search for the compiled pattern and return its registers.

        *args* is either the subject string alone, or a
        ``(string, offset)`` tuple; without a tuple the offset is 0.

        Returns ``()`` when ``self.prog.search()`` gives a negative
        result.  Otherwise returns ``self.prog.regs()`` with any
        trailing ``(-1, -1)`` entries sliced off (presumably the
        registers of groups that did not take part in the match).
        """
        # isinstance() also accepts tuple subclasses, which the original
        # exact type comparison did not; plain tuples behave the same.
        if isinstance(args, tuple):
            text, offset = args
        else:
            text, offset = args, 0
        if self.prog.search(text, offset) < 0:
            return ()
        regs = self.prog.regs()
        i = len(regs)
        while i > 0 and regs[i - 1] == (-1, -1):
            i = i - 1
        return regs[:i]


def compile(pat):
    """Return a Prog object initialised with pattern *pat*."""
    return Prog().init(pat)


# One-entry cache used by match(): the most recently used pattern and
# the Prog object compiled from it.
cache_pat = None
cache_prog = None


def match(pat, str):
    """Match pattern *pat* against *str* and return Prog.match()'s result.

    The compiled form of the last pattern is kept in the module globals
    cache_pat / cache_prog, so repeated calls with an equal pattern do
    not recompile it.  *str* is passed unchanged to Prog.match(), so it
    may be a string or a ``(string, offset)`` tuple.
    """
    global cache_pat, cache_prog
    if pat != cache_pat:
        cache_pat, cache_prog = pat, compile(pat)
    return cache_prog.match(str)