Initial revision

author: Guido van Rossum <guido@python.org> 1991-12-30 16:03:05 (GMT)
committer: Guido van Rossum <guido@python.org> 1991-12-30 16:03:05 (GMT)
commit: 6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5 (patch)
tree: beecda225e2a19fc67faa1be2f61c165a921282c
parent: 564f5507c38a0399d0a0e852df3f744a785a3d81 (diff)
download: cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.zip
cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.tar.gz
cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.tar.bz2
2 files changed, 78 insertions, 0 deletions
diff --git a/Lib/regex_syntax.py b/Lib/regex_syntax.py
new file mode 100644
index 0000000..bb80f4e
--- /dev/null
+++ b/Lib/regex_syntax.py
@@ -0,0 +1,41 @@
+# These bits are passed to regex.set_syntax() to choose among
+# alternative regexp syntaxes.
+
+# 1 means plain parentheses serve as grouping, and backslash
+#   parentheses are needed for literal searching.
+# 0 means backslash-parentheses are grouping, and plain parentheses
+#   are for literal searching.
+RE_NO_BK_PARENS = 1
+
+# 1 means plain | serves as the "or"-operator, and \| is a literal.
+# 0 means \| serves as the "or"-operator, and | is a literal.
+RE_NO_BK_VBAR = 2
+
+# 0 means plain + or ? serves as an operator, and \+, \? are literals.
+# 1 means \+, \? are operators and plain +, ? are literals.
+RE_BK_PLUS_QM = 4
+
+# 1 means | binds tighter than ^ or $.
+# 0 means the contrary.
+RE_TIGHT_VBAR = 8
+
+# 1 means treat \n as an "or"-operator.
+# 0 means treat it as a normal character.
+RE_NEWLINE_OR = 16
+
+# 0 means that special characters (such as *, ^, and $) always have
+#   their special meaning regardless of the surrounding context.
+# 1 means that special characters may act as normal characters in some
+#   contexts.  Specifically, this applies to:
+#	^ - only special at the beginning, or after ( or |
+#	$ - only special at the end, or before ) or |
+#	*, +, ? - only special when not after the beginning, (, or |
+RE_CONTEXT_INDEP_OPS = 32
+
+# Now define combinations of bits for the standard possibilities.
+RE_SYNTAX_AWK = (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)  # 1 | 2 | 32
+RE_SYNTAX_EGREP = (RE_SYNTAX_AWK | RE_NEWLINE_OR)  # the awk bits plus RE_NEWLINE_OR
+RE_SYNTAX_GREP = (RE_BK_PLUS_QM | RE_NEWLINE_OR)  # 4 | 16
+RE_SYNTAX_EMACS = 0  # no option bits set
+
+# (Python's obsolete "regexp" module used a syntax similar to awk.)
diff --git a/Lib/regexp.py b/Lib/regexp.py
new file mode 100644
index 0000000..fc6fcad
--- /dev/null
+++ b/Lib/regexp.py
@@ -0,0 +1,37 @@
+# Provide backward compatibility for module "regexp" using "regex".
+
+import regex
+from regex_syntax import *
+
+class Prog:  # Wrapper around a regex object compiled under RE_SYNTAX_AWK; match() reports the register tuples
+	def init(self, pat):  # Explicit initializer, invoked as Prog().init(pat) by compile(); returns self
+		save_syntax = regex.set_syntax(RE_SYNTAX_AWK)  # switch to awk syntax; keep set_syntax()'s result so it can be passed back below
+		try:
+			self.prog = regex.compile(pat)
+		finally:
+			xxx = regex.set_syntax(save_syntax)  # runs even if compile() raises; the value bound to xxx is never used
+		return self
+	def match(self, args):  # args is either a (str, offset) tuple or a bare value, in which case offset is 0
+		if type(args) = type(()):  # NOTE(review): '=' acts as a comparison here -- presumably valid in the 1991 dialect; confirm
+			str, offset = args
+		else:
+			str, offset = args, 0
+		if self.prog.search(str, offset) < 0:  # a negative search() result is reported as the empty tuple
+			return ()
+		regs = self.prog.regs()
+		i = len(regs)
+		while i > 0 and regs[i-1] = (-1, -1):  # trim trailing (-1, -1) entries (presumably unused groups -- TODO confirm)
+			i = i-1
+		return regs[:i]
+
+def compile(pat):  # Build a Prog for pat; Prog.init() returns the instance, so callers receive the Prog
+	return Prog().init(pat)
+
+cache_pat = None  # pattern most recently compiled by match(); None until match() stores one
+cache_prog = None  # Prog that match() compiled from cache_pat
+
+def match(pat, str):  # Match str against pat, reusing the cached Prog when pat equals the cached pattern
+	global cache_pat, cache_prog
+	if pat <> cache_pat:  # one-entry cache: recompile only when the pattern differs from the cached one
+		cache_pat, cache_prog = pat, compile(pat)
+	return cache_prog.match(str)  # str goes straight to Prog.match, so it may be a bare string or a (str, offset) tuple
author	Guido van Rossum <guido@python.org>	1991-12-30 16:03:05 (GMT)
committer	Guido van Rossum <guido@python.org>	1991-12-30 16:03:05 (GMT)
commit	6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5 (patch)
tree	beecda225e2a19fc67faa1be2f61c165a921282c
parent	564f5507c38a0399d0a0e852df3f744a785a3d81 (diff)
download	cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.zip cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.tar.gz cpython-6c6b78d6bd4d19af78e61a6b08c84696a2a88fa5.tar.bz2