summary | refs | log | tree | commit | diff | stats
path: root/Lib/sre_parse.py
diff options
context:
space:
mode:
authorFredrik Lundh <fredrik@pythonware.com>2001-02-18 12:05:16 (GMT)
committerFredrik Lundh <fredrik@pythonware.com>2001-02-18 12:05:16 (GMT)
commitf2989b22fff921b3394e1709a07f0119370b6d74 (patch)
treee7d820eea66d169e543ac117631694d3afbccbcd /Lib/sre_parse.py
parentae7636753e15273742515eb123999d23f6b7985e (diff)
downloadcpython-f2989b22fff921b3394e1709a07f0119370b6d74.zip
cpython-f2989b22fff921b3394e1709a07f0119370b6d74.tar.gz
cpython-f2989b22fff921b3394e1709a07f0119370b6d74.tar.bz2
- restored 1.5.2 compatibility (sorry, eric)
- removed __all__ cruft from internal modules (sorry, skip)
- don't assume ASCII for string escapes (sorry, per)
Diffstat (limited to 'Lib/sre_parse.py')
-rw-r--r--Lib/sre_parse.py49
1 files changed, 27 insertions, 22 deletions
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index fc3c492..36036b6 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -10,13 +10,11 @@
# XXX: show string offset and offending character for all errors
-import sys
+# this module works under 1.5.2 and later. don't use string methods
+import string, sys
from sre_constants import *
-__all__ = ["Pattern","SubPattern","Tokenizer","parse","parse_template",
- "expand_template"]
-
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"
@@ -28,13 +26,13 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF")
WHITESPACE = tuple(" \t\n\r\v\f")
ESCAPES = {
- r"\a": (LITERAL, 7),
- r"\b": (LITERAL, 8),
- r"\f": (LITERAL, 12),
- r"\n": (LITERAL, 10),
- r"\r": (LITERAL, 13),
- r"\t": (LITERAL, 9),
- r"\v": (LITERAL, 11),
+ r"\a": (LITERAL, ord("\a")),
+ r"\b": (LITERAL, ord("\b")),
+ r"\f": (LITERAL, ord("\f")),
+ r"\n": (LITERAL, ord("\n")),
+ r"\r": (LITERAL, ord("\r")),
+ r"\t": (LITERAL, ord("\t")),
+ r"\v": (LITERAL, ord("\v")),
r"\\": (LITERAL, ord("\\"))
}
@@ -63,6 +61,13 @@ FLAGS = {
"u": SRE_FLAG_UNICODE,
}
+# figure out best way to convert hex/octal numbers to integers
+try:
+ int("10", 8)
+ atoi = int # 2.0 and later
+except TypeError:
+ atoi = string.atoi # 1.5.2
+
class Pattern:
# master pattern object. keeps track of global attributes
def __init__(self):
@@ -219,7 +224,7 @@ def isname(name):
def _group(escape, groups):
# check if the escape string represents a valid group
try:
- gid = int(escape[1:])
+ gid = atoi(escape[1:])
if gid and gid < groups:
return gid
except ValueError:
@@ -242,13 +247,13 @@ def _class_escape(source, escape):
escape = escape[2:]
if len(escape) != 2:
raise error, "bogus escape: %s" % repr("\\" + escape)
- return LITERAL, int(escape, 16) & 0xff
+ return LITERAL, atoi(escape, 16) & 0xff
elif str(escape[1:2]) in OCTDIGITS:
# octal escape (up to three digits)
while source.next in OCTDIGITS and len(escape) < 5:
escape = escape + source.get()
escape = escape[1:]
- return LITERAL, int(escape, 8) & 0xff
+ return LITERAL, atoi(escape, 8) & 0xff
if len(escape) == 2:
return LITERAL, ord(escape[1])
except ValueError:
@@ -270,12 +275,12 @@ def _escape(source, escape, state):
escape = escape + source.get()
if len(escape) != 4:
raise ValueError
- return LITERAL, int(escape[2:], 16) & 0xff
+ return LITERAL, atoi(escape[2:], 16) & 0xff
elif escape[1:2] == "0":
# octal escape
while source.next in OCTDIGITS and len(escape) < 4:
escape = escape + source.get()
- return LITERAL, int(escape[1:], 8) & 0xff
+ return LITERAL, atoi(escape[1:], 8) & 0xff
elif escape[1:2] in DIGITS:
# octal escape *or* decimal group reference (sigh)
here = source.tell()
@@ -285,7 +290,7 @@ def _escape(source, escape, state):
source.next in OCTDIGITS):
# got three octal digits; this is an octal escape
escape = escape + source.get()
- return LITERAL, int(escape[1:], 8) & 0xff
+ return LITERAL, atoi(escape[1:], 8) & 0xff
# got at least one decimal digit; this is a group reference
group = _group(escape, state.groups)
if group:
@@ -459,9 +464,9 @@ def _parse(source, state):
source.seek(here)
continue
if lo:
- min = int(lo)
+ min = atoi(lo)
if hi:
- max = int(hi)
+ max = atoi(hi)
if max < min:
raise error, "bad repeat interval"
else:
@@ -649,7 +654,7 @@ def parse_template(source, pattern):
if not name:
raise error, "bad group name"
try:
- index = int(name)
+ index = atoi(name)
except ValueError:
if not isname(name):
raise error, "bad character in group name"
@@ -673,7 +678,7 @@ def parse_template(source, pattern):
break
if not code:
this = this[1:]
- code = LITERAL, int(this[-6:], 8) & 0xff
+ code = LITERAL, atoi(this[-6:], 8) & 0xff
a(code)
else:
try:
@@ -702,4 +707,4 @@ def expand_template(template, match):
if s is None:
raise error, "empty group"
a(s)
- return sep.join(p)
+ return string.join(p, sep)