summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/sre.py 15
-rw-r--r-- Lib/sre_compile.py 8
-rw-r--r-- Lib/sre_parse.py 68
-rw-r--r-- Modules/_sre.c 6
4 files changed, 55 insertions, 42 deletions
diff --git a/Lib/sre.py b/Lib/sre.py
index 49e3140..d5bb462 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -109,16 +109,13 @@ def _subn(pattern, template, string, count=0):
m = c.search()
if not m:
break
- j = m.start()
- if j > i:
- append(string[i:j])
+ b, e = m.span()
+ if i < b:
+ append(string[i:b])
append(filter(m))
- i = m.end()
- if i <= j:
- break
+ i = e
n = n + 1
- if i < len(string):
- append(string[i:])
+ append(string[i:])
return string[:0].join(s), n
def _split(pattern, string, maxsplit=0):
@@ -128,7 +125,7 @@ def _split(pattern, string, maxsplit=0):
append = s.append
extend = s.extend
c = pattern.scanner(string)
- g = c.groups
+ g = pattern.groups
while not maxsplit or n < maxsplit:
m = c.search()
if not m:
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 344dc29..ea5f5bc 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -61,9 +61,9 @@ def _compile(code, pattern, flags):
elif op is CATEGORY:
emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE:
- emit(CH_LOCALE[CHCODES[av]])
+ emit(CHCODES[CH_LOCALE[av]])
elif flags & SRE_FLAG_UNICODE:
- emit(CH_UNICODE[CHCODES[av]])
+ emit(CHCODES[CH_UNICODE[av]])
else:
emit(CHCODES[av])
elif op is GROUP:
@@ -92,9 +92,9 @@ def _compile(code, pattern, flags):
emit(fixup(av[1]))
elif op is CATEGORY:
if flags & SRE_FLAG_LOCALE:
- emit(CH_LOCALE[CHCODES[av]])
+ emit(CHCODES[CH_LOCALE[av]])
elif flags & SRE_FLAG_UNICODE:
- emit(CH_UNICODE[CHCODES[av]])
+ emit(CHCODES[CH_UNICODE[av]])
else:
emit(CHCODES[av])
else:
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 93a7b5d..ec934fe 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -30,26 +30,27 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF")
WHITESPACE = string.whitespace
ESCAPES = {
- "\\a": (LITERAL, chr(7)),
- "\\b": (LITERAL, chr(8)),
- "\\f": (LITERAL, chr(12)),
- "\\n": (LITERAL, chr(10)),
- "\\r": (LITERAL, chr(13)),
- "\\t": (LITERAL, chr(9)),
- "\\v": (LITERAL, chr(11))
+ r"\a": (LITERAL, chr(7)),
+ r"\b": (LITERAL, chr(8)),
+ r"\f": (LITERAL, chr(12)),
+ r"\n": (LITERAL, chr(10)),
+ r"\r": (LITERAL, chr(13)),
+ r"\t": (LITERAL, chr(9)),
+ r"\v": (LITERAL, chr(11)),
+ r"\\": (LITERAL, "\\")
}
CATEGORIES = {
- "\\A": (AT, AT_BEGINNING), # start of string
- "\\b": (AT, AT_BOUNDARY),
- "\\B": (AT, AT_NON_BOUNDARY),
- "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
- "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
- "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
- "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
- "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
- "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
- "\\Z": (AT, AT_END), # end of string
+ r"\A": (AT, AT_BEGINNING), # start of string
+ r"\b": (AT, AT_BOUNDARY),
+ r"\B": (AT, AT_NON_BOUNDARY),
+ r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
+ r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
+ r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
+ r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
+ r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
+ r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
+ r"\Z": (AT, AT_END), # end of string
}
FLAGS = {
@@ -185,11 +186,11 @@ def isname(name):
return 0
return 1
-def _group(escape, state):
+def _group(escape, groups):
# check if the escape string represents a valid group
try:
group = int(escape[1:])
- if group and group < state.groups:
+ if group and group < groups:
return group
except ValueError:
pass
@@ -239,10 +240,10 @@ def _escape(source, escape, state):
return LITERAL, chr(int(escape[-4:], 16) & 0xff)
elif escape[1:2] in DIGITS:
while 1:
- group = _group(escape, state)
+ group = _group(escape, state.groups)
if group:
if (not source.next or
- not _group(escape + source.next, state)):
+ not _group(escape + source.next, state.groups)):
return GROUP, group
escape = escape + source.get()
elif source.next in OCTDIGITS:
@@ -534,6 +535,7 @@ def parse_template(source, pattern):
if this is None:
break # end of replacement string
if this and this[0] == "\\":
+ # group
if this == "\\g":
name = ""
if s.match("<"):
@@ -557,15 +559,29 @@ def parse_template(source, pattern):
raise IndexError, "unknown group name"
a((MARK, index))
elif len(this) > 1 and this[1] in DIGITS:
- while s.next in DIGITS:
- this = this + s.get()
- a((MARK, int(this[1:])))
+ code = None
+ while 1:
+ group = _group(this, pattern.groups+1)
+ if group:
+ if (not s.next or
+ not _group(this + s.next, pattern.groups+1)):
+ code = MARK, int(group)
+ break
+ elif s.next in OCTDIGITS:
+ this = this + s.get()
+ else:
+ break
+ if not code:
+ this = this[1:]
+ # FIXME: support unicode characters!
+ code = LITERAL, chr(int(this[-6:], 8) & 0xff)
+ a(code)
else:
try:
a(ESCAPES[this])
except KeyError:
- for char in this:
- a((LITERAL, char))
+ for c in this:
+ a((LITERAL, c))
else:
a((LITERAL, this))
return p
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 6b0fa61..7b1adbd 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1534,6 +1534,9 @@ pattern_getattr(PatternObject* self, char* name)
if (!strcmp(name, "flags"))
return Py_BuildValue("i", self->flags);
+ if (!strcmp(name, "groups"))
+ return Py_BuildValue("i", self->groups);
+
if (!strcmp(name, "groupindex") && self->groupindex) {
Py_INCREF(self->groupindex);
return self->groupindex;
@@ -1939,9 +1942,6 @@ scanner_getattr(ScannerObject* self, char* name)
return self->pattern;
}
- if (!strcmp(name, "groups"))
- return Py_BuildValue("i", ((PatternObject*) self->pattern)->groups);
-
PyErr_SetString(PyExc_AttributeError, name);
return NULL;
}