diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2001-09-18 20:55:24 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2001-09-18 20:55:24 (GMT) |
commit | 59b68656f846973840953220c4780f3558b59fb8 (patch) | |
tree | 6a91ff46e00681f594aa3c9280d268627d009cc5 | |
parent | ab3b0343b89b4683148dadaf89728ee1198ebee5 (diff) | |
download | cpython-59b68656f846973840953220c4780f3558b59fb8.zip cpython-59b68656f846973840953220c4780f3558b59fb8.tar.gz cpython-59b68656f846973840953220c4780f3558b59fb8.tar.bz2 |
fixed #449964: sre.sub raises an exception if the template contains a
\g<x> group reference followed by a character escape
(also restructured a few things on the way to fixing #449000)
-rw-r--r-- | Lib/sre.py | 12 | ||||
-rw-r--r-- | Lib/sre_parse.py | 8 | ||||
-rw-r--r-- | Lib/test/test_sre.py | 3 | ||||
-rw-r--r-- | Modules/_sre.c | 28 |
4 files changed, 30 insertions, 21 deletions
@@ -251,11 +251,13 @@ def _subn(pattern, template, text, count=0, sub=0): else: template = _compile_repl(template, pattern) literals = template[1] - sub = 0 # temporarly disabled, see bug #449000 - if (sub and not count and pattern._isliteral() and - len(literals) == 1 and literals[0]): - # shortcut: both pattern and string are literals - return string.replace(text, pattern.pattern, literals[0]), 0 + if sub and not count: + literal = pattern._getliteral() + if literal and "\\" in literal: + literal = None # may contain untranslated escapes + if literal is not None and len(literals) == 1 and literals[0]: + # shortcut: both pattern and string are literals + return string.replace(text, pattern.pattern, literals[0]), 0 def filter(match, template=template): return sre_parse.expand_template(template, match) n = i = 0 diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index af1edbf..7d9b889 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -647,9 +647,9 @@ def parse_template(source, pattern): p.append((LITERAL, literal)) sep = source[:0] if type(sep) is type(""): - char = chr + makechar = chr else: - char = unichr + makechar = unichr while 1: this = s.get() if this is None: @@ -693,14 +693,14 @@ def parse_template(source, pattern): break if not code: this = this[1:] - code = LITERAL, char(atoi(this[-6:], 8) & 0xff) + code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff) if code[0] is LITERAL: literal(code[1]) else: a(code) else: try: - this = char(ESCAPES[this][1]) + this = makechar(ESCAPES[this][1]) except KeyError: pass literal(this) diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 49fe4c6..4a71447 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -104,6 +104,9 @@ test(r"""sre.sub(r'(?P<a>x)', '\g<a>\g<1>', 'xx')""", 'xxxx') test(r"""sre.sub(r'(?P<unk>x)', '\g<unk>\g<unk>', 'xx')""", 'xxxx') test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>', 'xx')""", 'xxxx') +# bug 449964: fails for group followed by other escape +test(r"""sre.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx')""", 'xx\bxx\b') + test(r"""sre.sub(r'a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a')""", '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", '\t\n\v\r\f\a') test(r"""sre.sub(r'a', '\t\n\v\r\f\a', 'a')""", (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) diff --git a/Modules/_sre.c b/Modules/_sre.c index b0ab663..32cd48b 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -31,7 +31,7 @@ * 2001-04-28 fl added __copy__ methods (work in progress) * 2001-05-14 fl fixes for 1.5.2 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) - * 2001-09-18 fl + * 2001-09-18 fl added _getliteral helper * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * @@ -1959,25 +1959,29 @@ pattern_deepcopy(PatternObject* self, PyObject* args) } static PyObject* -pattern_isliteral(PatternObject* self, PyObject* args) +pattern_getliteral(PatternObject* self, PyObject* args) { - /* internal: return true if pattern consists of literal text only */ + /* internal: if the pattern is a literal string, return that + string. otherwise, return None */ SRE_CODE* code; - PyObject* isliteral; + PyObject* literal; - if (!PyArg_ParseTuple(args, ":_isliteral")) + if (!PyArg_ParseTuple(args, ":_getliteral")) return NULL; code = PatternObject_GetCode(self); - if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) - isliteral = Py_True; - else - isliteral = Py_False; + if (code[0] == SRE_OP_INFO && code[2] & SRE_INFO_LITERAL) { + /* FIXME: extract literal string from code buffer. we can't + use the pattern member, since it may contain untranslated + escape codes (see SF bug 449000) */ + literal = Py_None; + } else + literal = Py_None; /* no literal */ - Py_INCREF(isliteral); - return isliteral; + Py_INCREF(literal); + return literal; } static PyMethodDef pattern_methods[] = { @@ -1990,7 +1994,7 @@ static PyMethodDef pattern_methods[] = { {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS}, {"__copy__", (PyCFunction) pattern_copy, METH_VARARGS}, {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_VARARGS}, - {"_isliteral", (PyCFunction) pattern_isliteral, METH_VARARGS}, + {"_getliteral", (PyCFunction) pattern_getliteral, METH_VARARGS}, {NULL, NULL} }; |