From 21009b9c6fc40b25fcb30ee60d6108f235733e40 Mon Sep 17 00:00:00 2001 From: Fredrik Lundh Date: Tue, 18 Sep 2001 18:47:09 +0000 Subject: an SRE bugfix a day keeps Guido away... #462270: sub-tle difference between pre.sub and sre.sub. PRE ignored an empty match at the previous location, SRE didn't. also synced with Secret Labs "sreopen" codebase. --- Lib/sre.py | 11 +++++++---- Lib/test/test_sre.py | 4 ++++ Modules/_sre.c | 23 ++++++++++++++--------- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/Lib/sre.py b/Lib/sre.py index eb8325d..a87870e 100644 --- a/Lib/sre.py +++ b/Lib/sre.py @@ -45,7 +45,7 @@ The special characters are: "|" A|B, creates an RE that will match either A or B. (...) Matches the RE inside the parentheses. The contents can be retrieved or matched later in the string. - (?iLmsx) Set the I, L, M, S, or X flag for the RE. + (?iLmsx) Set the I, L, M, S, or X flag for the RE (see below). (?:...) Non-grouping version of regular parentheses. (?P...) The substring matched by the group is accessible by name. (?P=name) Matches the text matched earlier by the group named name. @@ -80,7 +80,6 @@ This module exports the following functions: findall Find all occurrences of a pattern in a string. compile Compile a pattern into a RegexObject. purge Clear the regular expression cache. - template Compile a template pattern, returning a pattern object. escape Backslash all non-alphanumerics in a string. Some of the functions in this module takes flags as optional parameters: @@ -90,11 +89,12 @@ Some of the functions in this module takes flags as optional parameters: "$" matches the end of lines as well as the string. S DOTALL "." matches any character at all, including the newline. X VERBOSE Ignore whitespace and comments for nicer looking RE's. - U UNICODE Use unicode locale. + U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale. This module also defines an exception 'error'. """ + import sre_compile import sre_parse @@ -104,7 +104,7 @@ __all__ = [ "match", "search", "sub", "subn", "split", "findall", "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE", "error" ] -__version__ = "2.1b2" +__version__ = "2.1.1" # this module works under 1.5.2 and later. don't use string methods import string @@ -269,6 +269,9 @@ def _subn(pattern, template, text, count=0, sub=0): b, e = m.span() if i < b: append(text[i:b]) + elif i == b == e and n: + append(text[i:b]) + continue # ignore empty match at previous position append(filter(m)) i = e n = n + 1 diff --git a/Lib/test/test_sre.py b/Lib/test/test_sre.py index 8442258..49fe4c6 100644 --- a/Lib/test/test_sre.py +++ b/Lib/test/test_sre.py @@ -123,6 +123,10 @@ test(r"""sre.sub('\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') test(r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') test(r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') +# Test for empty sub() behaviour, see SF bug #462270 +test(r"""sre.sub('x*', '-', 'abxd')""", '-a-b-d-') +test(r"""sre.sub('x+', '-', 'abxd')""", 'ab-d') + if verbose: print 'Running tests on symbolic references' diff --git a/Modules/_sre.c b/Modules/_sre.c index d6f39a4..b0ab663 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -31,6 +31,7 @@ * 2001-04-28 fl added __copy__ methods (work in progress) * 2001-05-14 fl fixes for 1.5.2 * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) + * 2001-09-18 fl * * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * @@ -133,6 +134,8 @@ static char copyright[] = #define SRE_ALNUM_MASK 8 #define SRE_WORD_MASK 16 +/* FIXME: this assumes ASCII. create tables in init_sre() instead */ + static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, @@ -1141,6 +1144,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level) } /* can't end up here */ + /* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */ } LOCAL(int) @@ -2624,16 +2628,17 @@ init_sre(void) m = Py_InitModule("_" SRE_MODULE, _functions); d = PyModule_GetDict(m); - PyDict_SetItemString( - d, "MAGIC", (x = (PyObject*) PyInt_FromLong(SRE_MAGIC)) - ); - Py_XDECREF(x); - - PyDict_SetItemString( - d, "copyright", (x = (PyObject*)PyString_FromString(copyright)) - ); - Py_XDECREF(x); + x = PyInt_FromLong(SRE_MAGIC); + if (x) { + PyDict_SetItemString(d, "MAGIC", x); + Py_DECREF(x); + } + x = PyString_FromString(copyright); + if (x) { + PyDict_SetItemString(d, "copyright", x); + Py_DECREF(x); + } } #endif /* !defined(SRE_RECURSIVE) */ -- cgit v0.12