summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2013-11-23 20:42:43 (GMT)
committer Serhiy Storchaka <storchaka@gmail.com> 2013-11-23 20:42:43 (GMT)
commit 5c24d0e504f9271ee095f706b493566ec0162879 (patch)
tree dfd3af4fb8e193f3c0780fab3daef24a4beb182d
parent c1207c1bcf732bc3e9c8875cfb0343af98ebc41c (diff)
download cpython-5c24d0e504f9271ee095f706b493566ec0162879.zip
cpython-5c24d0e504f9271ee095f706b493566ec0162879.tar.gz
cpython-5c24d0e504f9271ee095f706b493566ec0162879.tar.bz2
Issue #13592: Improved the repr for regular expression pattern objects.
Based on patch by Hugo Lopes Tavares.
-rw-r--r-- Lib/sre_constants.py 2
-rw-r--r-- Lib/test/test_re.py 62
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/_sre.c 82
-rw-r--r-- Modules/sre_constants.h 2
5 files changed, 150 insertions, 1 deletions
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index 5898d54..23e3516 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -250,6 +250,8 @@ if __name__ == "__main__":
f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
+ f.write("#define SRE_FLAG_DEBUG %d\n" % SRE_FLAG_DEBUG)
+ f.write("#define SRE_FLAG_ASCII %d\n" % SRE_FLAG_ASCII)
f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 8d63fac..1ef68b8 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1164,6 +1164,68 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(2), "y")
+class PatternReprTests(unittest.TestCase):
+ def check(self, pattern, expected):
+ self.assertEqual(repr(re.compile(pattern)), expected)
+
+ def check_flags(self, pattern, flags, expected):
+ self.assertEqual(repr(re.compile(pattern, flags)), expected)
+
+ def test_without_flags(self):
+ self.check('random pattern',
+ "re.compile('random pattern')")
+
+ def test_single_flag(self):
+ self.check_flags('random pattern', re.IGNORECASE,
+ "re.compile('random pattern', re.IGNORECASE)")
+
+ def test_multiple_flags(self):
+ self.check_flags('random pattern', re.I|re.S|re.X,
+ "re.compile('random pattern', "
+ "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
+
+ def test_unicode_flag(self):
+ self.check_flags('random pattern', re.U,
+ "re.compile('random pattern')")
+ self.check_flags('random pattern', re.I|re.S|re.U,
+ "re.compile('random pattern', "
+ "re.IGNORECASE|re.DOTALL)")
+
+ def test_inline_flags(self):
+ self.check('(?i)pattern',
+ "re.compile('(?i)pattern', re.IGNORECASE)")
+
+ def test_unknown_flags(self):
+ self.check_flags('random pattern', 0x123000,
+ "re.compile('random pattern', 0x123000)")
+ self.check_flags('random pattern', 0x123000|re.I,
+ "re.compile('random pattern', re.IGNORECASE|0x123000)")
+
+ def test_bytes(self):
+ self.check(b'bytes pattern',
+ "re.compile(b'bytes pattern')")
+ self.check_flags(b'bytes pattern', re.A,
+ "re.compile(b'bytes pattern', re.ASCII)")
+
+ def test_quotes(self):
+ self.check('random "double quoted" pattern',
+ '''re.compile('random "double quoted" pattern')''')
+ self.check("random 'single quoted' pattern",
+ '''re.compile("random 'single quoted' pattern")''')
+ self.check('''both 'single' and "double" quotes''',
+ '''re.compile('both \\'single\\' and "double" quotes')''')
+
+ def test_long_pattern(self):
+ pattern = 'Very %spattern' % ('long ' * 1000)
+ r = repr(re.compile(pattern))
+ self.assertLess(len(r), 300)
+ self.assertEqual(r[:30], "re.compile('Very long long lon")
+ r = repr(re.compile(pattern, re.I))
+ self.assertLess(len(r), 300)
+ self.assertEqual(r[:30], "re.compile('Very long long lon")
+ self.assertEqual(r[-16:], ", re.IGNORECASE)")
+
+
class ImplementationTest(unittest.TestCase):
"""
Test implementation details of the re module.
diff --git a/Misc/NEWS b/Misc/NEWS
index f6823b4..a2db433 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -68,6 +68,9 @@ Core and Builtins
Library
-------
+- Issue #13592: Improved the repr for regular expression pattern objects.
+ Based on patch by Hugo Lopes Tavares.
+
- Issue #19641: Added the audioop.byteswap() function to convert big-endian
samples to little-endian and vice versa.
diff --git a/Modules/_sre.c b/Modules/_sre.c
index c3df825..3a92db9 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1139,6 +1139,86 @@ pattern_deepcopy(PatternObject* self, PyObject* memo)
#endif
}
+/* Build the repr of a compiled pattern object.
+ *
+ * The result has the form "re.compile(<pattern>)" when no flag remains to be
+ * shown, or "re.compile(<pattern>, <flags>)" otherwise.  <flags> is a
+ * "|"-joined list of the symbolic names from flag_names[], in table order,
+ * followed by a single "0x..." item holding any bits that have no name.
+ * The pattern is formatted with %.200R, which limits how much of its repr
+ * is included.
+ *
+ * Returns a new reference, or NULL if any of the C-API calls below fails.
+ */
+static PyObject *
+pattern_repr(PatternObject *obj)
+{
+    static const struct {
+        const char *name;
+        int value;
+    } flag_names[] = {
+        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
+        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
+        {"re.LOCALE", SRE_FLAG_LOCALE},
+        {"re.MULTILINE", SRE_FLAG_MULTILINE},
+        {"re.DOTALL", SRE_FLAG_DOTALL},
+        {"re.UNICODE", SRE_FLAG_UNICODE},
+        {"re.VERBOSE", SRE_FLAG_VERBOSE},
+        {"re.DEBUG", SRE_FLAG_DEBUG},
+        {"re.ASCII", SRE_FLAG_ASCII},
+    };
+    PyObject *result = NULL;
+    PyObject *flag_items;
+    /* size_t matches the type of Py_ARRAY_LENGTH() and avoids a
+       signed/unsigned comparison in the loop condition. */
+    size_t i;
+    int flags = obj->flags;
+
+    /* Omit re.UNICODE for valid string patterns. */
+    if (obj->isbytes == 0 &&
+        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
+         SRE_FLAG_UNICODE) {
+        flags &= ~SRE_FLAG_UNICODE;
+    }
+
+    flag_items = PyList_New(0);
+    if (!flag_items) {
+        return NULL;
+    }
+
+    /* Collect the name of every known flag that is set, clearing each bit
+       as it is consumed so that only unnamed bits are left afterwards. */
+    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
+        if (flags & flag_names[i].value) {
+            PyObject *item = PyUnicode_FromString(flag_names[i].name);
+            if (!item) {
+                goto done;
+            }
+
+            if (PyList_Append(flag_items, item) < 0) {
+                Py_DECREF(item);
+                goto done;
+            }
+            Py_DECREF(item);
+            flags &= ~flag_names[i].value;
+        }
+    }
+
+    /* Whatever is still set has no symbolic name: show it in hex. */
+    if (flags) {
+        PyObject *item = PyUnicode_FromFormat("0x%x", flags);
+        if (!item) {
+            goto done;
+        }
+
+        if (PyList_Append(flag_items, item) < 0) {
+            Py_DECREF(item);
+            goto done;
+        }
+        Py_DECREF(item);
+    }
+
+    if (PyList_Size(flag_items) > 0) {
+        PyObject *flags_result;
+        PyObject *sep = PyUnicode_FromString("|");
+        if (!sep) {
+            goto done;
+        }
+        flags_result = PyUnicode_Join(sep, flag_items);
+        Py_DECREF(sep);
+        if (!flags_result) {
+            goto done;
+        }
+        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
+                                      obj->pattern, flags_result);
+        Py_DECREF(flags_result);
+    }
+    else {
+        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
+    }
+
+done:
+    /* Single exit path: result is still NULL if anything above failed. */
+    Py_DECREF(flag_items);
+    return result;
+}
+
PyDoc_STRVAR(pattern_match_doc,
"match(string[, pos[, endpos]]) -> match object or None.\n\
Matches zero or more characters at the beginning of the string");
@@ -1214,7 +1294,7 @@ static PyTypeObject Pattern_Type = {
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
- 0, /* tp_repr */
+ (reprfunc)pattern_repr, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
index 13c8958..5940d5a 100644
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -81,6 +81,8 @@
#define SRE_FLAG_DOTALL 16
#define SRE_FLAG_UNICODE 32
#define SRE_FLAG_VERBOSE 64
+#define SRE_FLAG_DEBUG 128
+#define SRE_FLAG_ASCII 256
#define SRE_INFO_PREFIX 1
#define SRE_INFO_LITERAL 2
#define SRE_INFO_CHARSET 4