summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2016-11-21 15:35:08 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2016-11-21 15:35:08 (GMT)
commitb44fb128ae5d9562f00a944e2d22392235073a69 (patch)
treea02c44010193373a6766920250f2bfbd1af03f21
parenta2f7ee8b26e00124c0587932364b26635af84c45 (diff)
downloadcpython-b44fb128ae5d9562f00a944e2d22392235073a69.zip
cpython-b44fb128ae5d9562f00a944e2d22392235073a69.tar.gz
cpython-b44fb128ae5d9562f00a944e2d22392235073a69.tar.bz2
Implement rich comparison for _sre.SRE_Pattern
Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created by re.compile(), become comparable (only x==y and x!=y operators). This change should fix the issue #18383: don't duplicate warning filters when the warnings module is reloaded (something usually done only in unit tests).
-rw-r--r--Lib/test/test_re.py47
-rw-r--r--Misc/NEWS7
-rw-r--r--Modules/_sre.c73
3 files changed, 118 insertions, 9 deletions
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index aac3a2c..4fcd2d4 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -3,12 +3,13 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
import io
import locale
import re
-from re import Scanner
import sre_compile
-import sys
import string
+import sys
import traceback
import unittest
+import warnings
+from re import Scanner
from weakref import proxy
# Misc tests from Tim Peters' re.doc
@@ -1777,6 +1778,48 @@ SUBPATTERN None 0 0
self.assertIn('ASCII', str(re.A))
self.assertIn('DOTALL', str(re.S))
+ def test_pattern_compare(self):
+ pattern1 = re.compile('abc', re.IGNORECASE)
+
+ # equal
+ re.purge()
+ pattern2 = re.compile('abc', re.IGNORECASE)
+ self.assertEqual(hash(pattern2), hash(pattern1))
+ self.assertEqual(pattern2, pattern1)
+
+ # not equal: different pattern
+ re.purge()
+ pattern3 = re.compile('XYZ', re.IGNORECASE)
+ # Don't test hash(pattern3) != hash(pattern1) because there is no
+ # guarantee that the hash values are different
+ self.assertNotEqual(pattern3, pattern1)
+
+ # not equal: different flag (flags=0)
+ re.purge()
+ pattern4 = re.compile('abc')
+ self.assertNotEqual(pattern4, pattern1)
+
+ # only == and != comparison operators are supported
+ with self.assertRaises(TypeError):
+ pattern1 < pattern2
+
+ def test_pattern_compare_bytes(self):
+ pattern1 = re.compile(b'abc')
+
+ # equal: test bytes patterns
+ re.purge()
+ pattern2 = re.compile(b'abc')
+ self.assertEqual(hash(pattern2), hash(pattern1))
+ self.assertEqual(pattern2, pattern1)
+
+ # not equal: patterns of different types (str vs bytes);
+ # the comparison must not raise a BytesWarning
+ re.purge()
+ pattern3 = re.compile('abc')
+ with warnings.catch_warnings():
+ warnings.simplefilter('error', BytesWarning)
+ self.assertNotEqual(pattern3, pattern1)
+
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):
diff --git a/Misc/NEWS b/Misc/NEWS
index 7ba6b42..ab846a6 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -42,6 +42,11 @@ Core and Builtins
Library
-------
+- Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created
+ by re.compile(), become comparable (only x==y and x!=y operators). This
+ change should fix the issue #18383: don't duplicate warning filters when the
+ warnings module is reloaded (something usually done only in unit tests).
+
- Issue #20572: The subprocess.Popen.wait method's undocumented
endtime parameter now raises a DeprecationWarning.
@@ -77,7 +82,7 @@ Library
- Issue #28703: Fix asyncio.iscoroutinefunction to handle Mock objects.
-- Issue #28704: Fix create_unix_server to support Path-like objects
+- Issue #28704: Fix create_unix_server to support Path-like objects
(PEP 519).
- Issue #28720: Add collections.abc.AsyncGenerator.
diff --git a/Modules/_sre.c b/Modules/_sre.c
index 69c7bc0..c1e9fa6 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1506,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
self->groups = groups;
- Py_XINCREF(groupindex);
+ Py_INCREF(groupindex);
self->groupindex = groupindex;
- Py_XINCREF(indexgroup);
+ Py_INCREF(indexgroup);
self->indexgroup = indexgroup;
- self->weakreflist = NULL;
-
if (!_validate(self)) {
Py_DECREF(self);
return NULL;
@@ -2649,6 +2647,69 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_
return (PyObject*) scanner;
}
+static Py_hash_t
+pattern_hash(PatternObject *self)
+{
+ Py_hash_t hash, hash2;
+
+ hash = PyObject_Hash(self->pattern);
+ if (hash == -1) {
+ return -1;
+ }
+
+ hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
+ hash ^= hash2;
+
+ hash ^= self->flags;
+ hash ^= self->isbytes;
+ hash ^= self->codesize;
+
+ if (hash == -1) {
+ hash = -2;
+ }
+ return hash;
+}
+
+/* Rich comparison for compiled pattern objects (tp_richcompare slot).
+
+   Only the == and != operators are supported; any other operator, and any
+   operand that is not exactly a Pattern_Type instance, yields NotImplemented.
+   Two patterns are equal when their flags, isbytes, codesize, compiled code
+   and source pattern are all equal.
+
+   Returns a new reference to Py_True/Py_False (or NotImplemented), or NULL
+   with an exception set if comparing the source patterns fails. */
+static PyObject*
+pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
+{
+    PatternObject *left, *right;
+    int cmp;
+
+    if (op != Py_EQ && op != Py_NE) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+
+    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+    left = (PatternObject *)lefto;
+    right = (PatternObject *)righto;
+
+    /* The code sizes must be equal, not merely both non-zero: the memcmp()
+       below reads left->codesize items from *both* code arrays, so a size
+       mismatch would read past the end of the shorter one. */
+    cmp = (left->flags == right->flags
+           && left->isbytes == right->isbytes
+           && left->codesize == right->codesize);
+    if (cmp) {
+        /* Compare the code and the pattern because the same pattern can
+           produce different codes depending on the locale used to compile the
+           pattern when the re.LOCALE flag is used. Don't compare groups,
+           indexgroup nor groupindex: they are derived from the pattern. */
+        cmp = (memcmp(left->code, right->code,
+                      sizeof(left->code[0]) * left->codesize) == 0);
+    }
+    if (cmp) {
+        cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
+                                       Py_EQ);
+        if (cmp < 0) {
+            /* The comparison of the source patterns raised an exception. */
+            return NULL;
+        }
+    }
+    if (op == Py_NE) {
+        cmp = !cmp;
+    }
+    return PyBool_FromLong(cmp);
+}
+
+
#include "clinic/_sre.c.h"
static PyMethodDef pattern_methods[] = {
@@ -2693,7 +2754,7 @@ static PyTypeObject Pattern_Type = {
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
- 0, /* tp_hash */
+ (hashfunc)pattern_hash, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
@@ -2703,7 +2764,7 @@ static PyTypeObject Pattern_Type = {
pattern_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
- 0, /* tp_richcompare */
+ pattern_richcompare, /* tp_richcompare */
offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */