summary | refs | log | tree | commit | diff | stats
path: root/Modules
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2017-05-09 20:37:14 (GMT)
committer: GitHub <noreply@github.com>, 2017-05-09 20:37:14 (GMT)
commit: 6d336a027913327fc042b0d758a16724fea27b9c (patch)
tree: ca511a6c75e340ef3493674b791f05a692e0c9e2 /Modules
parent: f93234bb8a87855f295d441524e519481ce6ab13 (diff)
download: cpython-6d336a027913327fc042b0d758a16724fea27b9c.zip
cpython-6d336a027913327fc042b0d758a16724fea27b9c.tar.gz
cpython-6d336a027913327fc042b0d758a16724fea27b9c.tar.bz2
bpo-30285: Optimize case-insensitive matching and searching
of regular expressions. (#1482)
Diffstat (limited to 'Modules')
-rw-r--r-- Modules/_sre.c | 34
-rw-r--r-- Modules/clinic/_sre.c.h | 64
2 files changed, 97 insertions, 1 deletion
diff --git a/Modules/_sre.c b/Modules/_sre.c
index a86c5f2..6873f1d 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -274,6 +274,38 @@ _sre_getcodesize_impl(PyObject *module)
}
/*[clinic input]
+_sre.ascii_iscased -> bool
+
+ character: int
+ /
+
+[clinic start generated code]*/
+
+static int
+_sre_ascii_iscased_impl(PyObject *module, int character)
+/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
+{
+ unsigned int ch = (unsigned int)character;
+ return ch != sre_lower(ch) || ch != sre_upper(ch);
+}
+
+/*[clinic input]
+_sre.unicode_iscased -> bool
+
+ character: int
+ /
+
+[clinic start generated code]*/
+
+static int
+_sre_unicode_iscased_impl(PyObject *module, int character)
+/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
+{
+ unsigned int ch = (unsigned int)character;
+ return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
+}
+
+/*[clinic input]
_sre.ascii_tolower -> int
character: int
@@ -2750,6 +2782,8 @@ static PyTypeObject Scanner_Type = {
static PyMethodDef _functions[] = {
_SRE_COMPILE_METHODDEF
_SRE_GETCODESIZE_METHODDEF
+ _SRE_ASCII_ISCASED_METHODDEF
+ _SRE_UNICODE_ISCASED_METHODDEF
_SRE_ASCII_TOLOWER_METHODDEF
_SRE_UNICODE_TOLOWER_METHODDEF
{NULL, NULL}
diff --git a/Modules/clinic/_sre.c.h b/Modules/clinic/_sre.c.h
index 8056eda..1e60686 100644
--- a/Modules/clinic/_sre.c.h
+++ b/Modules/clinic/_sre.c.h
@@ -29,6 +29,68 @@ exit:
return return_value;
}
+PyDoc_STRVAR(_sre_ascii_iscased__doc__,
+"ascii_iscased($module, character, /)\n"
+"--\n"
+"\n");
+
+#define _SRE_ASCII_ISCASED_METHODDEF \
+ {"ascii_iscased", (PyCFunction)_sre_ascii_iscased, METH_O, _sre_ascii_iscased__doc__},
+
+static int
+_sre_ascii_iscased_impl(PyObject *module, int character);
+
+static PyObject *
+_sre_ascii_iscased(PyObject *module, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ int character;
+ int _return_value;
+
+ if (!PyArg_Parse(arg, "i:ascii_iscased", &character)) {
+ goto exit;
+ }
+ _return_value = _sre_ascii_iscased_impl(module, character);
+ if ((_return_value == -1) && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_sre_unicode_iscased__doc__,
+"unicode_iscased($module, character, /)\n"
+"--\n"
+"\n");
+
+#define _SRE_UNICODE_ISCASED_METHODDEF \
+ {"unicode_iscased", (PyCFunction)_sre_unicode_iscased, METH_O, _sre_unicode_iscased__doc__},
+
+static int
+_sre_unicode_iscased_impl(PyObject *module, int character);
+
+static PyObject *
+_sre_unicode_iscased(PyObject *module, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ int character;
+ int _return_value;
+
+ if (!PyArg_Parse(arg, "i:unicode_iscased", &character)) {
+ goto exit;
+ }
+ _return_value = _sre_unicode_iscased_impl(module, character);
+ if ((_return_value == -1) && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(_sre_ascii_tolower__doc__,
"ascii_tolower($module, character, /)\n"
"--\n"
@@ -715,4 +777,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
return _sre_SRE_Scanner_search_impl(self);
}
-/*[clinic end generated code: output=811e67d7f8f5052e input=a9049054013a1b77]*/
+/*[clinic end generated code: output=5fe47c49e475cccb input=a9049054013a1b77]*/