diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-05-09 20:37:14 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-09 20:37:14 (GMT) |
commit | 6d336a027913327fc042b0d758a16724fea27b9c (patch) | |
tree | ca511a6c75e340ef3493674b791f05a692e0c9e2 /Modules | |
parent | f93234bb8a87855f295d441524e519481ce6ab13 (diff) | |
download | cpython-6d336a027913327fc042b0d758a16724fea27b9c.zip cpython-6d336a027913327fc042b0d758a16724fea27b9c.tar.gz cpython-6d336a027913327fc042b0d758a16724fea27b9c.tar.bz2 |
bpo-30285: Optimize case-insensitive matching and searching of regular expressions. (#1482)
Diffstat (limited to 'Modules')
-rw-r--r-- | Modules/_sre.c | 34 | ||||
-rw-r--r-- | Modules/clinic/_sre.c.h | 64 |
2 files changed, 97 insertions, 1 deletions
```diff
diff --git a/Modules/_sre.c b/Modules/_sre.c
index a86c5f2..6873f1d 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -274,6 +274,38 @@ _sre_getcodesize_impl(PyObject *module)
 }
 
 /*[clinic input]
+_sre.ascii_iscased -> bool
+
+    character: int
+    /
+
+[clinic start generated code]*/
+
+static int
+_sre_ascii_iscased_impl(PyObject *module, int character)
+/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
+{
+    unsigned int ch = (unsigned int)character;
+    return ch != sre_lower(ch) || ch != sre_upper(ch);
+}
+
+/*[clinic input]
+_sre.unicode_iscased -> bool
+
+    character: int
+    /
+
+[clinic start generated code]*/
+
+static int
+_sre_unicode_iscased_impl(PyObject *module, int character)
+/*[clinic end generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/
+{
+    unsigned int ch = (unsigned int)character;
+    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch);
+}
+
+/*[clinic input]
 _sre.ascii_tolower -> int
 
     character: int
@@ -2750,6 +2782,8 @@ static PyTypeObject Scanner_Type = {
 static PyMethodDef _functions[] = {
     _SRE_COMPILE_METHODDEF
     _SRE_GETCODESIZE_METHODDEF
+    _SRE_ASCII_ISCASED_METHODDEF
+    _SRE_UNICODE_ISCASED_METHODDEF
     _SRE_ASCII_TOLOWER_METHODDEF
     _SRE_UNICODE_TOLOWER_METHODDEF
     {NULL, NULL}
diff --git a/Modules/clinic/_sre.c.h b/Modules/clinic/_sre.c.h
index 8056eda..1e60686 100644
--- a/Modules/clinic/_sre.c.h
+++ b/Modules/clinic/_sre.c.h
@@ -29,6 +29,68 @@ exit:
     return return_value;
 }
 
+PyDoc_STRVAR(_sre_ascii_iscased__doc__,
+"ascii_iscased($module, character, /)\n"
+"--\n"
+"\n");
+
+#define _SRE_ASCII_ISCASED_METHODDEF    \
+    {"ascii_iscased", (PyCFunction)_sre_ascii_iscased, METH_O, _sre_ascii_iscased__doc__},
+
+static int
+_sre_ascii_iscased_impl(PyObject *module, int character);
+
+static PyObject *
+_sre_ascii_iscased(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    int character;
+    int _return_value;
+
+    if (!PyArg_Parse(arg, "i:ascii_iscased", &character)) {
+        goto exit;
+    }
+    _return_value = _sre_ascii_iscased_impl(module, character);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(_sre_unicode_iscased__doc__,
+"unicode_iscased($module, character, /)\n"
+"--\n"
+"\n");
+
+#define _SRE_UNICODE_ISCASED_METHODDEF    \
+    {"unicode_iscased", (PyCFunction)_sre_unicode_iscased, METH_O, _sre_unicode_iscased__doc__},
+
+static int
+_sre_unicode_iscased_impl(PyObject *module, int character);
+
+static PyObject *
+_sre_unicode_iscased(PyObject *module, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    int character;
+    int _return_value;
+
+    if (!PyArg_Parse(arg, "i:unicode_iscased", &character)) {
+        goto exit;
+    }
+    _return_value = _sre_unicode_iscased_impl(module, character);
+    if ((_return_value == -1) && PyErr_Occurred()) {
+        goto exit;
+    }
+    return_value = PyBool_FromLong((long)_return_value);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_sre_ascii_tolower__doc__,
 "ascii_tolower($module, character, /)\n"
 "--\n"
@@ -715,4 +777,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
 {
     return _sre_SRE_Scanner_search_impl(self);
 }
-/*[clinic end generated code: output=811e67d7f8f5052e input=a9049054013a1b77]*/
+/*[clinic end generated code: output=5fe47c49e475cccb input=a9049054013a1b77]*/
```