summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2017-05-05 07:42:46 (GMT)
committer: GitHub <noreply@github.com> 2017-05-05 07:42:46 (GMT)
commit: 7186cc29be352bed6f1110873283d073fd0643e4 (patch)
tree: 62b9aa53a22754f7e95cbde603643b8d33df6d07
parent: 76a3e51a403bc84ed536921866c86dd7d07aaa7e (diff)
download: cpython-7186cc29be352bed6f1110873283d073fd0643e4.zip
cpython-7186cc29be352bed6f1110873283d073fd0643e4.tar.gz
cpython-7186cc29be352bed6f1110873283d073fd0643e4.tar.bz2
bpo-30277: Replace _sre.getlower() with _sre.ascii_tolower() and _sre.unicode_tolower(). (#1468)
-rw-r--r-- Lib/sre_compile.py 23
-rw-r--r-- Lib/test/test_re.py 26
-rw-r--r-- Modules/_sre.c 29
-rw-r--r-- Modules/clinic/_sre.c.h 49
4 files changed, 83 insertions, 44 deletions
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index d7ee4e8..db8b8a2 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -69,13 +69,14 @@ def _compile(code, pattern, flags):
REPEATING_CODES = _REPEATING_CODES
SUCCESS_CODES = _SUCCESS_CODES
ASSERT_CODES = _ASSERT_CODES
- if (flags & SRE_FLAG_IGNORECASE and
- not (flags & SRE_FLAG_LOCALE) and
- flags & SRE_FLAG_UNICODE and
- not (flags & SRE_FLAG_ASCII)):
- fixes = _ignorecase_fixes
- else:
- fixes = None
+ tolower = None
+ fixes = None
+ if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE:
+ if flags & SRE_FLAG_UNICODE and not flags & SRE_FLAG_ASCII:
+ tolower = _sre.unicode_tolower
+ fixes = _ignorecase_fixes
+ else:
+ tolower = _sre.ascii_tolower
for op, av in pattern:
if op in LITERAL_CODES:
if not flags & SRE_FLAG_IGNORECASE:
@@ -85,7 +86,7 @@ def _compile(code, pattern, flags):
emit(OP_LOC_IGNORE[op])
emit(av)
else:
- lo = _sre.getlower(av, flags)
+ lo = tolower(av)
if fixes and lo in fixes:
emit(IN_IGNORE)
skip = _len(code); emit(0)
@@ -102,16 +103,12 @@ def _compile(code, pattern, flags):
elif op is IN:
if not flags & SRE_FLAG_IGNORECASE:
emit(op)
- fixup = None
elif flags & SRE_FLAG_LOCALE:
emit(IN_LOC_IGNORE)
- fixup = None
else:
emit(IN_IGNORE)
- def fixup(literal, flags=flags):
- return _sre.getlower(literal, flags)
skip = _len(code); emit(0)
- _compile_charset(av, flags, code, fixup, fixes)
+ _compile_charset(av, flags, code, tolower, fixes)
code[skip] = _len(code) - skip
elif op is ANY:
if flags & SRE_FLAG_DOTALL:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 7601dc8..b5b7cff 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -883,17 +883,23 @@ class ReTests(unittest.TestCase):
def test_category(self):
self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
- def test_getlower(self):
+ @cpython_only
+ def test_case_helpers(self):
import _sre
- self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
- self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
- self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
- self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a'))
-
- self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
- self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC")
- self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC")
- self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC")
+ for i in range(128):
+ c = chr(i)
+ lo = ord(c.lower())
+ self.assertEqual(_sre.ascii_tolower(i), lo)
+ self.assertEqual(_sre.unicode_tolower(i), lo)
+
+ for i in list(range(128, 0x1000)) + [0x10400, 0x10428]:
+ c = chr(i)
+ self.assertEqual(_sre.ascii_tolower(i), i)
+ if i != 0x0130:
+ self.assertEqual(_sre.unicode_tolower(i), ord(c.lower()))
+
+ self.assertEqual(_sre.ascii_tolower(0x0130), 0x0130)
+ self.assertEqual(_sre.unicode_tolower(0x0130), ord('i'))
def test_not_literal(self):
self.assertEqual(re.search(r"\s([^a])", " b").group(1), "b")
diff --git a/Modules/_sre.c b/Modules/_sre.c
index afb2bce..a86c5f2 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -274,25 +274,35 @@ _sre_getcodesize_impl(PyObject *module)
}
/*[clinic input]
-_sre.getlower -> int
+_sre.ascii_tolower -> int
character: int
- flags: int
/
[clinic start generated code]*/
static int
-_sre_getlower_impl(PyObject *module, int character, int flags)
-/*[clinic end generated code: output=47eebc4c1214feb5 input=087d2f1c44bbca6f]*/
+_sre_ascii_tolower_impl(PyObject *module, int character)
+/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
{
- if (flags & SRE_FLAG_LOCALE)
- return sre_lower_locale(character);
- if (flags & SRE_FLAG_UNICODE)
- return sre_lower_unicode(character);
return sre_lower(character);
}
+/*[clinic input]
+_sre.unicode_tolower -> int
+
+ character: int
+ /
+
+[clinic start generated code]*/
+
+static int
+_sre_unicode_tolower_impl(PyObject *module, int character)
+/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/
+{
+ return sre_lower_unicode(character);
+}
+
LOCAL(void)
state_reset(SRE_STATE* state)
{
@@ -2740,7 +2750,8 @@ static PyTypeObject Scanner_Type = {
static PyMethodDef _functions[] = {
_SRE_COMPILE_METHODDEF
_SRE_GETCODESIZE_METHODDEF
- _SRE_GETLOWER_METHODDEF
+ _SRE_ASCII_TOLOWER_METHODDEF
+ _SRE_UNICODE_TOLOWER_METHODDEF
{NULL, NULL}
};
diff --git a/Modules/clinic/_sre.c.h b/Modules/clinic/_sre.c.h
index 5278323..8056eda 100644
--- a/Modules/clinic/_sre.c.h
+++ b/Modules/clinic/_sre.c.h
@@ -29,34 +29,59 @@ exit:
return return_value;
}
-PyDoc_STRVAR(_sre_getlower__doc__,
-"getlower($module, character, flags, /)\n"
+PyDoc_STRVAR(_sre_ascii_tolower__doc__,
+"ascii_tolower($module, character, /)\n"
"--\n"
"\n");
-#define _SRE_GETLOWER_METHODDEF \
- {"getlower", (PyCFunction)_sre_getlower, METH_FASTCALL, _sre_getlower__doc__},
+#define _SRE_ASCII_TOLOWER_METHODDEF \
+ {"ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_tolower__doc__},
static int
-_sre_getlower_impl(PyObject *module, int character, int flags);
+_sre_ascii_tolower_impl(PyObject *module, int character);
static PyObject *
-_sre_getlower(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
+_sre_ascii_tolower(PyObject *module, PyObject *arg)
{
PyObject *return_value = NULL;
int character;
- int flags;
int _return_value;
- if (!_PyArg_ParseStack(args, nargs, "ii:getlower",
- &character, &flags)) {
+ if (!PyArg_Parse(arg, "i:ascii_tolower", &character)) {
+ goto exit;
+ }
+ _return_value = _sre_ascii_tolower_impl(module, character);
+ if ((_return_value == -1) && PyErr_Occurred()) {
goto exit;
}
+ return_value = PyLong_FromLong((long)_return_value);
+
+exit:
+ return return_value;
+}
+
+PyDoc_STRVAR(_sre_unicode_tolower__doc__,
+"unicode_tolower($module, character, /)\n"
+"--\n"
+"\n");
+
+#define _SRE_UNICODE_TOLOWER_METHODDEF \
+ {"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__},
+
+static int
+_sre_unicode_tolower_impl(PyObject *module, int character);
+
+static PyObject *
+_sre_unicode_tolower(PyObject *module, PyObject *arg)
+{
+ PyObject *return_value = NULL;
+ int character;
+ int _return_value;
- if (!_PyArg_NoStackKeywords("getlower", kwnames)) {
+ if (!PyArg_Parse(arg, "i:unicode_tolower", &character)) {
goto exit;
}
- _return_value = _sre_getlower_impl(module, character, flags);
+ _return_value = _sre_unicode_tolower_impl(module, character);
if ((_return_value == -1) && PyErr_Occurred()) {
goto exit;
}
@@ -690,4 +715,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyObject *Py_UNUSED(ignored))
{
return _sre_SRE_Scanner_search_impl(self);
}
-/*[clinic end generated code: output=e6dab3ba8864da9e input=a9049054013a1b77]*/
+/*[clinic end generated code: output=811e67d7f8f5052e input=a9049054013a1b77]*/