From a49ac9902903a798fab4970ccf563c531199c3f8 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sat, 27 Jan 2018 14:06:21 +0900 Subject: bpo-32677: Add .isascii() to str, bytes and bytearray (GH-5342) --- Doc/library/stdtypes.rst | 19 ++++++++++++++++++ Include/bytes_methods.h | 2 ++ Lib/collections/__init__.py | 1 + Lib/test/string_tests.py | 8 ++++++++ Lib/test/test_doctest.py | 2 +- Lib/test/test_unicode.py | 5 +++++ .../2018-01-26-20-11-09.bpo-32677.xTGfCq.rst | 2 ++ Objects/bytearrayobject.c | 2 ++ Objects/bytes_methods.c | 20 +++++++++++++++++++ Objects/bytesobject.c | 2 ++ Objects/clinic/unicodeobject.c.h | 23 +++++++++++++++++++++- Objects/stringlib/ctype.h | 6 ++++++ Objects/unicodeobject.c | 20 +++++++++++++++++++ 13 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 120b0d3..ad7f578 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1653,6 +1653,15 @@ expression support in the :mod:`re` module). from the "Alphabetic" property defined in the Unicode Standard. +.. method:: str.isascii() + + Return true if the string is empty or all characters in the string are ASCII, + false otherwise. + ASCII characters have code points in the range U+0000-U+007F. + + .. versionadded:: 3.7 + + .. method:: str.isdecimal() Return true if all characters in the string are decimal @@ -2941,6 +2950,16 @@ place, and instead produce new objects. False +.. method:: bytes.isascii() + bytearray.isascii() + + Return true if the sequence is empty or all bytes in the sequence are ASCII, + false otherwise. + ASCII bytes are in the range 0-0x7F. + + .. versionadded:: 3.7 + + .. 
method:: bytes.isdigit() bytearray.isdigit() diff --git a/Include/bytes_methods.h b/Include/bytes_methods.h index 7fa7540..8434a50 100644 --- a/Include/bytes_methods.h +++ b/Include/bytes_methods.h @@ -9,6 +9,7 @@ extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len); extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len); extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isascii(const char *cptr, Py_ssize_t len); extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len); extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len); extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len); @@ -37,6 +38,7 @@ extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to); extern const char _Py_isspace__doc__[]; extern const char _Py_isalpha__doc__[]; extern const char _Py_isalnum__doc__[]; +extern const char _Py_isascii__doc__[]; extern const char _Py_isdigit__doc__[]; extern const char _Py_islower__doc__[]; extern const char _Py_isupper__doc__[]; diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 7088b88..21d91fd 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1214,6 +1214,7 @@ class UserString(Sequence): return self.data.index(sub, start, end) def isalpha(self): return self.data.isalpha() def isalnum(self): return self.data.isalnum() + def isascii(self): return self.data.isascii() def isdecimal(self): return self.data.isdecimal() def isdigit(self): return self.data.isdigit() def isidentifier(self): return self.data.isidentifier() diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py index cd3ee48..4be1d21 100644 --- a/Lib/test/string_tests.py +++ b/Lib/test/string_tests.py @@ -909,6 +909,14 @@ class BaseTest: self.checkequal(False, 'abc\n', 'isalnum') self.checkraises(TypeError, 'abc', 'isalnum', 42) + def test_isascii(self): + self.checkequal(True, '', 'isascii') + self.checkequal(True, 
'\x00', 'isascii') + self.checkequal(True, '\x7f', 'isascii') + self.checkequal(True, '\x00\x7f', 'isascii') + self.checkequal(False, '\x80', 'isascii') + self.checkequal(False, '\xe9', 'isascii') + def test_isdigit(self): self.checkequal(False, '', 'isdigit') self.checkequal(False, 'a', 'isdigit') diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py index 2258c6b..5ad94ab 100644 --- a/Lib/test/test_doctest.py +++ b/Lib/test/test_doctest.py @@ -659,7 +659,7 @@ plain ol' Python and is guaranteed to be available. >>> import builtins >>> tests = doctest.DocTestFinder().find(builtins) - >>> 790 < len(tests) < 810 # approximate number of objects with docstrings + >>> 800 < len(tests) < 820 # approximate number of objects with docstrings True >>> real_tests = [t for t in tests if len(t.examples) > 0] >>> len(real_tests) # objects that actually have doctests diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 2b77863..3cc018c 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -638,6 +638,11 @@ class UnicodeTest(string_tests.CommonTest, self.assertFalse('\U0001F40D'.isalpha()) self.assertFalse('\U0001F46F'.isalpha()) + def test_isascii(self): + super().test_isascii() + self.assertFalse("\u20ac".isascii()) + self.assertFalse("\U0010ffff".isascii()) + def test_isdecimal(self): self.checkequalnofix(False, '', 'isdecimal') self.checkequalnofix(False, 'a', 'isdecimal') diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst b/Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst new file mode 100644 index 0000000..947c74f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-01-26-20-11-09.bpo-32677.xTGfCq.rst @@ -0,0 +1,2 @@ +Add ``.isascii()`` method to ``str``, ``bytes`` and ``bytearray``. +It can be used to test that a string contains only ASCII characters. 
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index dc1515a..692b7be 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2159,6 +2159,8 @@ bytearray_methods[] = { _Py_isalnum__doc__}, {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS, _Py_isalpha__doc__}, + {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS, + _Py_isascii__doc__}, {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS, _Py_isdigit__doc__}, {"islower", (PyCFunction)stringlib_islower, METH_NOARGS, diff --git a/Objects/bytes_methods.c b/Objects/bytes_methods.c index bd79773..149650f 100644 --- a/Objects/bytes_methods.c +++ b/Objects/bytes_methods.c @@ -92,6 +92,26 @@ _Py_bytes_isalnum(const char *cptr, Py_ssize_t len) } +PyDoc_STRVAR_shared(_Py_isascii__doc__, +"B.isascii() -> bool\n\ +\n\ +Return True if B is empty or all characters in B are ASCII,\n\ +False otherwise."); + +PyObject* +_Py_bytes_isascii(const char *cptr, Py_ssize_t len) +{ + const unsigned char *p = (unsigned char *) cptr; + const unsigned char *e = p + len; + for (; p < e; p++) { + if (*p >= 128) { + Py_RETURN_FALSE; + } + } + Py_RETURN_TRUE; +} + + PyDoc_STRVAR_shared(_Py_isdigit__doc__, "B.isdigit() -> bool\n\ \n\ diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index a921d9c..c358756 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2459,6 +2459,8 @@ bytes_methods[] = { _Py_isalnum__doc__}, {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS, _Py_isalpha__doc__}, + {"isascii", (PyCFunction)stringlib_isascii, METH_NOARGS, + _Py_isascii__doc__}, {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS, _Py_isdigit__doc__}, {"islower", (PyCFunction)stringlib_islower, METH_NOARGS, diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 643ef04..8072516 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -165,6 +165,27 @@ exit: return return_value; } 
+PyDoc_STRVAR(unicode_isascii__doc__, +"isascii($self, /)\n" +"--\n" +"\n" +"Return True if all characters in the string are ASCII, False otherwise.\n" +"\n" +"ASCII characters have code points in the range U+0000-U+007F.\n" +"Empty string is ASCII too."); + +#define UNICODE_ISASCII_METHODDEF \ + {"isascii", (PyCFunction)unicode_isascii, METH_NOARGS, unicode_isascii__doc__}, + +static PyObject * +unicode_isascii_impl(PyObject *self); + +static PyObject * +unicode_isascii(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return unicode_isascii_impl(self); +} + PyDoc_STRVAR(unicode_islower__doc__, "islower($self, /)\n" "--\n" @@ -930,4 +951,4 @@ unicode_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) { return unicode_sizeof_impl(self); } -/*[clinic end generated code: output=1ad4e81b68194264 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=561c88c912b8fe3b input=a9049054013a1b77]*/ diff --git a/Objects/stringlib/ctype.h b/Objects/stringlib/ctype.h index f054625..fd7b1bd 100644 --- a/Objects/stringlib/ctype.h +++ b/Objects/stringlib/ctype.h @@ -23,6 +23,12 @@ stringlib_isalnum(PyObject *self) } static PyObject* +stringlib_isascii(PyObject *self) +{ + return _Py_bytes_isascii(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + +static PyObject* stringlib_isdigit(PyObject *self) { return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self)); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0733011..4b90cc3 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -11612,6 +11612,25 @@ unicode_index(PyObject *self, PyObject *args) } /*[clinic input] +str.isascii as unicode_isascii + +Return True if all characters in the string are ASCII, False otherwise. + +ASCII characters have code points in the range U+0000-U+007F. +Empty string is ASCII too. 
+[clinic start generated code]*/ + +static PyObject * +unicode_isascii_impl(PyObject *self) +/*[clinic end generated code: output=c5910d64b5a8003f input=5a43cbc6399621d5]*/ +{ + if (PyUnicode_READY(self) == -1) { + return NULL; + } + return PyBool_FromLong(PyUnicode_IS_ASCII(self)); +} + +/*[clinic input] str.islower as unicode_islower Return True if the string is a lowercase string, False otherwise. @@ -13801,6 +13820,7 @@ static PyMethodDef unicode_methods[] = { UNICODE_UPPER_METHODDEF {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__}, {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__}, + UNICODE_ISASCII_METHODDEF UNICODE_ISLOWER_METHODDEF UNICODE_ISUPPER_METHODDEF UNICODE_ISTITLE_METHODDEF -- cgit v0.12