From 5269c091458c5ea76eb625e4fabc9980b6309266 Mon Sep 17 00:00:00 2001 From: Erlend Egeberg Aasland Date: Thu, 29 Jul 2021 09:47:56 +0200 Subject: bpo-44688: Remove ASCII limitation from `sqlite3` collation names (GH-27395) --- Doc/library/sqlite3.rst | 4 ++ Doc/whatsnew/3.11.rst | 5 ++ Lib/sqlite3/test/hooks.py | 3 +- Lib/sqlite3/test/regression.py | 2 +- .../2021-07-20-23-28-26.bpo-44688.buFgz3.rst | 2 + Modules/_sqlite/clinic/connection.c.h | 16 ++++-- Modules/_sqlite/connection.c | 63 ++++------------------ 7 files changed, 35 insertions(+), 60 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 05064e4..6399bed 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -402,6 +402,10 @@ Connection Objects con.create_collation("reverse", None) + .. versionchanged:: 3.11 + The collation name can contain any Unicode character. Earlier, only + ASCII characters were allowed. + .. method:: interrupt() diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index b29d0cb..e97162a 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -213,6 +213,11 @@ sqlite3 :meth:`~sqlite3.Connection.set_authorizer`. (Contributed by Erlend E. Aasland in :issue:`44491`.) +* Collation name :meth:`~sqlite3.Connection.create_collation` can now + contain any Unicode character. Collation names with invalid characters + now raise :exc:`UnicodeEncodeError` instead of :exc:`sqlite3.ProgrammingError`. + (Contributed by Erlend E. Aasland in :issue:`44688`.) 
+ Removed ======= diff --git a/Lib/sqlite3/test/hooks.py b/Lib/sqlite3/test/hooks.py index 520a5b9..1be6d38 100644 --- a/Lib/sqlite3/test/hooks.py +++ b/Lib/sqlite3/test/hooks.py @@ -40,8 +40,7 @@ class CollationTests(unittest.TestCase): def test_create_collation_not_ascii(self): con = sqlite.connect(":memory:") - with self.assertRaises(sqlite.ProgrammingError): - con.create_collation("collä", lambda x, y: (x > y) - (x < y)) + con.create_collation("collä", lambda x, y: (x > y) - (x < y)) def test_create_collation_bad_upper(self): class BadUpperStr(str): diff --git a/Lib/sqlite3/test/regression.py b/Lib/sqlite3/test/regression.py index 417a531..6c093d7 100644 --- a/Lib/sqlite3/test/regression.py +++ b/Lib/sqlite3/test/regression.py @@ -278,7 +278,7 @@ class RegressionTests(unittest.TestCase): def test_collation(self): def collation_cb(a, b): return 1 - self.assertRaises(sqlite.ProgrammingError, self.con.create_collation, + self.assertRaises(UnicodeEncodeError, self.con.create_collation, # Lone surrogate cannot be encoded to the default encoding (utf8) "\uDC80", collation_cb) diff --git a/Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst b/Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst new file mode 100644 index 0000000..15f6a52 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-07-20-23-28-26.bpo-44688.buFgz3.rst @@ -0,0 +1,2 @@ +:meth:`sqlite3.Connection.create_collation` now accepts non-ASCII collation +names. Patch by Erlend E. Aasland. 
diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h index ec0a43a..1626e1c 100644 --- a/Modules/_sqlite/clinic/connection.c.h +++ b/Modules/_sqlite/clinic/connection.c.h @@ -722,13 +722,14 @@ PyDoc_STRVAR(pysqlite_connection_create_collation__doc__, static PyObject * pysqlite_connection_create_collation_impl(pysqlite_Connection *self, - PyObject *name, PyObject *callable); + const char *name, + PyObject *callable); static PyObject * pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - PyObject *name; + const char *name; PyObject *callable; if (!_PyArg_CheckPositional("create_collation", nargs, 2, 2)) { @@ -738,10 +739,15 @@ pysqlite_connection_create_collation(pysqlite_Connection *self, PyObject *const _PyArg_BadArgument("create_collation", "argument 1", "str", args[0]); goto exit; } - if (PyUnicode_READY(args[0]) == -1) { + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); + if (name == NULL) { + goto exit; + } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); goto exit; } - name = args[0]; callable = args[1]; return_value = pysqlite_connection_create_collation_impl(self, name, callable); @@ -811,4 +817,4 @@ exit: #ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF #define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF #endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */ -/*[clinic end generated code: output=30f11f2d8f09bdf0 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a7a899c4e41381ac input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c index af093c3..85b666a 100644 --- a/Modules/_sqlite/connection.c +++ b/Modules/_sqlite/connection.c @@ -1720,7 +1720,7 @@ pysqlite_connection_backup_impl(pysqlite_Connection *self, /*[clinic input] _sqlite3.Connection.create_collation as 
pysqlite_connection_create_collation - name: unicode + name: str callback as callable: object / @@ -1729,61 +1729,26 @@ Creates a collation function. Non-standard. static PyObject * pysqlite_connection_create_collation_impl(pysqlite_Connection *self, - PyObject *name, PyObject *callable) -/*[clinic end generated code: output=0f63b8995565ae22 input=5c3898813a776cf2]*/ + const char *name, + PyObject *callable) +/*[clinic end generated code: output=a4ceaff957fdef9a input=301647aab0f2fb1d]*/ { - PyObject* uppercase_name = 0; - Py_ssize_t i, len; - _Py_IDENTIFIER(upper); - const char *uppercase_name_str; - int rc; - unsigned int kind; - const void *data; - if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) { - goto finally; - } - - uppercase_name = _PyObject_CallMethodIdOneArg((PyObject *)&PyUnicode_Type, - &PyId_upper, name); - if (!uppercase_name) { - goto finally; - } - - if (PyUnicode_READY(uppercase_name)) - goto finally; - len = PyUnicode_GET_LENGTH(uppercase_name); - kind = PyUnicode_KIND(uppercase_name); - data = PyUnicode_DATA(uppercase_name); - for (i=0; i<len; i++) { - Py_UCS4 ch = PyUnicode_READ(kind, data, i); - if ((ch >= '0' && ch <= '9') - || (ch >= 'A' && ch <= 'Z') - || (ch == '_')) - { - continue; - } else { - PyErr_SetString(self->ProgrammingError, - "invalid character in collation name"); - goto finally; - } + return NULL; } - uppercase_name_str = PyUnicode_AsUTF8(uppercase_name); - if (!uppercase_name_str) - goto finally; - + int rc; int flags = SQLITE_UTF8; if (callable == Py_None) { - rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags, + rc = sqlite3_create_collation_v2(self->db, name, flags, NULL, NULL, NULL); } else { if (!PyCallable_Check(callable)) { PyErr_SetString(PyExc_TypeError, "parameter must be callable"); - goto finally; + return NULL; } - rc = sqlite3_create_collation_v2(self->db, uppercase_name_str, flags, + rc = sqlite3_create_collation_v2(self->db, name, flags, Py_NewRef(callable), &pysqlite_collation_callback, &_destructor); @@ -1798,16 +1763,10 @@ 
pysqlite_connection_create_collation_impl(pysqlite_Connection *self, Py_DECREF(callable); } _pysqlite_seterror(self->db); - goto finally; - } - -finally: - Py_XDECREF(uppercase_name); - - if (PyErr_Occurred()) { return NULL; } - return Py_NewRef(Py_None); + + Py_RETURN_NONE; } /*[clinic input] -- cgit v0.12