From a7551247e7cb7010fb4735281f1afa4abeb8a9cc Mon Sep 17 00:00:00 2001 From: Erlend Egeberg Aasland Date: Tue, 5 Apr 2022 16:15:25 +0200 Subject: bpo-41930: Add support for SQLite serialise/deserialise API (GH-26728) Co-authored-by: Jelle Zijlstra Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> --- Doc/library/sqlite3.rst | 38 +++++ Doc/whatsnew/3.11.rst | 5 + Lib/test/test_sqlite3/test_dbapi.py | 55 +++++++ .../2021-06-17-00-02-58.bpo-41930.JS6fsd.rst | 3 + Modules/_sqlite/clinic/connection.c.h | 160 ++++++++++++++++++++- Modules/_sqlite/connection.c | 121 ++++++++++++++++ PCbuild/_sqlite3.vcxproj | 1 + configure | 44 ++++++ configure.ac | 6 + pyconfig.h.in | 3 + 10 files changed, 435 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2021-06-17-00-02-58.bpo-41930.JS6fsd.rst diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index e70d038..852b684 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -748,6 +748,44 @@ Connection Objects .. versionadded:: 3.11 + .. method:: serialize(*, name="main") + + This method serializes a database into a :class:`bytes` object. For an + ordinary on-disk database file, the serialization is just a copy of the + disk file. For an in-memory database or a "temp" database, the + serialization is the same sequence of bytes which would be written to + disk if that database were backed up to disk. + + *name* is the database to be serialized, and defaults to the main + database. + + .. note:: + + This method is only available if the underlying SQLite library has the + serialize API. + + .. versionadded:: 3.11 + + + .. method:: deserialize(data, /, *, name="main") + + This method causes the database connection to disconnect from database + *name*, and reopen *name* as an in-memory database based on the + serialization contained in *data*. Deserialization will raise + :exc:`OperationalError` if the database connection is currently involved + in a read transaction or a backup operation. :exc:`DataError` will be + raised if ``len(data)`` is larger than ``2**63 - 1``, and + :exc:`DatabaseError` will be raised if *data* does not contain a valid + SQLite database. + + .. note:: + + This method is only available if the underlying SQLite library has the + deserialize API. + + .. versionadded:: 3.11 + + .. _sqlite3-cursor-objects: Cursor Objects diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index d0c10a9..c312645 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -366,6 +366,11 @@ sqlite3 Instead we leave it to the SQLite library to handle these cases. (Contributed by Erlend E. Aasland in :issue:`44092`.) +* Add :meth:`~sqlite3.Connection.serialize` and + :meth:`~sqlite3.Connection.deserialize` to :class:`sqlite3.Connection` for + serializing and deserializing databases. + (Contributed by Erlend E. Aasland in :issue:`41930`.) + sys --- diff --git a/Lib/test/test_sqlite3/test_dbapi.py b/Lib/test/test_sqlite3/test_dbapi.py index 177c2cd..0248281 100644 --- a/Lib/test/test_sqlite3/test_dbapi.py +++ b/Lib/test/test_sqlite3/test_dbapi.py @@ -29,6 +29,7 @@ import unittest from test.support import ( SHORT_TIMEOUT, + bigmemtest, check_disallow_instantiation, threading_helper, ) @@ -603,6 +604,56 @@ class UninitialisedConnectionTests(unittest.TestCase): func) +@unittest.skipUnless(hasattr(sqlite.Connection, "serialize"), + "Needs SQLite serialize API") +class SerializeTests(unittest.TestCase): + def test_serialize_deserialize(self): + with memory_database() as cx: + with cx: + cx.execute("create table t(t)") + data = cx.serialize() + self.assertEqual(len(data), 8192) + + # Remove test table, verify that it was removed. + with cx: + cx.execute("drop table t") + regex = "no such table" + with self.assertRaisesRegex(sqlite.OperationalError, regex): + cx.execute("select t from t") + + # Deserialize and verify that test table is restored. + cx.deserialize(data) + cx.execute("select t from t") + + def test_deserialize_wrong_args(self): + dataset = ( + (BufferError, memoryview(b"blob")[::2]), + (TypeError, []), + (TypeError, 1), + (TypeError, None), + ) + for exc, arg in dataset: + with self.subTest(exc=exc, arg=arg): + with memory_database() as cx: + self.assertRaises(exc, cx.deserialize, arg) + + def test_deserialize_corrupt_database(self): + with memory_database() as cx: + regex = "file is not a database" + with self.assertRaisesRegex(sqlite.DatabaseError, regex): + cx.deserialize(b"\0\1\3") + # SQLite does not generate an error until you try to query the + # deserialized database. + cx.execute("create table fail(f)") + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=2**63, memuse=3, dry_run=False) + def test_deserialize_too_much_data_64bit(self): + with memory_database() as cx: + with self.assertRaisesRegex(OverflowError, "'data' is too large"): + cx.deserialize(b"b" * size) + + class OpenTests(unittest.TestCase): _sql = "create table test(id integer)" @@ -1030,6 +1081,10 @@ class ThreadTests(unittest.TestCase): lambda: self.con.setlimit(sqlite.SQLITE_LIMIT_LENGTH, -1), lambda: self.con.getlimit(sqlite.SQLITE_LIMIT_LENGTH), ] + if hasattr(sqlite.Connection, "serialize"): + fns.append(lambda: self.con.serialize()) + fns.append(lambda: self.con.deserialize(b"")) + for fn in fns: with self.subTest(fn=fn): self._run_test(fn) diff --git a/Misc/NEWS.d/next/Library/2021-06-17-00-02-58.bpo-41930.JS6fsd.rst b/Misc/NEWS.d/next/Library/2021-06-17-00-02-58.bpo-41930.JS6fsd.rst new file mode 100644 index 0000000..ce494e7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-06-17-00-02-58.bpo-41930.JS6fsd.rst @@ -0,0 +1,3 @@ +Add :meth:`~sqlite3.Connection.serialize` and +:meth:`~sqlite3.Connection.deserialize` support to :mod:`sqlite3`. Patch by +Erlend E. Aasland. diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h index 111e344..99ef94e 100644 --- a/Modules/_sqlite/clinic/connection.c.h +++ b/Modules/_sqlite/clinic/connection.c.h @@ -693,6 +693,156 @@ exit: return return_value; } +#if defined(PY_SQLITE_HAVE_SERIALIZE) + +PyDoc_STRVAR(serialize__doc__, +"serialize($self, /, *, name=\'main\')\n" +"--\n" +"\n" +"Serialize a database into a byte string.\n" +"\n" +" name\n" +" Which database to serialize.\n" +"\n" +"For an ordinary on-disk database file, the serialization is just a copy of the\n" +"disk file. For an in-memory database or a \"temp\" database, the serialization is\n" +"the same sequence of bytes which would be written to disk if that database\n" +"were backed up to disk."); + +#define SERIALIZE_METHODDEF \ + {"serialize", (PyCFunction)(void(*)(void))serialize, METH_FASTCALL|METH_KEYWORDS, serialize__doc__}, + +static PyObject * +serialize_impl(pysqlite_Connection *self, const char *name); + +static PyObject * +serialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"name", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "serialize", 0}; + PyObject *argsbuf[1]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; + const char *name = "main"; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("serialize", "argument 'name'", "str", args[0]); + goto exit; + } + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); + if (name == NULL) { + goto exit; + } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } +skip_optional_kwonly: + return_value = serialize_impl(self, name); + +exit: + return return_value; +} + +#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */ + +#if defined(PY_SQLITE_HAVE_SERIALIZE) + +PyDoc_STRVAR(deserialize__doc__, +"deserialize($self, data, /, *, name=\'main\')\n" +"--\n" +"\n" +"Load a serialized database.\n" +"\n" +" data\n" +" The serialized database content.\n" +" name\n" +" Which database to reopen with the deserialization.\n" +"\n" +"The deserialize interface causes the database connection to disconnect from the\n" +"target database, and then reopen it as an in-memory database based on the given\n" +"serialized data.\n" +"\n" +"The deserialize interface will fail with SQLITE_BUSY if the database is\n" +"currently in a read transaction or is involved in a backup operation."); + +#define DESERIALIZE_METHODDEF \ + {"deserialize", (PyCFunction)(void(*)(void))deserialize, METH_FASTCALL|METH_KEYWORDS, deserialize__doc__}, + +static PyObject * +deserialize_impl(pysqlite_Connection *self, Py_buffer *data, + const char *name); + +static PyObject * +deserialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"", "name", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "deserialize", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + const char *name = "main"; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyUnicode_Check(args[0])) { + Py_ssize_t len; + const char *ptr = PyUnicode_AsUTF8AndSize(args[0], &len); + if (ptr == NULL) { + goto exit; + } + PyBuffer_FillInfo(&data, args[0], (void *)ptr, len, 1, 0); + } + else { /* any bytes-like object */ + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!PyBuffer_IsContiguous(&data, 'C')) { + _PyArg_BadArgument("deserialize", "argument 1", "contiguous buffer", args[0]); + goto exit; + } + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("deserialize", "argument 'name'", "str", args[1]); + goto exit; + } + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[1], &name_length); + if (name == NULL) { + goto exit; + } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } +skip_optional_kwonly: + return_value = deserialize_impl(self, &data, name); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +#endif /* defined(PY_SQLITE_HAVE_SERIALIZE) */ + PyDoc_STRVAR(pysqlite_connection_enter__doc__, "__enter__($self, /)\n" "--\n" @@ -832,4 +982,12 @@ exit: #ifndef PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF #define PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF #endif /* !defined(PYSQLITE_CONNECTION_LOAD_EXTENSION_METHODDEF) */ -/*[clinic end generated code: output=176c9095219b17c4 input=a9049054013a1b77]*/ + +#ifndef SERIALIZE_METHODDEF + #define SERIALIZE_METHODDEF +#endif /* !defined(SERIALIZE_METHODDEF) */ + +#ifndef DESERIALIZE_METHODDEF + #define DESERIALIZE_METHODDEF +#endif /* !defined(DESERIALIZE_METHODDEF) */ +/*[clinic end generated code: output=d965a68f9229a56c input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c index 37f6d0f..9d187cf 100644 --- a/Modules/_sqlite/connection.c +++ b/Modules/_sqlite/connection.c @@ -1818,6 +1818,125 @@ pysqlite_connection_create_collation_impl(pysqlite_Connection *self, Py_RETURN_NONE; } +#ifdef PY_SQLITE_HAVE_SERIALIZE +/*[clinic input] +_sqlite3.Connection.serialize as serialize + + * + name: str = "main" + Which database to serialize. + +Serialize a database into a byte string. + +For an ordinary on-disk database file, the serialization is just a copy of the +disk file. For an in-memory database or a "temp" database, the serialization is +the same sequence of bytes which would be written to disk if that database +were backed up to disk. +[clinic start generated code]*/ + +static PyObject * +serialize_impl(pysqlite_Connection *self, const char *name) +/*[clinic end generated code: output=97342b0e55239dd3 input=d2eb5194a65abe2b]*/ +{ + if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) { + return NULL; + } + + /* If SQLite has a contiguous memory representation of the database, we can + * avoid memory allocations, so we try with the no-copy flag first. + */ + sqlite3_int64 size; + unsigned int flags = SQLITE_SERIALIZE_NOCOPY; + const char *data; + + Py_BEGIN_ALLOW_THREADS + data = (const char *)sqlite3_serialize(self->db, name, &size, flags); + if (data == NULL) { + flags &= ~SQLITE_SERIALIZE_NOCOPY; + data = (const char *)sqlite3_serialize(self->db, name, &size, flags); + } + Py_END_ALLOW_THREADS + + if (data == NULL) { + PyErr_Format(self->OperationalError, "unable to serialize '%s'", + name); + return NULL; + } + PyObject *res = PyBytes_FromStringAndSize(data, size); + if (!(flags & SQLITE_SERIALIZE_NOCOPY)) { + sqlite3_free((void *)data); + } + return res; +} + +/*[clinic input] +_sqlite3.Connection.deserialize as deserialize + + data: Py_buffer(accept={buffer, str}) + The serialized database content. + / + * + name: str = "main" + Which database to reopen with the deserialization. + +Load a serialized database. + +The deserialize interface causes the database connection to disconnect from the +target database, and then reopen it as an in-memory database based on the given +serialized data. + +The deserialize interface will fail with SQLITE_BUSY if the database is +currently in a read transaction or is involved in a backup operation. +[clinic start generated code]*/ + +static PyObject * +deserialize_impl(pysqlite_Connection *self, Py_buffer *data, + const char *name) +/*[clinic end generated code: output=e394c798b98bad89 input=1be4ca1faacf28f2]*/ +{ + if (!pysqlite_check_thread(self) || !pysqlite_check_connection(self)) { + return NULL; + } + + /* Transfer ownership of the buffer to SQLite: + * - Move buffer from Py to SQLite + * - Tell SQLite to free buffer memory + * - Tell SQLite that it is permitted to grow the resulting database + * + * Make sure we don't overflow sqlite3_deserialize(); it accepts a signed + * 64-bit int as its data size argument. + * + * We can safely use sqlite3_malloc64 here, since it was introduced before + * the serialize APIs. + */ + if (data->len > 9223372036854775807) { // (1 << 63) - 1 + PyErr_SetString(PyExc_OverflowError, "'data' is too large"); + return NULL; + } + + sqlite3_int64 size = (sqlite3_int64)data->len; + unsigned char *buf = sqlite3_malloc64(size); + if (buf == NULL) { + return PyErr_NoMemory(); + } + + const unsigned int flags = SQLITE_DESERIALIZE_FREEONCLOSE | + SQLITE_DESERIALIZE_RESIZEABLE; + int rc; + Py_BEGIN_ALLOW_THREADS + (void)memcpy(buf, data->buf, data->len); + rc = sqlite3_deserialize(self->db, name, buf, size, size, flags); + Py_END_ALLOW_THREADS + + if (rc != SQLITE_OK) { + (void)_pysqlite_seterror(self->state, self->db); + return NULL; + } + Py_RETURN_NONE; +} +#endif // PY_SQLITE_HAVE_SERIALIZE + + /*[clinic input] _sqlite3.Connection.__enter__ as pysqlite_connection_enter @@ -1971,6 +2090,8 @@ static PyMethodDef connection_methods[] = { PYSQLITE_CONNECTION_SET_TRACE_CALLBACK_METHODDEF SETLIMIT_METHODDEF GETLIMIT_METHODDEF + SERIALIZE_METHODDEF + DESERIALIZE_METHODDEF {NULL, NULL} }; diff --git a/PCbuild/_sqlite3.vcxproj b/PCbuild/_sqlite3.vcxproj index e268c47..9cff43f 100644 --- a/PCbuild/_sqlite3.vcxproj +++ b/PCbuild/_sqlite3.vcxproj @@ -94,6 +94,7 @@ $(sqlite3Dir);%(AdditionalIncludeDirectories) + PY_SQLITE_HAVE_SERIALIZE;%(PreprocessorDefinitions) diff --git a/configure b/configure index 44912b9..69b1230 100755 --- a/configure +++ b/configure @@ -12904,6 +12904,50 @@ else fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sqlite3_serialize in -lsqlite3" >&5 +$as_echo_n "checking for sqlite3_serialize in -lsqlite3... " >&6; } +if ${ac_cv_lib_sqlite3_sqlite3_serialize+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsqlite3 $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char sqlite3_serialize (); +int +main () +{ +return sqlite3_serialize (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_sqlite3_sqlite3_serialize=yes +else + ac_cv_lib_sqlite3_sqlite3_serialize=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_sqlite3_sqlite3_serialize" >&5 +$as_echo "$ac_cv_lib_sqlite3_sqlite3_serialize" >&6; } +if test "x$ac_cv_lib_sqlite3_sqlite3_serialize" = xyes; then : + + +$as_echo "#define PY_SQLITE_HAVE_SERIALIZE 1" >>confdefs.h + + +fi + else diff --git a/configure.ac b/configure.ac index c02adf7..5860595 100644 --- a/configure.ac +++ b/configure.ac @@ -3605,6 +3605,12 @@ dnl hence CPPFLAGS instead of CFLAGS. [have_sqlite3_load_extension=yes], [have_sqlite3_load_extension=no] ) + AC_CHECK_LIB([sqlite3], [sqlite3_serialize], [ + AC_DEFINE( + [PY_SQLITE_HAVE_SERIALIZE], [1], + [Define if SQLite was compiled with the serialize API] + ) + ]) ], [ have_supported_sqlite3=no ]) diff --git a/pyconfig.h.in b/pyconfig.h.in index be776f7..4ac054a 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1506,6 +1506,9 @@ /* Define to 1 to build the sqlite module with loadable extensions support. */ #undef PY_SQLITE_ENABLE_LOAD_EXTENSION +/* Define if SQLite was compiled with the serialize API */ +#undef PY_SQLITE_HAVE_SERIALIZE + /* Default cipher suites list for ssl module. 1: Python's preferred selection, 2: leave OpenSSL defaults untouched, 0: custom string */ #undef PY_SSL_DEFAULT_CIPHERS -- cgit v0.12