summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Corvin <corvin@corvin.dev> 2023-08-30 09:06:21 (GMT)
committer GitHub <noreply@github.com> 2023-08-30 09:06:21 (GMT)
commit 400a1cebc743515e40157ed7af86e48d654290ce (patch)
tree e4f5f2e61c3cff4743684fd60908db34880ca10a
parent 210a5d7b8b2f5cdaf3740e8b9b468ed5ddf24591 (diff)
download cpython-400a1cebc743515e40157ed7af86e48d654290ce.zip
cpython-400a1cebc743515e40157ed7af86e48d654290ce.tar.gz
cpython-400a1cebc743515e40157ed7af86e48d654290ce.tar.bz2
gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (#108657)
Co-authored-by: Erlend E. Aasland <erlend@python.org>
-rw-r--r-- Lib/sqlite3/dump.py 27
-rw-r--r-- Lib/test/test_sqlite3/test_dump.py 15
-rw-r--r-- Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst 1
3 files changed, 41 insertions, 2 deletions
diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py
index ead3360..481d605 100644
--- a/Lib/sqlite3/dump.py
+++ b/Lib/sqlite3/dump.py
@@ -7,6 +7,10 @@
# future enhancements, you should normally quote any identifier that
# is an English language word, even if you do not have to."
+
+from contextlib import contextmanager
+
+
def _quote_name(name):
return '"{0}"'.format(name.replace('"', '""'))
@@ -15,6 +19,24 @@ def _quote_value(value):
return "'{0}'".format(value.replace("'", "''"))
+def _force_decode(bs, *args, **kwargs):
+ # gh-108590: Don't fail if the database contains invalid Unicode data.
+ try:
+ return bs.decode(*args, **kwargs)
+ except UnicodeDecodeError:
+ return "".join([chr(c) for c in bs])
+
+
+@contextmanager
+def _text_factory(con, factory):
+ saved_factory = con.text_factory
+ con.text_factory = factory
+ try:
+ yield
+ finally:
+ con.text_factory = saved_factory
+
+
def _iterdump(connection):
"""
Returns an iterator to the dump of the database in an SQL text format.
@@ -74,8 +96,9 @@ def _iterdump(connection):
)
)
query_res = cu.execute(q)
- for row in query_res:
- yield("{0};".format(row[0]))
+ with _text_factory(connection, bytes):
+ for row in query_res:
+ yield("{0};".format(_force_decode(row[0])))
# Now when the type is 'index', 'trigger', or 'view'
q = """
diff --git a/Lib/test/test_sqlite3/test_dump.py b/Lib/test/test_sqlite3/test_dump.py
index 3107e1b..0279ce6 100644
--- a/Lib/test/test_sqlite3/test_dump.py
+++ b/Lib/test/test_sqlite3/test_dump.py
@@ -133,6 +133,21 @@ class DumpTests(MemoryDatabaseMixin, unittest.TestCase):
actual = list(self.cx.iterdump())
self.assertEqual(expected, actual)
+ def test_dump_unicode_invalid(self):
+ # gh-108590
+ expected = [
+ "BEGIN TRANSACTION;",
+ "CREATE TABLE foo (data TEXT);",
+ "INSERT INTO \"foo\" VALUES('a\x9f');",
+ "COMMIT;",
+ ]
+ self.cu.executescript("""
+ CREATE TABLE foo (data TEXT);
+ INSERT INTO foo VALUES (CAST(X'619f' AS TEXT));
+ """)
+ actual = list(self.cx.iterdump())
+ self.assertEqual(expected, actual)
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst
new file mode 100644
index 0000000..50b41f2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst
@@ -0,0 +1 @@
+Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson.