diff options
author | Corvin <corvin@corvin.dev> | 2023-08-30 09:06:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-30 09:06:21 (GMT) |
commit | 400a1cebc743515e40157ed7af86e48d654290ce (patch) | |
tree | e4f5f2e61c3cff4743684fd60908db34880ca10a | |
parent | 210a5d7b8b2f5cdaf3740e8b9b468ed5ddf24591 (diff) | |
download | cpython-400a1cebc743515e40157ed7af86e48d654290ce.zip cpython-400a1cebc743515e40157ed7af86e48d654290ce.tar.gz cpython-400a1cebc743515e40157ed7af86e48d654290ce.tar.bz2 |
gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (#108657)
Co-authored-by: Erlend E. Aasland <erlend@python.org>
-rw-r--r-- | Lib/sqlite3/dump.py | 27 | ||||
-rw-r--r-- | Lib/test/test_sqlite3/test_dump.py | 15 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst | 1 |
3 files changed, 41 insertions, 2 deletions
diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py index ead3360..481d605 100644 --- a/Lib/sqlite3/dump.py +++ b/Lib/sqlite3/dump.py @@ -7,6 +7,10 @@ # future enhancements, you should normally quote any identifier that # is an English language word, even if you do not have to." + +from contextlib import contextmanager + + def _quote_name(name): return '"{0}"'.format(name.replace('"', '""')) @@ -15,6 +19,24 @@ def _quote_value(value): return "'{0}'".format(value.replace("'", "''")) +def _force_decode(bs, *args, **kwargs): + # gh-108590: Don't fail if the database contains invalid Unicode data. + try: + return bs.decode(*args, **kwargs) + except UnicodeDecodeError: + return "".join([chr(c) for c in bs]) + + +@contextmanager +def _text_factory(con, factory): + saved_factory = con.text_factory + con.text_factory = factory + try: + yield + finally: + con.text_factory = saved_factory + + def _iterdump(connection): """ Returns an iterator to the dump of the database in an SQL text format. @@ -74,8 +96,9 @@ def _iterdump(connection): ) ) query_res = cu.execute(q) - for row in query_res: - yield("{0};".format(row[0])) + with _text_factory(connection, bytes): + for row in query_res: + yield("{0};".format(_force_decode(row[0]))) # Now when the type is 'index', 'trigger', or 'view' q = """ diff --git a/Lib/test/test_sqlite3/test_dump.py b/Lib/test/test_sqlite3/test_dump.py index 3107e1b..0279ce6 100644 --- a/Lib/test/test_sqlite3/test_dump.py +++ b/Lib/test/test_sqlite3/test_dump.py @@ -133,6 +133,21 @@ class DumpTests(MemoryDatabaseMixin, unittest.TestCase): actual = list(self.cx.iterdump()) self.assertEqual(expected, actual) + def test_dump_unicode_invalid(self): + # gh-108590 + expected = [ + "BEGIN TRANSACTION;", + "CREATE TABLE foo (data TEXT);", + "INSERT INTO \"foo\" VALUES('a\x9f');", + "COMMIT;", + ] + self.cu.executescript(""" + CREATE TABLE foo (data TEXT); + INSERT INTO foo VALUES (CAST(X'619f' AS TEXT)); + """) + actual = list(self.cx.iterdump()) + self.assertEqual(expected, actual) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst new file mode 100644 index 0000000..50b41f2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst @@ -0,0 +1 @@ +Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson |