diff options
author | Corvin <corvin@corvin.dev> | 2023-08-30 09:06:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-30 09:06:21 (GMT) |
commit | 400a1cebc743515e40157ed7af86e48d654290ce (patch) | |
tree | e4f5f2e61c3cff4743684fd60908db34880ca10a /Lib/sqlite3 | |
parent | 210a5d7b8b2f5cdaf3740e8b9b468ed5ddf24591 (diff) | |
download | cpython-400a1cebc743515e40157ed7af86e48d654290ce.zip cpython-400a1cebc743515e40157ed7af86e48d654290ce.tar.gz cpython-400a1cebc743515e40157ed7af86e48d654290ce.tar.bz2 |
gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (#108657)
Co-authored-by: Erlend E. Aasland <erlend@python.org>
Diffstat (limited to 'Lib/sqlite3')
-rw-r--r-- | Lib/sqlite3/dump.py | 27 |
1 file changed, 25 insertions, 2 deletions
```diff
diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py
index ead3360..481d605 100644
--- a/Lib/sqlite3/dump.py
+++ b/Lib/sqlite3/dump.py
@@ -7,6 +7,10 @@
 # future enhancements, you should normally quote any identifier that
 # is an English language word, even if you do not have to."
 
+
+from contextlib import contextmanager
+
+
 def _quote_name(name):
     return '"{0}"'.format(name.replace('"', '""'))
 
@@ -15,6 +19,24 @@ def _quote_value(value):
     return "'{0}'".format(value.replace("'", "''"))
 
 
+def _force_decode(bs, *args, **kwargs):
+    # gh-108590: Don't fail if the database contains invalid Unicode data.
+    try:
+        return bs.decode(*args, **kwargs)
+    except UnicodeDecodeError:
+        return "".join([chr(c) for c in bs])
+
+
+@contextmanager
+def _text_factory(con, factory):
+    saved_factory = con.text_factory
+    con.text_factory = factory
+    try:
+        yield
+    finally:
+        con.text_factory = saved_factory
+
+
 def _iterdump(connection):
     """
     Returns an iterator to the dump of the database in an SQL text format.
@@ -74,8 +96,9 @@ def _iterdump(connection):
                 )
             )
         query_res = cu.execute(q)
-        for row in query_res:
-            yield("{0};".format(row[0]))
+        with _text_factory(connection, bytes):
+            for row in query_res:
+                yield("{0};".format(_force_decode(row[0])))
 
     # Now when the type is 'index', 'trigger', or 'view'
     q = """
```