From c688c0f130906ff7725a126fff143d1389884f89 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 23 Feb 2024 22:25:09 +0200 Subject: gh-67044: Always quote or escape \r and \n in csv.writer() (GH-115741) --- Lib/test/test_csv.py | 54 ++++++++++++++++------ .../2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst | 2 + Modules/_csv.c | 2 + 3 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 5217f2a..d74ab7e 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -265,9 +265,11 @@ class Test_Csv(unittest.TestCase): writer = csv.writer(sio, lineterminator=lineterminator) writer.writerow(['a', 'b']) writer.writerow([1, 2]) + writer.writerow(['\r', '\n']) self.assertEqual(sio.getvalue(), f'a,b{lineterminator}' - f'1,2{lineterminator}') + f'1,2{lineterminator}' + f'"\r","\n"{lineterminator}') def test_write_iterable(self): self._write_test(iter(['a', 1, 'p,q']), 'a,1,"p,q"') @@ -507,22 +509,44 @@ class Test_Csv(unittest.TestCase): self.assertEqual(r.line_num, 3) def test_roundtrip_quoteed_newlines(self): - with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: - writer = csv.writer(fileobj) - rows = [['a\nb','b'],['c','x\r\nd']] - writer.writerows(rows) - fileobj.seek(0) - for i, row in enumerate(csv.reader(fileobj)): - self.assertEqual(row, rows[i]) + rows = [ + ['\na', 'b\nc', 'd\n'], + ['\re', 'f\rg', 'h\r'], + ['\r\ni', 'j\r\nk', 'l\r\n'], + ['\n\rm', 'n\n\ro', 'p\n\r'], + ['\r\rq', 'r\r\rs', 't\r\r'], + ['\n\nu', 'v\n\nw', 'x\n\n'], + ] + for lineterminator in '\r\n', '\n', '\r': + with self.subTest(lineterminator=lineterminator): + with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: + writer = csv.writer(fileobj, lineterminator=lineterminator) + writer.writerows(rows) + fileobj.seek(0) + for i, row in enumerate(csv.reader(fileobj)): + self.assertEqual(row, rows[i]) def test_roundtrip_escaped_unquoted_newlines(self): - with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: - writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\") - rows = [['a\nb','b'],['c','x\r\nd']] - writer.writerows(rows) - fileobj.seek(0) - for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")): - self.assertEqual(row,rows[i]) + rows = [ + ['\na', 'b\nc', 'd\n'], + ['\re', 'f\rg', 'h\r'], + ['\r\ni', 'j\r\nk', 'l\r\n'], + ['\n\rm', 'n\n\ro', 'p\n\r'], + ['\r\rq', 'r\r\rs', 't\r\r'], + ['\n\nu', 'v\n\nw', 'x\n\n'], + ] + for lineterminator in '\r\n', '\n', '\r': + with self.subTest(lineterminator=lineterminator): + with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: + writer = csv.writer(fileobj, lineterminator=lineterminator, + quoting=csv.QUOTE_NONE, escapechar="\\") + writer.writerows(rows) + fileobj.seek(0) + for i, row in enumerate(csv.reader(fileobj, + quoting=csv.QUOTE_NONE, + escapechar="\\")): + self.assertEqual(row, rows[i]) + class TestDialectRegistry(unittest.TestCase): def test_registry_badargs(self): diff --git a/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst b/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst new file mode 100644 index 0000000..095e69b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst @@ -0,0 +1,2 @@ +:func:`csv.writer` now always quotes or escapes ``'\r'`` and ``'\n'``, +regardless of *lineterminator* value. diff --git a/Modules/_csv.c b/Modules/_csv.c index 8d04728..660c545 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1152,6 +1152,8 @@ join_append_data(WriterObj *self, int field_kind, const void *field_data, if (c == dialect->delimiter || c == dialect->escapechar || c == dialect->quotechar || + c == '\n' || + c == '\r' || PyUnicode_FindChar( dialect->lineterminator, c, 0, PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) { -- cgit v0.12