summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2024-02-23 20:25:09 (GMT)
committer: GitHub <noreply@github.com>, 2024-02-23 20:25:09 (GMT)
commit: c688c0f130906ff7725a126fff143d1389884f89 (patch)
tree: faf7196a6a37744f941716379e8708f1bd190854
parent: 462a2fc09d9e5f7cdd3a8f2faed73e5bc2c93349 (diff)
download: cpython-c688c0f130906ff7725a126fff143d1389884f89.zip
cpython-c688c0f130906ff7725a126fff143d1389884f89.tar.gz
cpython-c688c0f130906ff7725a126fff143d1389884f89.tar.bz2
gh-67044: Always quote or escape \r and \n in csv.writer() (GH-115741)
-rw-r--r-- Lib/test/test_csv.py | 54
-rw-r--r-- Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst | 2
-rw-r--r-- Modules/_csv.c | 2
3 files changed, 43 insertions, 15 deletions
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 5217f2a..d74ab7e 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -265,9 +265,11 @@ class Test_Csv(unittest.TestCase):
writer = csv.writer(sio, lineterminator=lineterminator)
writer.writerow(['a', 'b'])
writer.writerow([1, 2])
+ writer.writerow(['\r', '\n'])
self.assertEqual(sio.getvalue(),
f'a,b{lineterminator}'
- f'1,2{lineterminator}')
+ f'1,2{lineterminator}'
+ f'"\r","\n"{lineterminator}')
def test_write_iterable(self):
self._write_test(iter(['a', 1, 'p,q']), 'a,1,"p,q"')
@@ -507,22 +509,44 @@ class Test_Csv(unittest.TestCase):
self.assertEqual(r.line_num, 3)
def test_roundtrip_quoteed_newlines(self):
- with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
- writer = csv.writer(fileobj)
- rows = [['a\nb','b'],['c','x\r\nd']]
- writer.writerows(rows)
- fileobj.seek(0)
- for i, row in enumerate(csv.reader(fileobj)):
- self.assertEqual(row, rows[i])
+ rows = [
+ ['\na', 'b\nc', 'd\n'],
+ ['\re', 'f\rg', 'h\r'],
+ ['\r\ni', 'j\r\nk', 'l\r\n'],
+ ['\n\rm', 'n\n\ro', 'p\n\r'],
+ ['\r\rq', 'r\r\rs', 't\r\r'],
+ ['\n\nu', 'v\n\nw', 'x\n\n'],
+ ]
+ for lineterminator in '\r\n', '\n', '\r':
+ with self.subTest(lineterminator=lineterminator):
+ with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
+ writer = csv.writer(fileobj, lineterminator=lineterminator)
+ writer.writerows(rows)
+ fileobj.seek(0)
+ for i, row in enumerate(csv.reader(fileobj)):
+ self.assertEqual(row, rows[i])
def test_roundtrip_escaped_unquoted_newlines(self):
- with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
- writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
- rows = [['a\nb','b'],['c','x\r\nd']]
- writer.writerows(rows)
- fileobj.seek(0)
- for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
- self.assertEqual(row,rows[i])
+ rows = [
+ ['\na', 'b\nc', 'd\n'],
+ ['\re', 'f\rg', 'h\r'],
+ ['\r\ni', 'j\r\nk', 'l\r\n'],
+ ['\n\rm', 'n\n\ro', 'p\n\r'],
+ ['\r\rq', 'r\r\rs', 't\r\r'],
+ ['\n\nu', 'v\n\nw', 'x\n\n'],
+ ]
+ for lineterminator in '\r\n', '\n', '\r':
+ with self.subTest(lineterminator=lineterminator):
+ with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
+ writer = csv.writer(fileobj, lineterminator=lineterminator,
+ quoting=csv.QUOTE_NONE, escapechar="\\")
+ writer.writerows(rows)
+ fileobj.seek(0)
+ for i, row in enumerate(csv.reader(fileobj,
+ quoting=csv.QUOTE_NONE,
+ escapechar="\\")):
+ self.assertEqual(row, rows[i])
+
class TestDialectRegistry(unittest.TestCase):
def test_registry_badargs(self):
diff --git a/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst b/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst
new file mode 100644
index 0000000..095e69b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-20-22-02-34.gh-issue-67044.QF9_Ru.rst
@@ -0,0 +1,2 @@
+:func:`csv.writer` now always quotes or escapes ``'\r'`` and ``'\n'``,
+regardless of *lineterminator* value.
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 8d04728..660c545 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -1152,6 +1152,8 @@ join_append_data(WriterObj *self, int field_kind, const void *field_data,
if (c == dialect->delimiter ||
c == dialect->escapechar ||
c == dialect->quotechar ||
+ c == '\n' ||
+ c == '\r' ||
PyUnicode_FindChar(
dialect->lineterminator, c, 0,
PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {