From ab62051152cb24470056ffaeb9107c8b4311375e Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Mon, 11 Oct 2021 20:08:15 +0900 Subject: bpo-20028: Empty escapechar/quotechar is not allowed for csv.Dialect (GH-28833) --- Doc/library/csv.rst | 4 ++++ Lib/test/test_csv.py | 13 ++++++++++++- .../next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst | 2 ++ Modules/_csv.c | 13 ++++--------- 4 files changed, 22 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 899ce02..3a7817c 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -383,6 +383,8 @@ Dialects support the following attributes: :const:`False`. On reading, the *escapechar* removes any special meaning from the following character. It defaults to :const:`None`, which disables escaping. + .. versionchanged:: 3.11 + An empty *escapechar* is not allowed. .. attribute:: Dialect.lineterminator @@ -402,6 +404,8 @@ Dialects support the following attributes: as the *delimiter* or *quotechar*, or which contain new-line characters. It defaults to ``'"'``. + .. versionchanged:: 3.11 + An empty *quotechar* is not allowed. .. attribute:: Dialect.quoting diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index fb27ea3..95a19dd 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -44,6 +44,8 @@ class Test_Csv(unittest.TestCase): quoting=csv.QUOTE_ALL, quotechar='') self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_ALL, quotechar=None) + self.assertRaises(TypeError, ctor, arg, + quoting=csv.QUOTE_NONE, quotechar='') def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) @@ -342,7 +344,6 @@ class Test_Csv(unittest.TestCase): self._read_test(['a,^b,c'], [['a', 'b', 'c']], escapechar='^') self._read_test(['a,\0b,c'], [['a', 'b', 'c']], escapechar='\0') self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None) - self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar='') self._read_test(['a,\\b,c'], [['a', '\\b', 'c']]) def test_read_quoting(self): @@ -913,6 +914,12 @@ class TestDialectValidity(unittest.TestCase): self.assertEqual(d.quotechar, '"') self.assertTrue(d.doublequote) + mydialect.quotechar = "" + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + '"quotechar" must be a 1-character string') + mydialect.quotechar = "''" with self.assertRaises(csv.Error) as cm: mydialect() @@ -977,6 +984,10 @@ class TestDialectValidity(unittest.TestCase): d = mydialect() self.assertEqual(d.escapechar, "\\") + mydialect.escapechar = "" + with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): + mydialect() + mydialect.escapechar = "**" with self.assertRaisesRegex(csv.Error, '"escapechar" must be a 1-character string'): mydialect() diff --git a/Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst b/Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst new file mode 100644 index 0000000..9db15bc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-10-10-00-25-36.bpo-20028.bPx4Z8.rst @@ -0,0 +1,2 @@ +Empty escapechar/quotechar is not allowed when initializing +:class:`csv.Dialect`. Patch by Vajrasky Kok and Dong-hee Na. diff --git a/Modules/_csv.c b/Modules/_csv.c index 469c1a1..1c2f504 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -250,16 +250,14 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt if (len < 0) { return -1; } - if (len > 1) { + if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", name); return -1; } /* PyUnicode_READY() is called in PyUnicode_GetLength() */ - else if (len > 0) { - *target = PyUnicode_READ_CHAR(src, 0); - } + *target = PyUnicode_READ_CHAR(src, 0); } } return 0; @@ -272,7 +270,6 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) *target = dflt; } else { - *target = NOT_SET; if (!PyUnicode_Check(src)) { PyErr_Format(PyExc_TypeError, "\"%s\" must be string, not %.200s", name, @@ -283,16 +280,14 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt) if (len < 0) { return -1; } - if (len > 1) { + if (len != 1) { PyErr_Format(PyExc_TypeError, "\"%s\" must be a 1-character string", name); return -1; } /* PyUnicode_READY() is called in PyUnicode_GetLength() */ - else if (len > 0) { - *target = PyUnicode_READ_CHAR(src, 0); - } + *target = PyUnicode_READ_CHAR(src, 0); } return 0; } -- cgit v0.12