summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com> 2024-02-20 17:53:29 (GMT)
committer: GitHub <noreply@github.com> 2024-02-20 17:53:29 (GMT)
commit: 5ea86f496a4cfb34abbe2b7bb6fa7f25eeeb6294 (patch)
tree: ab1442b1fc6794a6454d65f514b3cd8b7cc3335a
parent: 20907ca844f6c32a2ecf66f9ea3ab4b8bba93fa5 (diff)
download: cpython-5ea86f496a4cfb34abbe2b7bb6fa7f25eeeb6294.zip
cpython-5ea86f496a4cfb34abbe2b7bb6fa7f25eeeb6294.tar.gz
cpython-5ea86f496a4cfb34abbe2b7bb6fa7f25eeeb6294.tar.bz2
[3.12] gh-115712: Support CSV dialects with delimiter=' ' and skipinitialspace=True (GH-115721) (GH-115729)
csv.writer() now quotes empty fields if delimiter is a space and skipinitialspace is true, and raises an exception if quoting is not possible. (cherry picked from commit 937d2821501de7adaa5ed8491eef4b7f3dc0940a)
-rw-r--r-- Lib/test/test_csv.py 73
-rw-r--r-- Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst 3
-rw-r--r-- Modules/_csv.c 24
3 files changed, 92 insertions, 8 deletions
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 3038369..66c1375 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -46,6 +46,20 @@ class Test_Csv(unittest.TestCase):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
+ ctor(arg, delimiter=' ')
+ ctor(arg, escapechar=' ')
+ ctor(arg, quotechar=' ')
+ ctor(arg, delimiter='\t', skipinitialspace=True)
+ ctor(arg, escapechar='\t', skipinitialspace=True)
+ ctor(arg, quotechar='\t', skipinitialspace=True)
+ ctor(arg, delimiter=' ', skipinitialspace=True)
+ ctor(arg, delimiter='^')
+ ctor(arg, escapechar='^')
+ ctor(arg, quotechar='^')
+ ctor(arg, delimiter='\x85')
+ ctor(arg, escapechar='\x85')
+ ctor(arg, quotechar='\x85')
+ ctor(arg, lineterminator='\x85')
def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
@@ -152,9 +166,6 @@ class Test_Csv(unittest.TestCase):
def test_write_arg_valid(self):
self._write_error_test(csv.Error, None)
- self._write_test((), '')
- self._write_test([None], '""')
- self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
# Check that exceptions are passed up the chain
self._write_error_test(OSError, BadIterable())
class BadList:
@@ -168,7 +179,6 @@ class Test_Csv(unittest.TestCase):
def __str__(self):
raise OSError
self._write_error_test(OSError, [BadItem()])
-
def test_write_bigfield(self):
# This exercises the buffer realloc functionality
bigstring = 'X' * 50000
@@ -275,6 +285,49 @@ class Test_Csv(unittest.TestCase):
fileobj.seek(0)
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')
+
+ def test_write_empty_fields(self):
+ self._write_test((), '')
+ self._write_test([''], '""')
+ self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE)
+ self._write_test([''], '""', quoting=csv.QUOTE_STRINGS)
+ self._write_test([''], '""', quoting=csv.QUOTE_NOTNULL)
+ self._write_test([None], '""')
+ self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
+ self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_STRINGS)
+ self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NOTNULL)
+ self._write_test(['', ''], ',')
+ self._write_test([None, None], ',')
+
+ def test_write_empty_fields_space_delimiter(self):
+ self._write_test([''], '""', delimiter=' ', skipinitialspace=False)
+ self._write_test([''], '""', delimiter=' ', skipinitialspace=True)
+ self._write_test([None], '""', delimiter=' ', skipinitialspace=False)
+ self._write_test([None], '""', delimiter=' ', skipinitialspace=True)
+
+ self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False)
+ self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True)
+ self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False)
+ self._write_test([None, None], '"" ""', delimiter=' ', skipinitialspace=True)
+
+ self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False,
+ quoting=csv.QUOTE_NONE)
+ self._write_error_test(csv.Error, ['', ''],
+ delimiter=' ', skipinitialspace=True,
+ quoting=csv.QUOTE_NONE)
+ for quoting in csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
+ self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=False,
+ quoting=quoting)
+ self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True,
+ quoting=quoting)
+
+ for quoting in csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
+ self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False,
+ quoting=quoting)
+ self._write_error_test(csv.Error, [None, None],
+ delimiter=' ', skipinitialspace=True,
+ quoting=quoting)
+
def test_writerows_errors(self):
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
writer = csv.writer(fileobj)
@@ -376,6 +429,14 @@ class Test_Csv(unittest.TestCase):
[['no space', 'space', 'spaces', '\ttab']],
skipinitialspace=True)
+ def test_read_space_delimiter(self):
+ self._read_test(['a   b', '  a  ', '  ', ''],
+ [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []],
+ delimiter=' ', skipinitialspace=False)
+ self._read_test(['a   b', '  a  ', '  ', ''],
+ [['a', 'b'], ['a', ''], [''], []],
+ delimiter=' ', skipinitialspace=True)
+
def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size
# limits.
@@ -502,10 +563,10 @@ class TestDialectRegistry(unittest.TestCase):
escapechar = "\\"
with TemporaryFile("w+", encoding="utf-8") as fileobj:
- fileobj.write("abc def\nc1ccccc1 benzene\n")
+ fileobj.write("abc   def\nc1ccccc1 benzene\n")
fileobj.seek(0)
reader = csv.reader(fileobj, dialect=space())
- self.assertEqual(next(reader), ["abc", "def"])
+ self.assertEqual(next(reader), ["abc", "", "", "def"])
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])
def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
diff --git a/Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst b/Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst
new file mode 100644
index 0000000..70243dc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-20-16-42-54.gh-issue-115712.EXVMXw.rst
@@ -0,0 +1,3 @@
+:func:`csv.writer()` now quotes empty fields if delimiter is a
+space and skipinitialspace is true and raises exception if quoting is not
+possible.
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 91cb636..23437a3 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -1180,6 +1180,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
static int
join_append(WriterObj *self, PyObject *field, int quoted)
{
+ DialectObj *dialect = self->dialect;
int field_kind = -1;
const void *field_data = NULL;
Py_ssize_t field_len = 0;
@@ -1192,6 +1193,19 @@ join_append(WriterObj *self, PyObject *field, int quoted)
field_data = PyUnicode_DATA(field);
field_len = PyUnicode_GET_LENGTH(field);
}
+ if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
+ if (dialect->quoting == QUOTE_NONE ||
+ (field == NULL &&
+ (dialect->quoting == QUOTE_STRINGS ||
+ dialect->quoting == QUOTE_NOTNULL)))
+ {
+ PyErr_Format(self->error_obj,
+ "empty field must be quoted if delimiter is a space "
+ "and skipinitialspace is true");
+ return 0;
+ }
+ quoted = 1;
+ }
rec_len = join_append_data(self, field_kind, field_data, field_len,
&quoted, 0);
if (rec_len < 0)
@@ -1243,6 +1257,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
{
DialectObj *dialect = self->dialect;
PyObject *iter, *field, *line, *result;
+ bool null_field = false;
iter = PyObject_GetIter(seq);
if (iter == NULL) {
@@ -1279,11 +1294,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
break;
}
+ null_field = (field == Py_None);
if (PyUnicode_Check(field)) {
append_ok = join_append(self, field, quoted);
Py_DECREF(field);
}
- else if (field == Py_None) {
+ else if (null_field) {
append_ok = join_append(self, NULL, quoted);
Py_DECREF(field);
}
@@ -1309,7 +1325,11 @@ csv_writerow(WriterObj *self, PyObject *seq)
return NULL;
if (self->num_fields > 0 && self->rec_len == 0) {
- if (dialect->quoting == QUOTE_NONE) {
+ if (dialect->quoting == QUOTE_NONE ||
+ (null_field &&
+ (dialect->quoting == QUOTE_STRINGS ||
+ dialect->quoting == QUOTE_NOTNULL)))
+ {
PyErr_Format(self->error_obj,
"single empty field record must be quoted");
return NULL;