summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Doc/whatsnew/3.12.rst 2
-rw-r--r-- Lib/test/test_csv.py 25
-rw-r--r-- Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst 2
-rw-r--r-- Modules/_csv.c 46
4 files changed, 57 insertions, 18 deletions
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index 77b12f9..100312a 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -690,7 +690,7 @@ csv
* Add :const:`csv.QUOTE_NOTNULL` and :const:`csv.QUOTE_STRINGS` flags to
provide finer grained control of ``None`` and empty strings by
- :class:`csv.writer` objects.
+ :class:`~csv.reader` and :class:`~csv.writer` objects.
dis
---
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 69fef59..21a4cb586 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -392,10 +392,26 @@ class Test_Csv(unittest.TestCase):
# will this fail where locale uses comma for decimals?
self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
quoting=csv.QUOTE_NONNUMERIC)
+ self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']],
+ quoting=csv.QUOTE_NOTNULL)
+ self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]],
+ quoting=csv.QUOTE_STRINGS)
+
+ self._read_test([',,"",'], [['', '', '', '']])
+ self._read_test([',,"",'], [['', '', '', '']],
+ quoting=csv.QUOTE_NONNUMERIC)
+ self._read_test([',,"",'], [[None, None, '', None]],
+ quoting=csv.QUOTE_NOTNULL)
+ self._read_test([',,"",'], [[None, None, '', None]],
+ quoting=csv.QUOTE_STRINGS)
+
self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
self.assertRaises(ValueError, self._read_test,
['abc,3'], [[]],
quoting=csv.QUOTE_NONNUMERIC)
+ self.assertRaises(ValueError, self._read_test,
+ ['abc,3'], [[]],
+ quoting=csv.QUOTE_STRINGS)
self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@')
self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0')
@@ -403,6 +419,15 @@ class Test_Csv(unittest.TestCase):
self._read_test(['no space, space, spaces,\ttab'],
[['no space', 'space', 'spaces', '\ttab']],
skipinitialspace=True)
+ self._read_test([' , , '],
+ [['', '', '']],
+ skipinitialspace=True)
+ self._read_test([' , , '],
+ [[None, None, None]],
+ skipinitialspace=True, quoting=csv.QUOTE_NOTNULL)
+ self._read_test([' , , '],
+ [[None, None, None]],
+ skipinitialspace=True, quoting=csv.QUOTE_STRINGS)
def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size
diff --git a/Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst b/Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst
new file mode 100644
index 0000000..7582603
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-05-16-27-34.gh-issue-113732.fgDRXA.rst
@@ -0,0 +1,2 @@
+Fix support of :data:`~csv.QUOTE_NOTNULL` and :data:`~csv.QUOTE_STRINGS` in
+:func:`csv.reader`.
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 929c215..3aa648b 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -131,7 +131,7 @@ typedef struct {
Py_UCS4 *field; /* temporary buffer */
Py_ssize_t field_size; /* size of allocated buffer */
Py_ssize_t field_len; /* length of current field */
- int numeric_field; /* treat field as numeric */
+ bool unquoted_field; /* true if no quotes around the current field */
unsigned long line_num; /* Source-file line number */
} ReaderObj;
@@ -644,22 +644,33 @@ _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
static int
parse_save_field(ReaderObj *self)
{
+ int quoting = self->dialect->quoting;
PyObject *field;
- field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
- (void *) self->field, self->field_len);
- if (field == NULL)
- return -1;
- self->field_len = 0;
- if (self->numeric_field) {
- PyObject *tmp;
-
- self->numeric_field = 0;
- tmp = PyNumber_Float(field);
- Py_DECREF(field);
- if (tmp == NULL)
+ if (self->unquoted_field &&
+ self->field_len == 0 &&
+ (quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
+ {
+ field = Py_NewRef(Py_None);
+ }
+ else {
+ field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
+ (void *) self->field, self->field_len);
+ if (field == NULL) {
return -1;
- field = tmp;
+ }
+ if (self->unquoted_field &&
+ self->field_len != 0 &&
+ (quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
+ {
+ PyObject *tmp = PyNumber_Float(field);
+ Py_DECREF(field);
+ if (tmp == NULL) {
+ return -1;
+ }
+ field = tmp;
+ }
+ self->field_len = 0;
}
if (PyList_Append(self->fields, field) < 0) {
Py_DECREF(field);
@@ -721,6 +732,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
/* fallthru */
case START_FIELD:
/* expecting field */
+ self->unquoted_field = true;
if (c == '\n' || c == '\r' || c == EOL) {
/* save empty field - return [fields] */
if (parse_save_field(self) < 0)
@@ -730,10 +742,12 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
else if (c == dialect->quotechar &&
dialect->quoting != QUOTE_NONE) {
/* start quoted field */
+ self->unquoted_field = false;
self->state = IN_QUOTED_FIELD;
}
else if (c == dialect->escapechar) {
/* possible escaped character */
+ self->unquoted_field = false;
self->state = ESCAPED_CHAR;
}
else if (c == ' ' && dialect->skipinitialspace)
@@ -746,8 +760,6 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
}
else {
/* begin new unquoted field */
- if (dialect->quoting == QUOTE_NONNUMERIC)
- self->numeric_field = 1;
if (parse_add_char(self, module_state, c) < 0)
return -1;
self->state = IN_FIELD;
@@ -892,7 +904,7 @@ parse_reset(ReaderObj *self)
return -1;
self->field_len = 0;
self->state = START_RECORD;
- self->numeric_field = 0;
+ self->unquoted_field = false;
return 0;
}