From 330a942b6303c889d0f42f23d5ae2b42af92ecc4 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Wed, 12 Apr 2023 17:32:30 -0500 Subject: gh-67230: add quoting rules to csv module (GH-29469) Add two quoting styles for csv dialects. They will help to work with certain databases in particular. Automerge-Triggered-By: GH:merwok --- Doc/library/csv.rst | 22 ++++++++++++++++++++-- Lib/csv.py | 2 ++ Lib/test/test_csv.py | 4 ++++ .../2021-11-07-15-31-25.bpo-23041.564i32.rst | 2 ++ Modules/_csv.c | 16 +++++++++++++++- 5 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index f177655..64baa69 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -327,7 +327,7 @@ The :mod:`csv` module defines the following constants: Instructs :class:`writer` objects to quote all non-numeric fields. - Instructs the reader to convert all non-quoted fields to type *float*. + Instructs :class:`reader` objects to convert all non-quoted fields to type *float*. .. data:: QUOTE_NONE @@ -337,7 +337,25 @@ The :mod:`csv` module defines the following constants: character. If *escapechar* is not set, the writer will raise :exc:`Error` if any characters that require escaping are encountered. - Instructs :class:`reader` to perform no special processing of quote characters. + Instructs :class:`reader` objects to perform no special processing of quote characters. + +.. data:: QUOTE_NOTNULL + + Instructs :class:`writer` objects to quote all fields which are not + ``None``. This is similar to :data:`QUOTE_ALL`, except that if a + field value is ``None`` an empty (unquoted) string is written. + + Instructs :class:`reader` objects to interpret an empty (unquoted) field as ``None`` and + to otherwise behave as :data:`QUOTE_ALL`. + +.. data:: QUOTE_STRINGS + + Instructs :class:`writer` objects to always place quotes around fields + which are strings. 
This is similar to :data:`QUOTE_NONNUMERIC`, except that if a + field value is ``None`` an empty (unquoted) string is written. + + Instructs :class:`reader` objects to interpret an empty (unquoted) string as ``None`` and + to otherwise behave as :data:`QUOTE_NONNUMERIC`. The :mod:`csv` module defines the following exception: diff --git a/Lib/csv.py b/Lib/csv.py index 4ef8be4..77f30c8 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -9,12 +9,14 @@ from _csv import Error, __version__, writer, reader, register_dialect, \ unregister_dialect, get_dialect, list_dialects, \ field_size_limit, \ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ + QUOTE_STRINGS, QUOTE_NOTNULL, \ __doc__ from _csv import Dialect as _Dialect from io import StringIO __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", + "QUOTE_STRINGS", "QUOTE_NOTNULL", "Error", "Dialect", "__doc__", "excel", "excel_tab", "field_size_limit", "reader", "writer", "register_dialect", "get_dialect", "list_dialects", "Sniffer", diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 8289ddb..8fb97bc 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -187,6 +187,10 @@ class Test_Csv(unittest.TestCase): quoting = csv.QUOTE_ALL) self._write_test(['a\nb',1], '"a\nb","1"', quoting = csv.QUOTE_ALL) + self._write_test(['a','',None,1], '"a","",,1', + quoting = csv.QUOTE_STRINGS) + self._write_test(['a','',None,1], '"a","",,"1"', + quoting = csv.QUOTE_NOTNULL) def test_write_escape(self): self._write_test(['a',1,'p,q'], 'a,1,"p,q"', diff --git a/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst b/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst new file mode 100644 index 0000000..53c32d3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst @@ -0,0 +1,2 @@ +Add :data:`~csv.QUOTE_STRINGS` and :data:`~csv.QUOTE_NOTNULL` to the suite +of :mod:`csv` module quoting styles. 
diff --git a/Modules/_csv.c b/Modules/_csv.c index bd33708..2217cc2 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -82,7 +82,8 @@ typedef enum { } ParserState; typedef enum { - QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE + QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, + QUOTE_STRINGS, QUOTE_NOTNULL } QuoteStyle; typedef struct { @@ -95,6 +96,8 @@ static const StyleDesc quote_styles[] = { { QUOTE_ALL, "QUOTE_ALL" }, { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, { QUOTE_NONE, "QUOTE_NONE" }, + { QUOTE_STRINGS, "QUOTE_STRINGS" }, + { QUOTE_NOTNULL, "QUOTE_NOTNULL" }, { 0 } }; @@ -1264,6 +1267,12 @@ csv_writerow(WriterObj *self, PyObject *seq) case QUOTE_ALL: quoted = 1; break; + case QUOTE_STRINGS: + quoted = PyUnicode_Check(field); + break; + case QUOTE_NOTNULL: + quoted = field != Py_None; + break; default: quoted = 0; break; @@ -1659,6 +1668,11 @@ PyDoc_STRVAR(csv_module_doc, " csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" " fields which do not parse as integers or floating point\n" " numbers.\n" +" csv.QUOTE_STRINGS means that quotes are always placed around\n" +" fields which are strings. Note that the Python value None\n" +" is not a string.\n" +" csv.QUOTE_NOTNULL means that quotes are only placed around fields\n" +" that are not the Python value None.\n" " csv.QUOTE_NONE means that quotes are never placed around fields.\n" " * escapechar - specifies a one-character string used to escape\n" " the delimiter when quoting is set to QUOTE_NONE.\n" -- cgit v0.12