summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSkip Montanaro <skip.montanaro@gmail.com>2023-04-12 22:32:30 (GMT)
committerGitHub <noreply@github.com>2023-04-12 22:32:30 (GMT)
commit330a942b6303c889d0f42f23d5ae2b42af92ecc4 (patch)
tree9cadad6696562afb944bd5b9b67f010148a08c99
parent2b6e8777672da03f5d5cd12366e8378e47c550da (diff)
downloadcpython-330a942b6303c889d0f42f23d5ae2b42af92ecc4.zip
cpython-330a942b6303c889d0f42f23d5ae2b42af92ecc4.tar.gz
cpython-330a942b6303c889d0f42f23d5ae2b42af92ecc4.tar.bz2
gh-67230: add quoting rules to csv module (GH-29469)
Add two quoting styles for csv dialects. They are intended to help when working with certain databases.

Automerge-Triggered-By: GH:merwok
-rw-r--r--Doc/library/csv.rst22
-rw-r--r--Lib/csv.py2
-rw-r--r--Lib/test/test_csv.py4
-rw-r--r--Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst2
-rw-r--r--Modules/_csv.c16
5 files changed, 43 insertions, 3 deletions
diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst
index f177655..64baa69 100644
--- a/Doc/library/csv.rst
+++ b/Doc/library/csv.rst
@@ -327,7 +327,7 @@ The :mod:`csv` module defines the following constants:
Instructs :class:`writer` objects to quote all non-numeric fields.
- Instructs the reader to convert all non-quoted fields to type *float*.
+ Instructs :class:`reader` objects to convert all non-quoted fields to type *float*.
.. data:: QUOTE_NONE
@@ -337,7 +337,25 @@ The :mod:`csv` module defines the following constants:
character. If *escapechar* is not set, the writer will raise :exc:`Error` if
any characters that require escaping are encountered.
- Instructs :class:`reader` to perform no special processing of quote characters.
+ Instructs :class:`reader` objects to perform no special processing of quote characters.
+
+.. data:: QUOTE_NOTNULL
+
+ Instructs :class:`writer` objects to quote all fields which are not
+ ``None``. This is similar to :data:`QUOTE_ALL`, except that if a
+ field value is ``None`` an empty (unquoted) string is written.
+
+ Instructs :class:`reader` objects to interpret an empty (unquoted) field as ``None`` and
+ to otherwise behave as :data:`QUOTE_ALL`.
+
+.. data:: QUOTE_STRINGS
+
+ Instructs :class:`writer` objects to always place quotes around fields
+ which are strings. This is similar to :data:`QUOTE_NONNUMERIC`, except that if a
+ field value is ``None`` an empty (unquoted) string is written.
+
+ Instructs :class:`reader` objects to interpret an empty (unquoted) string as ``None`` and
+ to otherwise behave as :data:`QUOTE_NONNUMERIC`.
The :mod:`csv` module defines the following exception:
diff --git a/Lib/csv.py b/Lib/csv.py
index 4ef8be4..77f30c8 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -9,12 +9,14 @@ from _csv import Error, __version__, writer, reader, register_dialect, \
unregister_dialect, get_dialect, list_dialects, \
field_size_limit, \
QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
+ QUOTE_STRINGS, QUOTE_NOTNULL, \
__doc__
from _csv import Dialect as _Dialect
from io import StringIO
__all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
+ "QUOTE_STRINGS", "QUOTE_NOTNULL",
"Error", "Dialect", "__doc__", "excel", "excel_tab",
"field_size_limit", "reader", "writer",
"register_dialect", "get_dialect", "list_dialects", "Sniffer",
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 8289ddb..8fb97bc 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -187,6 +187,10 @@ class Test_Csv(unittest.TestCase):
quoting = csv.QUOTE_ALL)
self._write_test(['a\nb',1], '"a\nb","1"',
quoting = csv.QUOTE_ALL)
+ self._write_test(['a','',None,1], '"a","",,1',
+ quoting = csv.QUOTE_STRINGS)
+ self._write_test(['a','',None,1], '"a","",,"1"',
+ quoting = csv.QUOTE_NOTNULL)
def test_write_escape(self):
self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
diff --git a/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst b/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst
new file mode 100644
index 0000000..53c32d3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-11-07-15-31-25.bpo-23041.564i32.rst
@@ -0,0 +1,2 @@
+Add :data:`~csv.QUOTE_STRINGS` and :data:`~csv.QUOTE_NOTNULL` to the suite
+of :mod:`csv` module quoting styles.
diff --git a/Modules/_csv.c b/Modules/_csv.c
index bd33708..2217cc2 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -82,7 +82,8 @@ typedef enum {
} ParserState;
typedef enum {
- QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
+ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE,
+ QUOTE_STRINGS, QUOTE_NOTNULL
} QuoteStyle;
typedef struct {
@@ -95,6 +96,8 @@ static const StyleDesc quote_styles[] = {
{ QUOTE_ALL, "QUOTE_ALL" },
{ QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
{ QUOTE_NONE, "QUOTE_NONE" },
+ { QUOTE_STRINGS, "QUOTE_STRINGS" },
+ { QUOTE_NOTNULL, "QUOTE_NOTNULL" },
{ 0 }
};
@@ -1264,6 +1267,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
case QUOTE_ALL:
quoted = 1;
break;
+ case QUOTE_STRINGS:
+ quoted = PyUnicode_Check(field);
+ break;
+ case QUOTE_NOTNULL:
+ quoted = field != Py_None;
+ break;
default:
quoted = 0;
break;
@@ -1659,6 +1668,11 @@ PyDoc_STRVAR(csv_module_doc,
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
+" csv.QUOTE_STRINGS means that quotes are always placed around\n"
+" fields which are strings. Note that the Python value None\n"
+" is not a string.\n"
+" csv.QUOTE_NOTNULL means that quotes are only placed around fields\n"
+" that are not the Python value None.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"