summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSerhiy Storchaka <storchaka@gmail.com>2024-01-22 13:34:16 (GMT)
committerGitHub <noreply@github.com>2024-01-22 13:34:16 (GMT)
commitc8351a617b8970dbe0d3af721c6aea873019c466 (patch)
tree3d38887fb1dada30112ecd0f7c4b41245ff00dab
parent2f2ddabd1a02e3095b751100b94b529e4e0bcd20 (diff)
downloadcpython-c8351a617b8970dbe0d3af721c6aea873019c466.zip
cpython-c8351a617b8970dbe0d3af721c6aea873019c466.tar.gz
cpython-c8351a617b8970dbe0d3af721c6aea873019c466.tar.bz2
gh-113796: Add more validation checks in the csv.Dialect constructor (GH-113797)
ValueError is now raised if the same character is used in different roles.
-rw-r--r--Lib/test/test_csv.py67
-rw-r--r--Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst3
-rw-r--r--Modules/_csv.c39
3 files changed, 96 insertions, 13 deletions
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 36da86e..69fef59 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase):
in TestDialectRegistry.
"""
def _test_arg_valid(self, ctor, arg):
+ ctor(arg)
self.assertRaises(TypeError, ctor)
self.assertRaises(TypeError, ctor, None)
- self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
- self.assertRaises(TypeError, ctor, arg, delimiter = 0)
- self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
+ self.assertRaises(TypeError, ctor, arg, bad_attr=0)
+ self.assertRaises(TypeError, ctor, arg, delimiter='')
+ self.assertRaises(TypeError, ctor, arg, escapechar='')
+ self.assertRaises(TypeError, ctor, arg, quotechar='')
+ self.assertRaises(TypeError, ctor, arg, delimiter='^^')
+ self.assertRaises(TypeError, ctor, arg, escapechar='^^')
+ self.assertRaises(TypeError, ctor, arg, quotechar='^^')
self.assertRaises(csv.Error, ctor, arg, 'foo')
self.assertRaises(TypeError, ctor, arg, delimiter=None)
self.assertRaises(TypeError, ctor, arg, delimiter=1)
+ self.assertRaises(TypeError, ctor, arg, escapechar=1)
self.assertRaises(TypeError, ctor, arg, quotechar=1)
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
@@ -46,6 +52,40 @@ class Test_Csv(unittest.TestCase):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
+ self.assertRaises(ValueError, ctor, arg, delimiter='\n')
+ self.assertRaises(ValueError, ctor, arg, escapechar='\n')
+ self.assertRaises(ValueError, ctor, arg, quotechar='\n')
+ self.assertRaises(ValueError, ctor, arg, delimiter='\r')
+ self.assertRaises(ValueError, ctor, arg, escapechar='\r')
+ self.assertRaises(ValueError, ctor, arg, quotechar='\r')
+ ctor(arg, delimiter=' ')
+ ctor(arg, escapechar=' ')
+ ctor(arg, quotechar=' ')
+ ctor(arg, delimiter='\t', skipinitialspace=True)
+ ctor(arg, escapechar='\t', skipinitialspace=True)
+ ctor(arg, quotechar='\t', skipinitialspace=True)
+ self.assertRaises(ValueError, ctor, arg,
+ delimiter=' ', skipinitialspace=True)
+ self.assertRaises(ValueError, ctor, arg,
+ escapechar=' ', skipinitialspace=True)
+ self.assertRaises(ValueError, ctor, arg,
+ quotechar=' ', skipinitialspace=True)
+ ctor(arg, delimiter='^')
+ ctor(arg, escapechar='^')
+ ctor(arg, quotechar='^')
+ self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^')
+ self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^')
+ self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^')
+ ctor(arg, delimiter='\x85')
+ ctor(arg, escapechar='\x85')
+ ctor(arg, quotechar='\x85')
+ ctor(arg, lineterminator='\x85')
+ self.assertRaises(ValueError, ctor, arg,
+ delimiter='\x85', lineterminator='\x85')
+ self.assertRaises(ValueError, ctor, arg,
+ escapechar='\x85', lineterminator='\x85')
+ self.assertRaises(ValueError, ctor, arg,
+ quotechar='\x85', lineterminator='\x85')
def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
@@ -535,14 +575,6 @@ class TestDialectRegistry(unittest.TestCase):
finally:
csv.unregister_dialect('testC')
- def test_bad_dialect(self):
- # Unknown parameter
- self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
- # Bad values
- self.assertRaises(TypeError, csv.reader, [], delimiter = None)
- self.assertRaises(TypeError, csv.reader, [], quoting = -1)
- self.assertRaises(TypeError, csv.reader, [], quoting = 100)
-
def test_copy(self):
for name in csv.list_dialects():
dialect = csv.get_dialect(name)
@@ -1088,10 +1120,15 @@ class TestDialectValidity(unittest.TestCase):
'"lineterminator" must be a string')
def test_invalid_chars(self):
- def create_invalid(field_name, value):
+ def create_invalid(field_name, value, **kwargs):
class mydialect(csv.Dialect):
- pass
+ delimiter = ','
+ quoting = csv.QUOTE_ALL
+ quotechar = '"'
+ lineterminator = '\r\n'
setattr(mydialect, field_name, value)
+ for field_name, value in kwargs.items():
+ setattr(mydialect, field_name, value)
d = mydialect()
for field_name in ("delimiter", "escapechar", "quotechar"):
@@ -1100,6 +1137,10 @@ class TestDialectValidity(unittest.TestCase):
self.assertRaises(csv.Error, create_invalid, field_name, "abc")
self.assertRaises(csv.Error, create_invalid, field_name, b'x')
self.assertRaises(csv.Error, create_invalid, field_name, 5)
+ self.assertRaises(ValueError, create_invalid, field_name, "\n")
+ self.assertRaises(ValueError, create_invalid, field_name, "\r")
+ self.assertRaises(ValueError, create_invalid, field_name, " ",
+ skipinitialspace=True)
class TestSniffer(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst b/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst
new file mode 100644
index 0000000..e9d4aba
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst
@@ -0,0 +1,3 @@
+Add more validation checks in the :class:`csv.Dialect` constructor.
+:exc:`ValueError` is now raised if the same character is used in different
+roles.
diff --git a/Modules/_csv.c b/Modules/_csv.c
index 8d94156..929c215 100644
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -331,6 +331,33 @@ dialect_check_quoting(int quoting)
return -1;
}
+static int
+dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
+{
+ if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
+ PyErr_Format(PyExc_ValueError, "bad %s value", name);
+ return -1;
+ }
+ if (PyUnicode_FindChar(
+ dialect->lineterminator, c, 0,
+ PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
+ {
+ PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
+ return -1;
+ }
+ return 0;
+}
+
+ static int
+dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
+{
+ if (c1 == c2 && c1 != NOT_SET) {
+ PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
+ return -1;
+ }
+ return 0;
+}
+
#define D_OFF(x) offsetof(DialectObj, x)
static struct PyMemberDef Dialect_memberlist[] = {
@@ -508,6 +535,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
goto err;
}
+ if (dialect_check_char("delimiter", self->delimiter, self) ||
+ dialect_check_char("escapechar", self->escapechar, self) ||
+ dialect_check_char("quotechar", self->quotechar, self) ||
+ dialect_check_chars("delimiter", "escapechar",
+ self->delimiter, self->escapechar) ||
+ dialect_check_chars("delimiter", "quotechar",
+ self->delimiter, self->quotechar) ||
+ dialect_check_chars("escapechar", "quotechar",
+ self->escapechar, self->quotechar))
+ {
+ goto err;
+ }
ret = Py_NewRef(self);
err: